author     Dimitry Andric <dim@FreeBSD.org>    2022-02-05 18:04:23 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2022-02-05 18:05:05 +0000
commit     ecbca9f5fb7d7613d2b94982c4825eb0d33d6842
tree       3a4038f3b7bafaeade9fd6146ea8021237616657
parent     6f8fc217eaa12bf657be1c6468ed9938d10168b3
Vendor import of llvm-project main llvmorg-14-init-18294-gdb01b123d012,
the last commit before the upstream release/14.x branch was created.
Tag: vendor/llvm-project/llvmorg-14-init-18294-gdb01b123d012
800 files changed, 9694 insertions, 5577 deletions
diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h
index f39ce14bc82c..63c11e237d6c 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -653,6 +653,20 @@ public:
   /// Returns the clang bytecode interpreter context.
   interp::Context &getInterpContext();

+  struct CUDAConstantEvalContext {
+    /// Do not allow wrong-sided variables in constant expressions.
+    bool NoWrongSidedVars = false;
+  } CUDAConstantEvalCtx;
+  struct CUDAConstantEvalContextRAII {
+    ASTContext &Ctx;
+    CUDAConstantEvalContext SavedCtx;
+    CUDAConstantEvalContextRAII(ASTContext &Ctx_, bool NoWrongSidedVars)
+        : Ctx(Ctx_), SavedCtx(Ctx_.CUDAConstantEvalCtx) {
+      Ctx_.CUDAConstantEvalCtx.NoWrongSidedVars = NoWrongSidedVars;
+    }
+    ~CUDAConstantEvalContextRAII() { Ctx.CUDAConstantEvalCtx = SavedCtx; }
+  };
+
   /// Returns the dynamic AST node parent map context.
   ParentMapContext &getParentMapContext();

@@ -2616,23 +2630,32 @@ public:
   /// template name uses the shortest form of the dependent
   /// nested-name-specifier, which itself contains all canonical
   /// types, values, and templates.
-  TemplateName getCanonicalTemplateName(TemplateName Name) const;
+  TemplateName getCanonicalTemplateName(const TemplateName &Name) const;

   /// Determine whether the given template names refer to the same
   /// template.
-  bool hasSameTemplateName(TemplateName X, TemplateName Y);
+  bool hasSameTemplateName(const TemplateName &X, const TemplateName &Y) const;

   /// Determine whether the two declarations refer to the same entity.
-  bool isSameEntity(NamedDecl *X, NamedDecl *Y);
+  ///
+  /// FIXME: isSameEntity is not const due to its implementation calls
+  /// hasSameFunctionTypeIgnoringExceptionSpec which may alter this.
+  bool isSameEntity(const NamedDecl *X, const NamedDecl *Y);

   /// Determine whether two template parameter lists are similar enough
   /// that they may be used in declarations of the same template.
-  bool isSameTemplateParameterList(TemplateParameterList *X,
-                                   TemplateParameterList *Y);
+  ///
+  /// FIXME: isSameTemplateParameterList is not const since it calls
+  /// isSameTemplateParameter.
+  bool isSameTemplateParameterList(const TemplateParameterList *X,
+                                   const TemplateParameterList *Y);

   /// Determine whether two template parameters are similar enough
   /// that they may be used in declarations of the same template.
-  bool isSameTemplateParameter(NamedDecl *X, NamedDecl *Y);
+  ///
+  /// FIXME: isSameTemplateParameterList is not const since it calls
+  /// isSameEntity.
+  bool isSameTemplateParameter(const NamedDecl *X, const NamedDecl *Y);

   /// Retrieve the "canonical" template argument.
   ///
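The new RAII helper follows the usual save/flip/restore pattern. A minimal usage sketch, not part of the patch; `Ctx` stands for an ASTContext reachable at the evaluation site:

    {
      // Reject wrong-sided variables only while this guard is alive.
      ASTContext::CUDAConstantEvalContextRAII Guard(Ctx, /*NoWrongSidedVars=*/true);
      // ... run the constant evaluation of interest here ...
    } // the destructor restores the previous CUDAConstantEvalCtx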
diff --git a/clang/include/clang/Analysis/Analyses/Dominators.h b/clang/include/clang/Analysis/Analyses/Dominators.h
index f588a5c7d1d7..9ac9cbe7d3ec 100644
--- a/clang/include/clang/Analysis/Analyses/Dominators.h
+++ b/clang/include/clang/Analysis/Analyses/Dominators.h
@@ -193,7 +193,7 @@ namespace IDFCalculatorDetail {
 /// Specialize ChildrenGetterTy to skip nullpointer successors.
 template <bool IsPostDom>
 struct ChildrenGetterTy<clang::CFGBlock, IsPostDom> {
-  using NodeRef = typename GraphTraits<clang::CFGBlock>::NodeRef;
+  using NodeRef = typename GraphTraits<clang::CFGBlock *>::NodeRef;
   using ChildrenTy = SmallVector<NodeRef, 8>;

   ChildrenTy get(const NodeRef &N) {
diff --git a/clang/include/clang/Analysis/CFG.h b/clang/include/clang/Analysis/CFG.h
index c5512a7e1499..d8e7e1e43d81 100644
--- a/clang/include/clang/Analysis/CFG.h
+++ b/clang/include/clang/Analysis/CFG.h
@@ -1494,9 +1494,6 @@ template <> struct GraphTraits< ::clang::CFGBlock *> {
   static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); }
 };

-template <> struct GraphTraits<clang::CFGBlock>
-    : GraphTraits<clang::CFGBlock *> {};
-
 template <> struct GraphTraits< const ::clang::CFGBlock *> {
   using NodeRef = const ::clang::CFGBlock *;
   using ChildIteratorType = ::clang::CFGBlock::const_succ_iterator;
@@ -1506,9 +1503,6 @@ template <> struct GraphTraits< const ::clang::CFGBlock *> {
   static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); }
 };

-template <> struct GraphTraits<const clang::CFGBlock>
-    : GraphTraits<clang::CFGBlock *> {};
-
 template <> struct GraphTraits<Inverse< ::clang::CFGBlock *>> {
   using NodeRef = ::clang::CFGBlock *;
   using ChildIteratorType = ::clang::CFGBlock::const_pred_iterator;
@@ -1521,9 +1515,6 @@ template <> struct GraphTraits<Inverse< ::clang::CFGBlock *>> {
   static ChildIteratorType child_end(NodeRef N) { return N->pred_end(); }
 };

-template <> struct GraphTraits<Inverse<clang::CFGBlock>>
-    : GraphTraits<clang::CFGBlock *> {};
-
 template <> struct GraphTraits<Inverse<const ::clang::CFGBlock *>> {
   using NodeRef = const ::clang::CFGBlock *;
   using ChildIteratorType = ::clang::CFGBlock::const_pred_iterator;
@@ -1536,9 +1527,6 @@ template <> struct GraphTraits<Inverse<const ::clang::CFGBlock *>> {
   static ChildIteratorType child_end(NodeRef N) { return N->pred_end(); }
 };

-template <> struct GraphTraits<const Inverse<clang::CFGBlock>>
-    : GraphTraits<clang::CFGBlock *> {};
-
 // Traits for: CFG

 template <> struct GraphTraits< ::clang::CFG* >
diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h
index f327abe63751..b5a7c061e17b 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h
@@ -27,6 +27,7 @@
 #include "llvm/ADT/Any.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Error.h"

 namespace clang {
 namespace dataflow {
@@ -106,18 +107,24 @@ template <typename LatticeT> struct DataflowAnalysisState {
 /// Performs dataflow analysis and returns a mapping from basic block IDs to
 /// dataflow analysis states that model the respective basic blocks. Indices
-/// of the returned vector correspond to basic block IDs.
+/// of the returned vector correspond to basic block IDs. Returns an error if
+/// the dataflow analysis cannot be performed successfully.
 template <typename AnalysisT>
-std::vector<llvm::Optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>
+llvm::Expected<std::vector<
+    llvm::Optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>>
 runDataflowAnalysis(const ControlFlowContext &CFCtx, AnalysisT &Analysis,
                     const Environment &InitEnv) {
   auto TypeErasedBlockStates =
       runTypeErasedDataflowAnalysis(CFCtx, Analysis, InitEnv);
+  if (!TypeErasedBlockStates)
+    return TypeErasedBlockStates.takeError();
+
   std::vector<
       llvm::Optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>
       BlockStates;
-  BlockStates.reserve(TypeErasedBlockStates.size());
-  llvm::transform(std::move(TypeErasedBlockStates),
+  BlockStates.reserve(TypeErasedBlockStates->size());
+
+  llvm::transform(std::move(*TypeErasedBlockStates),
                   std::back_inserter(BlockStates), [](auto &OptState) {
                     return std::move(OptState).map([](auto &&State) {
                       return DataflowAnalysisState<typename AnalysisT::Lattice>{
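Since `runDataflowAnalysis` now returns an `llvm::Expected`, callers must check for failure before touching the block states. A hedged sketch of the calling side; everything except the three documented parameters is illustrative:

    auto BlockStates = runDataflowAnalysis(CFCtx, Analysis, InitEnv);
    if (!BlockStates) {
      // E.g. the iteration limit was hit; consume or propagate the error.
      llvm::logAllUnhandledErrors(BlockStates.takeError(), llvm::errs());
      return;
    }
    for (auto &OptState : *BlockStates) {
      // Each element is an llvm::Optional; blocks never visited stay None.
      (void)OptState;
    }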
diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
index e560305cf5ca..cebfb66ef242 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
@@ -51,19 +51,36 @@ enum class SkipPast {
 /// Holds the state of the program (store and heap) at a given program point.
 class Environment {
 public:
-  /// Supplements `Environment` with non-standard join operations.
-  class Merger {
+  /// Supplements `Environment` with non-standard comparison and join
+  /// operations.
+  class ValueModel {
   public:
-    virtual ~Merger() = default;
+    virtual ~ValueModel() = default;

-    /// Given distinct `Val1` and `Val2`, modifies `MergedVal` to approximate
-    /// both `Val1` and `Val2`. This could be a strict lattice join or a more
-    /// general widening operation. If this function returns true, `MergedVal`
-    /// will be assigned to a storage location of type `Type` in `Env`.
+    /// Returns true if and only if `Val1` is equivalent to `Val2`.
     ///
     /// Requirements:
     ///
     /// `Val1` and `Val2` must be distinct.
+    ///
+    /// `Val1` and `Val2` must model values of type `Type`.
+    virtual bool compareEquivalent(QualType Type, const Value &Val1,
+                                   const Value &Val2) {
+      // FIXME: Consider adding QualType to StructValue and removing the Type
+      // argument here.
+      return false;
+    }
+
+    /// Modifies `MergedVal` to approximate both `Val1` and `Val2`. This could
+    /// be a strict lattice join or a more general widening operation. If this
+    /// function returns true, `MergedVal` will be assigned to a storage
+    /// location of type `Type` in `Env`.
+    ///
+    /// Requirements:
+    ///
+    /// `Val1` and `Val2` must be distinct.
+    ///
+    /// `Val1`, `Val2`, and `MergedVal` must model values of type `Type`.
     virtual bool merge(QualType Type, const Value &Val1, const Value &Val2,
                        Value &MergedVal, Environment &Env) {
       return false;
@@ -84,9 +101,29 @@ public:
   /// with a symbolic representation of the `this` pointee.
   Environment(DataflowAnalysisContext &DACtx, const DeclContext &DeclCtx);

-  bool operator==(const Environment &) const;
-
-  LatticeJoinEffect join(const Environment &, Environment::Merger &);
+  /// Returns true if and only if the environment is equivalent to `Other`, i.e
+  /// the two environments:
+  ///  - have the same mappings from declarations to storage locations,
+  ///  - have the same mappings from expressions to storage locations,
+  ///  - have the same or equivalent (according to `Model`) values assigned to
+  ///    the same storage locations.
+  ///
+  /// Requirements:
+  ///
+  ///  `Other` and `this` must use the same `DataflowAnalysisContext`.
+  bool equivalentTo(const Environment &Other,
+                    Environment::ValueModel &Model) const;
+
+  /// Joins the environment with `Other` by taking the intersection of storage
+  /// locations and values that are stored in them. Distinct values that are
+  /// assigned to the same storage locations in the environment and `Other` are
+  /// merged using `Model`.
+  ///
+  /// Requirements:
+  ///
+  ///  `Other` and `this` must use the same `DataflowAnalysisContext`.
+  LatticeJoinEffect join(const Environment &Other,
+                         Environment::ValueModel &Model);

   // FIXME: Rename `createOrGetStorageLocation` to `getOrCreateStorageLocation`,
   // `getStableStorageLocation`, or something more appropriate.
diff --git a/clang/include/clang/Analysis/FlowSensitive/MapLattice.h b/clang/include/clang/Analysis/FlowSensitive/MapLattice.h
index ff403f68b7c5..014cd60841ee 100644
--- a/clang/include/clang/Analysis/FlowSensitive/MapLattice.h
+++ b/clang/include/clang/Analysis/FlowSensitive/MapLattice.h
@@ -112,7 +112,7 @@ template <typename Key, typename ElementLattice>
 std::ostream &
 operator<<(std::ostream &Os,
            const clang::dataflow::MapLattice<Key, ElementLattice> &M) {
-  std::string Separator = "";
+  std::string Separator;
   Os << "{";
   for (const auto &E : M) {
     Os << std::exchange(Separator, ", ") << E.first << " => " << E.second;
@@ -125,7 +125,7 @@ template <typename ElementLattice>
 std::ostream &
 operator<<(std::ostream &Os,
            const clang::dataflow::VarMapLattice<ElementLattice> &M) {
-  std::string Separator = "";
+  std::string Separator;
   Os << "{";
   for (const auto &E : M) {
     Os << std::exchange(Separator, ", ") << E.first->getName().str() << " => "
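Analyses customize comparison and widening by overriding the renamed `Environment::ValueModel` hooks shown above, whose defaults return false. A sketch of a subclass, assuming the `clang::dataflow` namespace; the conservative bodies are illustrative only:

    class MyValueModel : public Environment::ValueModel {
    public:
      bool compareEquivalent(QualType Type, const Value &Val1,
                             const Value &Val2) override {
        // A real model would inspect domain-specific properties of the
        // two values here; returning false is the conservative default.
        return false;
      }

      bool merge(QualType Type, const Value &Val1, const Value &Val2,
                 Value &MergedVal, Environment &Env) override {
        // Returning true tells the framework to keep MergedVal for this
        // storage location; returning false drops it from the join.
        return false;
      }
    };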
diff --git a/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h b/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h
index 9f44475b14ba..2d3a9e456370 100644
--- a/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h
+++ b/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h
@@ -25,6 +25,7 @@
 #include "clang/Analysis/FlowSensitive/DataflowLattice.h"
 #include "llvm/ADT/Any.h"
 #include "llvm/ADT/Optional.h"
+#include "llvm/Support/Error.h"

 namespace clang {
 namespace dataflow {
@@ -40,7 +41,7 @@ struct TypeErasedLattice {
 };

 /// Type-erased base class for dataflow analyses built on a single lattice type.
-class TypeErasedDataflowAnalysis : public Environment::Merger {
+class TypeErasedDataflowAnalysis : public Environment::ValueModel {
   /// Determines whether to apply the built-in transfer functions.
   // FIXME: Remove this option once the framework supports composing analyses
   // (at which point the built-in transfer functions can be simply a standalone
@@ -115,8 +116,9 @@ TypeErasedDataflowAnalysisState transferBlock(

 /// Performs dataflow analysis and returns a mapping from basic block IDs to
 /// dataflow analysis states that model the respective basic blocks. Indices
-/// of the returned vector correspond to basic block IDs.
-std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>>
+/// of the returned vector correspond to basic block IDs. Returns an error if
+/// the dataflow analysis cannot be performed successfully.
+llvm::Expected<std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>>>
 runTypeErasedDataflowAnalysis(const ControlFlowContext &CFCtx,
                               TypeErasedDataflowAnalysis &Analysis,
                               const Environment &InitEnv);
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index 18fac924b114..efd2af1ab1df 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -432,45 +432,45 @@ implementation detail and not intended to be used by external users.
 The syntax of the attribute is as follows:

-.. code-block:: c++
+.. code-block:: text

-class __attribute__((sycl_special_class)) accessor {};
-class [[clang::sycl_special_class]] accessor {};
+  class __attribute__((sycl_special_class)) accessor {};
+  class [[clang::sycl_special_class]] accessor {};

 This is a code example that illustrates the use of the attribute:

 .. code-block:: c++

-class __attribute__((sycl_special_class)) SpecialType {
-  int F1;
-  int F2;
-  void __init(int f1) {
-    F1 = f1;
-    F2 = f1;
-  }
-  void __finalize() {}
-public:
-  SpecialType() = default;
-  int getF2() const { return F2; }
-};
-
-int main () {
-  SpecialType T;
-  cgh.single_task([=] {
-    T.getF2();
-  });
-}
+  class __attribute__((sycl_special_class)) SpecialType {
+    int F1;
+    int F2;
+    void __init(int f1) {
+      F1 = f1;
+      F2 = f1;
+    }
+    void __finalize() {}
+  public:
+    SpecialType() = default;
+    int getF2() const { return F2; }
+  };
+
+  int main () {
+    SpecialType T;
+    cgh.single_task([=] {
+      T.getF2();
+    });
+  }

 This would trigger the following kernel entry point in the AST:

 .. code-block:: c++

-void __sycl_kernel(int f1) {
-  SpecialType T;
-  T.__init(f1);
-  ...
-  T.__finalize()
-}
+  void __sycl_kernel(int f1) {
+    SpecialType T;
+    T.__init(f1);
+    ...
+    T.__finalize()
+  }
 }];
 }
diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def
index 634bcaed20a6..0869b87e32fb 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -62,6 +62,9 @@ BUILTIN(__builtin_arm_ldg, "v*v*", "t")
 BUILTIN(__builtin_arm_stg, "vv*", "t")
 BUILTIN(__builtin_arm_subp, "Uiv*v*", "t")

+// Memory Operations
+BUILTIN(__builtin_arm_mops_memset_tag, "v*v*iz", "")
+
 // Memory barrier
 BUILTIN(__builtin_arm_dmb, "vUi", "nc")
 BUILTIN(__builtin_arm_dsb, "vUi", "nc")
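Per the prototype string "v*v*iz", the new builtin returns void * and takes a pointer, an int value, and a size_t. A minimal usage sketch for AArch64 targets with MTE and MOPS; the wrapper function is hypothetical:

    // Illustrative only: sets `size` bytes at `dst` to `value` while also
    // setting allocation tags, returning the destination pointer.
    void *set_tagged(void *dst, int value, __SIZE_TYPE__ size) {
      return __builtin_arm_mops_memset_tag(dst, value, size);
    }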
diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h
index 5a5c2689c689..128ca2f5df3c 100644
--- a/clang/include/clang/Basic/CodeGenOptions.h
+++ b/clang/include/clang/Basic/CodeGenOptions.h
@@ -276,6 +276,11 @@ public:
   /// CUDA runtime back-end for incorporating them into host-side object file.
   std::string CudaGpuBinaryFileName;

+  /// List of filenames and section name pairs passed in using the
+  /// -fembed-offload-object option to embed device-side offloading objects into
+  /// the host as a named section. Input passed in as '<filename>,<section>'
+  std::vector<std::string> OffloadObjects;
+
   /// The name of the file to which the backend should save YAML optimization
   /// records.
   std::string OptRecordFile;
diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td
index fe4ac5ed6cb0..5ea55b0fd31b 100644
--- a/clang/include/clang/Basic/DiagnosticCommonKinds.td
+++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td
@@ -145,6 +145,13 @@ def warn_conflicting_nullability_attr_overriding_param_types : Warning<
 def err_nullability_conflicting : Error<
   "nullability specifier %0 conflicts with existing specifier %1">;

+def warn_target_unsupported_branch_protection_option: Warning <
+  "ignoring '-mbranch-protection=' option because the '%0' architecture does not support it">,
+  InGroup<BranchProtection>;
+
+def warn_target_unsupported_branch_protection_attribute: Warning <
+  "ignoring the 'branch-protection' attribute because the '%0' architecture does not support it">,
+  InGroup<BranchProtection>;
 } // OpenCL Section 6.8.g
diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index e635be6b6d1b..3efedbe0f642 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -627,8 +627,10 @@ def err_cc1_unbounded_vscale_min : Error<
 def err_drv_ssp_missing_offset_argument : Error<
   "'%0' is used without '-mstack-protector-guard-offset', and there is no default">;

-def err_drv_only_one_offload_target_supported_in : Error<
-  "Only one offload target is supported in %0.">;
+def err_drv_only_one_offload_target_supported : Error<
+  "only one offload target is supported">;
 def err_drv_invalid_or_unsupported_offload_target : Error<
-  "Invalid or unsupported offload target: '%0'.">;
+  "invalid or unsupported offload target: '%0'">;
+def err_drv_cuda_offload_only_emit_bc : Error<
+  "CUDA offload target is supported only along with --emit-llvm">;
 }
diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h
index 09afa641acf9..50c7f038fc6b 100644
--- a/clang/include/clang/Basic/LangOptions.h
+++ b/clang/include/clang/Basic/LangOptions.h
@@ -181,6 +181,10 @@ public:
     /// global-scope inline variables incorrectly.
     Ver12,

+    /// Attempt to be ABI-compatible with code generated by Clang 13.0.x.
+    /// This causes clang to not pack non-POD members of packed structs.
+    Ver13,
+
     /// Conform to the underlying platform's C and C++ ABIs as closely
     /// as we can.
     Latest
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index 686a365b8c12..a49342a34f3e 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -590,6 +590,17 @@ public:
     return false;
   }

+  // Different targets may support a different maximum width for the _BitInt
+  // type, depending on what operations are supported.
+  virtual size_t getMaxBitIntWidth() const {
+    // FIXME: this value should be llvm::IntegerType::MAX_INT_BITS, which is
+    // maximum bit width that LLVM claims its IR can support. However, most
+    // backends currently have a bug where they only support division
+    // operations on types that are <= 128 bits and crash otherwise. We're
+    // setting the max supported value to 128 to be conservative.
+    return 128;
+  }
+
   /// Determine whether _Float16 is supported on this target.
   virtual bool hasLegalHalfType() const { return HasLegalHalfType; }
@@ -1289,9 +1300,15 @@ public:
     bool BranchTargetEnforcement = false;
   };

+  /// Determine if the Architecture in this TargetInfo supports branch
+  /// protection
+  virtual bool isBranchProtectionSupportedArch(StringRef Arch) const {
+    return false;
+  }
+
   /// Determine if this TargetInfo supports the given branch protection
   /// specification
-  virtual bool validateBranchProtection(StringRef Spec,
+  virtual bool validateBranchProtection(StringRef Spec, StringRef Arch,
                                         BranchProtectionInfo &BPI,
                                         StringRef &Err) const {
     Err = "";
diff --git a/clang/include/clang/CodeGen/BackendUtil.h b/clang/include/clang/CodeGen/BackendUtil.h
index 77d500079f01..d97af65a3d01 100644
--- a/clang/include/clang/CodeGen/BackendUtil.h
+++ b/clang/include/clang/CodeGen/BackendUtil.h
@@ -44,6 +44,9 @@ namespace clang {
   void EmbedBitcode(llvm::Module *M, const CodeGenOptions &CGOpts,
                     llvm::MemoryBufferRef Buf);
+
+  void EmbedObject(llvm::Module *M, const CodeGenOptions &CGOpts,
+                   DiagnosticsEngine &Diags);
 }

 #endif
diff --git a/clang/include/clang/Driver/Action.h b/clang/include/clang/Driver/Action.h
index ba84d886a6cf..3b6c9e31faa3 100644
--- a/clang/include/clang/Driver/Action.h
+++ b/clang/include/clang/Driver/Action.h
@@ -73,6 +73,7 @@ public:
     OffloadBundlingJobClass,
     OffloadUnbundlingJobClass,
     OffloadWrapperJobClass,
+    LinkerWrapperJobClass,
     StaticLibJobClass,

     JobClassFirst = PreprocessJobClass,
@@ -642,6 +643,17 @@ public:
   }
 };

+class LinkerWrapperJobAction : public JobAction {
+  void anchor() override;
+
+public:
+  LinkerWrapperJobAction(ActionList &Inputs, types::ID Type);
+
+  static bool classof(const Action *A) {
+    return A->getKind() == LinkerWrapperJobClass;
+  }
+};
+
 class StaticLibJobAction : public JobAction {
   void anchor() override;
diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h
index 9ae34a2eaf01..93e1eca6a981 100644
--- a/clang/include/clang/Driver/Driver.h
+++ b/clang/include/clang/Driver/Driver.h
@@ -12,6 +12,7 @@
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/LLVM.h"
 #include "clang/Driver/Action.h"
+#include "clang/Driver/InputInfo.h"
 #include "clang/Driver/Options.h"
 #include "clang/Driver/Phases.h"
 #include "clang/Driver/ToolChain.h"
@@ -38,13 +39,14 @@ namespace clang {

 namespace driver {

-  class Command;
-  class Compilation;
-  class InputInfo;
-  class JobList;
-  class JobAction;
-  class SanitizerArgs;
-  class ToolChain;
+typedef SmallVector<InputInfo, 4> InputInfoList;
+
+class Command;
+class Compilation;
+class JobList;
+class JobAction;
+class SanitizerArgs;
+class ToolChain;

 /// Describes the kind of LTO mode selected via -f(no-)?lto(=.*)? options.
 enum LTOKind {
@@ -171,9 +173,11 @@ public:
   /// The file to log CC_LOG_DIAGNOSTICS output to, if enabled.
   std::string CCLogDiagnosticsFilename;

+  /// An input type and its arguments.
+  using InputTy = std::pair<types::ID, const llvm::opt::Arg *>;
+
   /// A list of inputs and their types for the given arguments.
-  typedef SmallVector<std::pair<types::ID, const llvm::opt::Arg *>, 16>
-      InputList;
+  using InputList = SmallVector<InputTy, 16>;

   /// Whether the driver should follow g++ like behavior.
   bool CCCIsCXX() const { return Mode == GXXMode; }
@@ -413,6 +417,18 @@ public:
   void BuildUniversalActions(Compilation &C, const ToolChain &TC,
                              const InputList &BAInputs) const;

+  /// BuildOffloadingActions - Construct the list of actions to perform for the
+  /// offloading toolchain that will be embedded in the host.
+  ///
+  /// \param C - The compilation that is being built.
+  /// \param Args - The input arguments.
+  /// \param Input - The input type and arguments
+  /// \param HostAction - The host action used in the offloading toolchain.
+  Action *BuildOffloadingActions(Compilation &C,
+                                 llvm::opt::DerivedArgList &Args,
+                                 const InputTy &Input,
+                                 Action *HostAction) const;
+
   /// Check that the file referenced by Value exists. If it doesn't,
   /// issue a diagnostic and return false.
   /// If TypoCorrect is true and the file does not exist, see if it looks
@@ -503,13 +519,12 @@ public:
   /// BuildJobsForAction - Construct the jobs to perform for the action \p A and
   /// return an InputInfo for the result of running \p A. Will only construct
   /// jobs for a given (Action, ToolChain, BoundArch, DeviceKind) tuple once.
-  InputInfo
-  BuildJobsForAction(Compilation &C, const Action *A, const ToolChain *TC,
-                     StringRef BoundArch, bool AtTopLevel, bool MultipleArchs,
-                     const char *LinkingOutput,
-                     std::map<std::pair<const Action *, std::string>, InputInfo>
-                         &CachedResults,
-                     Action::OffloadKind TargetDeviceOffloadKind) const;
+  InputInfoList BuildJobsForAction(
+      Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch,
+      bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput,
+      std::map<std::pair<const Action *, std::string>, InputInfoList>
+          &CachedResults,
+      Action::OffloadKind TargetDeviceOffloadKind) const;

   /// Returns the default name for linked images (e.g., "a.out").
   const char *getDefaultImageName() const;
@@ -617,10 +632,10 @@ private:
   /// Helper used in BuildJobsForAction. Doesn't use the cache when building
   /// jobs specifically for the given action, but will use the cache when
   /// building jobs for the Action's inputs.
-  InputInfo BuildJobsForActionNoCache(
+  InputInfoList BuildJobsForActionNoCache(
       Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch,
       bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput,
-      std::map<std::pair<const Action *, std::string>, InputInfo>
+      std::map<std::pair<const Action *, std::string>, InputInfoList>
          &CachedResults,
       Action::OffloadKind TargetDeviceOffloadKind) const;
diff --git a/clang/include/clang/Driver/Job.h b/clang/include/clang/Driver/Job.h
index 6e3b51f2a799..ae9337f3c2d0 100644
--- a/clang/include/clang/Driver/Job.h
+++ b/clang/include/clang/Driver/Job.h
@@ -208,6 +208,8 @@ public:
     Arguments = std::move(List);
   }

+  void replaceExecutable(const char *Exe) { Executable = Exe; }
+
   const char *getExecutable() const { return Executable; }

   const llvm::opt::ArgStringList &getArguments() const { return Arguments; }
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index b3de12e8c7b5..53e68ed2cef9 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -638,8 +638,8 @@ def _DASH_DASH : Option<["--"], "", KIND_REMAINING_ARGS>,
   Flags<[NoXarchOption, CoreOption]>;
 def A : JoinedOrSeparate<["-"], "A">, Flags<[RenderJoined]>, Group<gfortran_Group>;
 def B : JoinedOrSeparate<["-"], "B">, MetaVarName<"<prefix>">,
-  HelpText<"Search $prefix/$triple-$file and $prefix$file for executables, libraries, "
-           "includes, and data files used by the compiler. $prefix may or may not be a directory">;
+  HelpText<"Search $prefix$file for executables, libraries, and data files. "
+           "If $prefix is a directory, search $prefix/$file">;
 def gcc_toolchain : Joined<["--"], "gcc-toolchain=">, Flags<[NoXarchOption]>,
   HelpText<"Search for GCC installation in the specified directory on targets which commonly use GCC. "
           "The directory usually contains 'lib{,32,64}/gcc{,-cross}/$triple' and 'include'. If specified, "
@@ -1143,8 +1143,7 @@ defm autolink : BoolFOption<"autolink",
 // languages and accept other values such as CPU/GPU architectures,
 // offload kinds and target aliases.
 def offload_EQ : CommaJoined<["--"], "offload=">, Flags<[NoXarchOption]>,
-  HelpText<"Specify comma-separated list of offloading target triples"
-           " (HIP only)">;
+  HelpText<"Specify comma-separated list of offloading target triples (CUDA and HIP only)">;

 // C++ Coroutines TS
 defm coroutines_ts : BoolFOption<"coroutines-ts",
@@ -1152,6 +1151,10 @@ defm coroutines_ts : BoolFOption<"coroutines-ts",
   PosFlag<SetTrue, [CC1Option], "Enable support for the C++ Coroutines TS">,
   NegFlag<SetFalse>>;

+def fembed_offload_object_EQ : Joined<["-"], "fembed-offload-object=">,
+  Group<f_Group>, Flags<[NoXarchOption, CC1Option]>,
+  HelpText<"Embed Offloading device-side binary into host object file as a section.">,
+  MarshallingInfoStringVector<CodeGenOpts<"OffloadObjects">>;
 def fembed_bitcode_EQ : Joined<["-"], "fembed-bitcode=">,
   Group<f_Group>, Flags<[NoXarchOption, CC1Option, CC1AsOption]>, MetaVarName<"<option>">,
   HelpText<"Embed LLVM bitcode (option: off, all, bitcode, marker)">,
@@ -1907,7 +1910,7 @@ defm legacy_pass_manager : BoolOption<"f", "legacy-pass-manager",
 def fexperimental_new_pass_manager : Flag<["-"], "fexperimental-new-pass-manager">,
   Group<f_clang_Group>, Flags<[CC1Option]>, Alias<fno_legacy_pass_manager>;
 def fno_experimental_new_pass_manager : Flag<["-"], "fno-experimental-new-pass-manager">,
-  Group<f_clang_Group>, Flags<[CC1Option]>, Alias<flegacy_pass_manager>;
+  Group<f_clang_Group>, Flags<[CC1Option,NoDriverOption]>, Alias<flegacy_pass_manager>;
 def fexperimental_strict_floating_point : Flag<["-"], "fexperimental-strict-floating-point">,
   Group<f_clang_Group>, Flags<[CC1Option]>,
   HelpText<"Enables experimental strict floating point in LLVM.">,
@@ -2473,6 +2476,8 @@ defm openmp_optimistic_collapse : BoolFOption<"openmp-optimistic-collapse",
   PosFlag<SetTrue, [CC1Option]>, NegFlag<SetFalse>, BothFlags<[NoArgumentUnused, HelpHidden]>>;
 def static_openmp: Flag<["-"], "static-openmp">,
   HelpText<"Use the static host OpenMP runtime while linking.">;
+def fopenmp_new_driver : Flag<["-"], "fopenmp-new-driver">, Flags<[CC1Option]>, Group<Action_Group>,
+  HelpText<"Use the new driver for OpenMP offloading.">;
 def fno_optimize_sibling_calls : Flag<["-"], "fno-optimize-sibling-calls">, Group<f_Group>;
 def foptimize_sibling_calls : Flag<["-"], "foptimize-sibling-calls">, Group<f_Group>;
 defm escaping_block_tail_calls : BoolFOption<"escaping-block-tail-calls",
@@ -3895,6 +3900,11 @@ def frtlib_add_rpath: Flag<["-"], "frtlib-add-rpath">, Flags<[NoArgumentUnused]>,
   HelpText<"Add -rpath with architecture-specific resource directory to the linker flags">;
 def fno_rtlib_add_rpath: Flag<["-"], "fno-rtlib-add-rpath">, Flags<[NoArgumentUnused]>,
   HelpText<"Do not add -rpath with architecture-specific resource directory to the linker flags">;
+defm openmp_implicit_rpath: BoolFOption<"openmp-implicit-rpath",
+  LangOpts<"OpenMP">,
+  DefaultTrue,
+  PosFlag<SetTrue, [], "Set rpath on OpenMP executables">,
+  NegFlag<SetFalse>>;
 def r : Flag<["-"], "r">, Flags<[LinkerInput,NoArgumentUnused]>, Group<Link_Group>;
 def save_temps_EQ : Joined<["-", "--"], "save-temps=">, Flags<[CC1Option, NoXarchOption]>,
diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h
index 329833bb13be..bfc46af00265 100644
--- a/clang/include/clang/Driver/ToolChain.h
+++ b/clang/include/clang/Driver/ToolChain.h
@@ -151,6 +151,7 @@ private:
   mutable std::unique_ptr<Tool> IfsMerge;
   mutable std::unique_ptr<Tool> OffloadBundler;
   mutable std::unique_ptr<Tool> OffloadWrapper;
+  mutable std::unique_ptr<Tool> LinkerWrapper;

   Tool *getClang() const;
   Tool *getFlang() const;
@@ -161,6 +162,7 @@ private:
   Tool *getClangAs() const;
   Tool *getOffloadBundler() const;
   Tool *getOffloadWrapper() const;
+  Tool *getLinkerWrapper() const;

   mutable bool SanitizerArgsChecked = false;
   mutable std::unique_ptr<XRayArgs> XRayArguments;
@@ -711,6 +713,22 @@ public:
                              const llvm::fltSemantics *FPType = nullptr) const {
     return llvm::DenormalMode::getIEEE();
   }
+
+  // We want to expand the shortened versions of the triples passed in to
+  // the values used for the bitcode libraries.
+  static llvm::Triple getOpenMPTriple(StringRef TripleStr) {
+    llvm::Triple TT(TripleStr);
+    if (TT.getVendor() == llvm::Triple::UnknownVendor ||
+        TT.getOS() == llvm::Triple::UnknownOS) {
+      if (TT.getArch() == llvm::Triple::nvptx)
+        return llvm::Triple("nvptx-nvidia-cuda");
+      if (TT.getArch() == llvm::Triple::nvptx64)
+        return llvm::Triple("nvptx64-nvidia-cuda");
+      if (TT.getArch() == llvm::Triple::amdgcn)
+        return llvm::Triple("amdgcn-amd-amdhsa");
+    }
+    return TT;
+  }
 };

 /// Set a ToolChain's effective triple. Reset it when the registration object
diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
index bd21d7778f93..f037c33a1304 100644
--- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
+++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
@@ -552,7 +552,7 @@ def StdCLibraryFunctionArgsChecker : Checker<"StdCLibraryFunctionArgs">,
                   "or is EOF.">,
   Dependencies<[StdCLibraryFunctionsChecker]>,
   WeakDependencies<[CallAndMessageChecker, NonNullParamChecker, StreamChecker]>,
-  Documentation<NotDocumented>;
+  Documentation<HasAlphaDocumentation>;

 } // end "alpha.unix"
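A short sketch of what the new static `ToolChain::getOpenMPTriple` helper normalizes; the input strings are illustrative:

    // Shortened offload triples are expanded to the full triples used for
    // the bitcode libraries; complete triples pass through unchanged.
    llvm::Triple T1 = clang::driver::ToolChain::getOpenMPTriple("nvptx64");
    // T1.str() == "nvptx64-nvidia-cuda"
    llvm::Triple T2 = clang::driver::ToolChain::getOpenMPTriple("amdgcn-amd-amdhsa");
    // T2.str() == "amdgcn-amd-amdhsa"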
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 8a780250b6d8..5fa2d46de89b 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -3370,8 +3370,9 @@ QualType ASTContext::getBlockPointerType(QualType T) const {
 /// lvalue reference to the specified type.
 QualType ASTContext::getLValueReferenceType(QualType T,
                                             bool SpelledAsLValue) const {
-  assert(getCanonicalType(T) != OverloadTy &&
-         "Unresolved overloaded function type");
+  assert((!T->isPlaceholderType() ||
+          T->isSpecificPlaceholderType(BuiltinType::UnknownAny)) &&
+         "Unresolved placeholder type");

   // Unique pointers, to guarantee there is only one pointer of a particular
   // structure.
@@ -3409,6 +3410,10 @@ ASTContext::getLValueReferenceType(QualType T, bool SpelledAsLValue) const {
 /// getRValueReferenceType - Return the uniqued reference to the type for an
 /// rvalue reference to the specified type.
 QualType ASTContext::getRValueReferenceType(QualType T) const {
+  assert((!T->isPlaceholderType() ||
+          T->isSpecificPlaceholderType(BuiltinType::UnknownAny)) &&
+         "Unresolved placeholder type");
+
   // Unique pointers, to guarantee there is only one pointer of a particular
   // structure.
   llvm::FoldingSetNodeID ID;
@@ -6099,7 +6104,8 @@ ASTContext::getNameForTemplate(TemplateName Name,
   llvm_unreachable("bad template name kind!");
 }

-TemplateName ASTContext::getCanonicalTemplateName(TemplateName Name) const {
+TemplateName
+ASTContext::getCanonicalTemplateName(const TemplateName &Name) const {
   switch (Name.getKind()) {
   case TemplateName::QualifiedTemplate:
   case TemplateName::Template: {
@@ -6141,13 +6147,14 @@ TemplateName ASTContext::getCanonicalTemplateName(TemplateName Name) const {
   llvm_unreachable("bad template name!");
 }

-bool ASTContext::hasSameTemplateName(TemplateName X, TemplateName Y) {
-  X = getCanonicalTemplateName(X);
-  Y = getCanonicalTemplateName(Y);
-  return X.getAsVoidPointer() == Y.getAsVoidPointer();
+bool ASTContext::hasSameTemplateName(const TemplateName &X,
+                                     const TemplateName &Y) const {
+  return getCanonicalTemplateName(X).getAsVoidPointer() ==
+         getCanonicalTemplateName(Y).getAsVoidPointer();
 }

-bool ASTContext::isSameTemplateParameter(NamedDecl *X, NamedDecl *Y) {
+bool ASTContext::isSameTemplateParameter(const NamedDecl *X,
+                                         const NamedDecl *Y) {
   if (X->getKind() != Y->getKind())
     return false;

@@ -6198,8 +6205,8 @@ bool ASTContext::isSameTemplateParameter(NamedDecl *X, NamedDecl *Y) {
                                        TY->getTemplateParameters());
 }

-bool ASTContext::isSameTemplateParameterList(TemplateParameterList *X,
-                                             TemplateParameterList *Y) {
+bool ASTContext::isSameTemplateParameterList(const TemplateParameterList *X,
+                                             const TemplateParameterList *Y) {
   if (X->size() != Y->size())
     return false;

@@ -6302,7 +6309,7 @@ static bool hasSameOverloadableAttrs(const FunctionDecl *A,
   return true;
 }

-bool ASTContext::isSameEntity(NamedDecl *X, NamedDecl *Y) {
+bool ASTContext::isSameEntity(const NamedDecl *X, const NamedDecl *Y) {
   if (X == Y)
     return true;

@@ -6409,6 +6416,8 @@ bool ASTContext::isSameEntity(NamedDecl *X, NamedDecl *Y) {
     if (getLangOpts().CPlusPlus17 && XFPT && YFPT &&
         (isUnresolvedExceptionSpec(XFPT->getExceptionSpecType()) ||
          isUnresolvedExceptionSpec(YFPT->getExceptionSpecType())) &&
+        // FIXME: We could make isSameEntity const after we make
+        // hasSameFunctionTypeIgnoringExceptionSpec const.
         hasSameFunctionTypeIgnoringExceptionSpec(XT, YT))
       return true;
     return false;
@@ -8286,6 +8295,11 @@ void ASTContext::getObjCEncodingForTypeImpl(QualType T, std::string &S,
       *NotEncodedT = T;
     return;

+  case Type::BitInt:
+    if (NotEncodedT)
+      *NotEncodedT = T;
+    return;
+
   // We could see an undeduced auto type here during error recovery.
   // Just ignore it.
   case Type::Auto:
@@ -8293,7 +8307,6 @@ void ASTContext::getObjCEncodingForTypeImpl(QualType T, std::string &S,
     return;

   case Type::Pipe:
-  case Type::BitInt:
 #define ABSTRACT_TYPE(KIND, BASE)
 #define TYPE(KIND, BASE)
 #define DEPENDENT_TYPE(KIND, BASE) \
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index f9416e8e215d..9e4088f94015 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -983,6 +983,8 @@ namespace {
       discardCleanups();
     }

+    ASTContext &getCtx() const override { return Ctx; }
+
     void setEvaluatingDecl(APValue::LValueBase Base, APValue &Value,
                            EvaluatingDeclKind EDK = EvaluatingDeclKind::Ctor) {
       EvaluatingDecl = Base;
@@ -1116,8 +1118,6 @@ namespace {

     Expr::EvalStatus &getEvalStatus() const override { return EvalStatus; }

-    ASTContext &getCtx() const override { return Ctx; }
-
     // If we have a prior diagnostic, it will be noting that the expression
     // isn't a constant expression. This diagnostic is more important,
     // unless we require this evaluation to produce a constant expression.
@@ -2216,6 +2216,19 @@ static bool CheckLValueConstantExpression(EvalInfo &Info, SourceLocation Loc,
       if (!isForManglingOnly(Kind) && Var->hasAttr<DLLImportAttr>())
         // FIXME: Diagnostic!
         return false;
+
+      // In CUDA/HIP device compilation, only device side variables have
+      // constant addresses.
+      if (Info.getCtx().getLangOpts().CUDA &&
+          Info.getCtx().getLangOpts().CUDAIsDevice &&
+          Info.getCtx().CUDAConstantEvalCtx.NoWrongSidedVars) {
+        if ((!Var->hasAttr<CUDADeviceAttr>() &&
+             !Var->hasAttr<CUDAConstantAttr>() &&
+             !Var->getType()->isCUDADeviceBuiltinSurfaceType() &&
+             !Var->getType()->isCUDADeviceBuiltinTextureType()) ||
+            Var->hasAttr<HIPManagedAttr>())
+          return false;
+      }
     }
     if (const auto *FD = dyn_cast<const FunctionDecl>(BaseVD)) {
       // __declspec(dllimport) must be handled very carefully:
diff --git a/clang/lib/AST/RecordLayoutBuilder.cpp b/clang/lib/AST/RecordLayoutBuilder.cpp
index 61a30ead165e..709e05716a56 100644
--- a/clang/lib/AST/RecordLayoutBuilder.cpp
+++ b/clang/lib/AST/RecordLayoutBuilder.cpp
@@ -1887,7 +1887,12 @@ void ItaniumRecordLayoutBuilder::LayoutField(const FieldDecl *D,
   UnfilledBitsInLastUnit = 0;
   LastBitfieldStorageUnitSize = 0;

-  bool FieldPacked = Packed || D->hasAttr<PackedAttr>();
+  llvm::Triple Target = Context.getTargetInfo().getTriple();
+  bool FieldPacked = (Packed && (!FieldClass || FieldClass->isPOD() ||
+                                 Context.getLangOpts().getClangABICompat() <=
+                                     LangOptions::ClangABI::Ver13 ||
+                                 Target.isPS4() || Target.isOSDarwin())) ||
+                     D->hasAttr<PackedAttr>();

   AlignRequirementKind AlignRequirement = AlignRequirementKind::None;
   CharUnits FieldSize;
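Per the condition in the `RecordLayoutBuilder` hunk above, a struct-level packed attribute now applies to a class-type member only when the member is POD, Clang 13 (or earlier) ABI compatibility is requested, or the target is PS4/Darwin. A hedged C++ illustration, assuming a 4-byte, 4-aligned int and no other attributes:

    struct NonPod {
      NonPod();    // user-provided constructor: not a POD type
      int X;
    };

    struct __attribute__((packed)) S {
      char C;
      NonPod N;    // packed at offset 1 (sizeof(S) == 5) when the Clang <= 13
                   // compatibility mode or an exempt platform keeps the old
                   // behavior; otherwise at offset 4 (sizeof(S) == 8)
    };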
diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
index 938f7338b640..eca58b313761 100644
--- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
+++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
@@ -41,6 +41,21 @@ llvm::DenseMap<K, V> intersectDenseMaps(const llvm::DenseMap<K, V> &Map1,
   return Result;
 }

+/// Returns true if and only if `Val1` is equivalent to `Val2`.
+static bool equivalentValues(QualType Type, Value *Val1, Value *Val2,
+                             Environment::ValueModel &Model) {
+  if (Val1 == Val2)
+    return true;
+
+  if (auto *IndVal1 = dyn_cast<IndirectionValue>(Val1)) {
+    auto *IndVal2 = cast<IndirectionValue>(Val2);
+    assert(IndVal1->getKind() == IndVal2->getKind());
+    return &IndVal1->getPointeeLoc() == &IndVal2->getPointeeLoc();
+  }
+
+  return Model.compareEquivalent(Type, *Val1, *Val2);
+}
+
 Environment::Environment(DataflowAnalysisContext &DACtx,
                          const DeclContext &DeclCtx)
     : Environment(DACtx) {
@@ -68,13 +83,40 @@ Environment::Environment(DataflowAnalysisContext &DACtx,
   }
 }

-bool Environment::operator==(const Environment &Other) const {
+bool Environment::equivalentTo(const Environment &Other,
+                               Environment::ValueModel &Model) const {
   assert(DACtx == Other.DACtx);
-  return DeclToLoc == Other.DeclToLoc && LocToVal == Other.LocToVal;
+
+  if (DeclToLoc != Other.DeclToLoc)
+    return false;
+
+  if (ExprToLoc != Other.ExprToLoc)
+    return false;
+
+  if (LocToVal.size() != Other.LocToVal.size())
+    return false;
+
+  for (auto &Entry : LocToVal) {
+    const StorageLocation *Loc = Entry.first;
+    assert(Loc != nullptr);
+
+    Value *Val = Entry.second;
+    assert(Val != nullptr);
+
+    auto It = Other.LocToVal.find(Loc);
+    if (It == Other.LocToVal.end())
+      return false;
+    assert(It->second != nullptr);
+
+    if (!equivalentValues(Loc->getType(), Val, It->second, Model))
+      return false;
+  }
+
+  return true;
 }

 LatticeJoinEffect Environment::join(const Environment &Other,
-                                    Environment::Merger &Merger) {
+                                    Environment::ValueModel &Model) {
   assert(DACtx == Other.DACtx);

   auto Effect = LatticeJoinEffect::Unchanged;
@@ -89,8 +131,12 @@ LatticeJoinEffect Environment::join(const Environment &Other,
   if (ExprToLocSizeBefore != ExprToLoc.size())
     Effect = LatticeJoinEffect::Changed;

-  llvm::DenseMap<const StorageLocation *, Value *> MergedLocToVal;
-  for (auto &Entry : LocToVal) {
+  // Move `LocToVal` so that `Environment::ValueModel::merge` can safely assign
+  // values to storage locations while this code iterates over the current
+  // assignments.
+  llvm::DenseMap<const StorageLocation *, Value *> OldLocToVal =
+      std::move(LocToVal);
+  for (auto &Entry : OldLocToVal) {
     const StorageLocation *Loc = Entry.first;
     assert(Loc != nullptr);

@@ -102,20 +148,19 @@ LatticeJoinEffect Environment::join(const Environment &Other,
       continue;
     assert(It->second != nullptr);

-    if (It->second == Val) {
-      MergedLocToVal.insert({Loc, Val});
+    if (equivalentValues(Loc->getType(), Val, It->second, Model)) {
+      LocToVal.insert({Loc, Val});
       continue;
     }

-    // FIXME: Consider destroying `MergedValue` immediately if `Merger::merge`
-    // returns false to avoid storing unneeded values in `DACtx`.
+    // FIXME: Consider destroying `MergedValue` immediately if
+    // `ValueModel::merge` returns false to avoid storing unneeded values in
+    // `DACtx`.
     if (Value *MergedVal = createValue(Loc->getType()))
-      if (Merger.merge(Loc->getType(), *Val, *It->second, *MergedVal, *this))
-        MergedLocToVal.insert({Loc, MergedVal});
+      if (Model.merge(Loc->getType(), *Val, *It->second, *MergedVal, *this))
+        LocToVal.insert({Loc, MergedVal});
   }

-  const unsigned LocToValSizeBefore = LocToVal.size();
-  LocToVal = std::move(MergedLocToVal);
-  if (LocToValSizeBefore != LocToVal.size())
+  if (OldLocToVal.size() != LocToVal.size())
     Effect = LatticeJoinEffect::Changed;

   return Effect;
diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
index aaf6a834f5b3..6b14b5ceaf69 100644
--- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
+++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//

 #include <memory>
+#include <system_error>
 #include <utility>
 #include <vector>

@@ -26,7 +27,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Error.h"

 namespace clang {
 namespace dataflow {
@@ -190,7 +191,7 @@ TypeErasedDataflowAnalysisState transferBlock(
   return State;
 }

-std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>>
+llvm::Expected<std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>>>
 runTypeErasedDataflowAnalysis(const ControlFlowContext &CFCtx,
                               TypeErasedDataflowAnalysis &Analysis,
                               const Environment &InitEnv) {
@@ -216,8 +217,8 @@ runTypeErasedDataflowAnalysis(const ControlFlowContext &CFCtx,
   static constexpr uint32_t MaxIterations = 1 << 16;
   while (const CFGBlock *Block = Worklist.dequeue()) {
     if (++Iterations > MaxIterations) {
-      llvm::errs() << "Maximum number of iterations reached, giving up.\n";
-      break;
+      return llvm::createStringError(std::errc::timed_out,
+                                     "maximum number of iterations reached");
     }

     const llvm::Optional<TypeErasedDataflowAnalysisState> &OldBlockState =
@@ -228,7 +229,7 @@ runTypeErasedDataflowAnalysis(const ControlFlowContext &CFCtx,
     if (OldBlockState.hasValue() &&
         Analysis.isEqualTypeErased(OldBlockState.getValue().Lattice,
                                    NewBlockState.Lattice) &&
-        OldBlockState->Env == NewBlockState.Env) {
+        OldBlockState->Env.equivalentTo(NewBlockState.Env, Analysis)) {
       // The state of `Block` didn't change after transfer so there's no need to
       // revisit its successors.
       continue;
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index 8e23cc4c421a..34bdb58dffc1 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -138,7 +138,7 @@ bool AArch64TargetInfo::setABI(const std::string &Name) {
   return true;
 }

-bool AArch64TargetInfo::validateBranchProtection(StringRef Spec,
+bool AArch64TargetInfo::validateBranchProtection(StringRef Spec, StringRef,
                                                  BranchProtectionInfo &BPI,
                                                  StringRef &Err) const {
   llvm::ARM::ParsedBranchProtection PBP;
diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h
index ebddce0c1c73..9e22aeaff251 100644
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -70,8 +70,9 @@ public:
   StringRef getABI() const override;
   bool setABI(const std::string &Name) override;

-  bool validateBranchProtection(StringRef, BranchProtectionInfo &,
-                                StringRef &) const override;
+  bool validateBranchProtection(StringRef Spec, StringRef Arch,
+                                BranchProtectionInfo &BPI,
+                                StringRef &Err) const override;

   bool isValidCPUName(StringRef Name) const override;
   void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override;
diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp
index 478a0233398d..9c9d198e8f32 100644
--- a/clang/lib/Basic/Targets/ARM.cpp
+++ b/clang/lib/Basic/Targets/ARM.cpp
@@ -371,13 +371,34 @@ bool ARMTargetInfo::setABI(const std::string &Name) {
   return false;
 }

-bool ARMTargetInfo::validateBranchProtection(StringRef Spec,
+bool ARMTargetInfo::isBranchProtectionSupportedArch(StringRef Arch) const {
+  llvm::ARM::ArchKind CPUArch = llvm::ARM::parseCPUArch(Arch);
+  if (CPUArch == llvm::ARM::ArchKind::INVALID)
+    CPUArch = llvm::ARM::parseArch(getTriple().getArchName());
+
+  if (CPUArch == llvm::ARM::ArchKind::INVALID)
+    return false;
+
+  StringRef ArchFeature = llvm::ARM::getArchName(CPUArch);
+  auto a =
+      llvm::Triple(ArchFeature, getTriple().getVendorName(),
+                   getTriple().getOSName(), getTriple().getEnvironmentName());
+
+  StringRef SubArch = llvm::ARM::getSubArch(CPUArch);
+  llvm::ARM::ProfileKind Profile = llvm::ARM::parseArchProfile(SubArch);
+  return a.isArmT32() && (Profile == llvm::ARM::ProfileKind::M);
+}
+
+bool ARMTargetInfo::validateBranchProtection(StringRef Spec, StringRef Arch,
                                              BranchProtectionInfo &BPI,
                                              StringRef &Err) const {
   llvm::ARM::ParsedBranchProtection PBP;
   if (!llvm::ARM::parseBranchProtection(Spec, PBP, Err))
     return false;

+  if (!isBranchProtectionSupportedArch(Arch))
+    return false;
+
   BPI.SignReturnAddr =
       llvm::StringSwitch<LangOptions::SignReturnAddressScopeKind>(PBP.Scope)
           .Case("non-leaf", LangOptions::SignReturnAddressScopeKind::NonLeaf)
diff --git a/clang/lib/Basic/Targets/ARM.h b/clang/lib/Basic/Targets/ARM.h
index f074dac57f9b..e85336b6e32f 100644
--- a/clang/lib/Basic/Targets/ARM.h
+++ b/clang/lib/Basic/Targets/ARM.h
@@ -126,8 +126,10 @@ public:
   StringRef getABI() const override;
   bool setABI(const std::string &Name) override;

-  bool validateBranchProtection(StringRef, BranchProtectionInfo &,
-                                StringRef &) const override;
+  bool isBranchProtectionSupportedArch(StringRef Arch) const override;
+  bool validateBranchProtection(StringRef Spec, StringRef Arch,
+                                BranchProtectionInfo &BPI,
+                                StringRef &Err) const override;

   // FIXME: This should be based on Arch attributes, not CPU names.
   bool
diff --git a/clang/lib/Basic/Targets/WebAssembly.cpp b/clang/lib/Basic/Targets/WebAssembly.cpp
index 4cba861f61d2..2309997eb77b 100644
--- a/clang/lib/Basic/Targets/WebAssembly.cpp
+++ b/clang/lib/Basic/Targets/WebAssembly.cpp
@@ -260,6 +260,7 @@ void WebAssemblyTargetInfo::adjust(DiagnosticsEngine &Diags,
   if (!HasAtomics) {
     Opts.POSIXThreads = false;
     Opts.setThreadModel(LangOptions::ThreadModelKind::Single);
+    Opts.ThreadsafeStatics = false;
   }
 }
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 9ae5c870afc8..a4d330c0ba93 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -84,6 +84,7 @@
 #include "llvm/Transforms/Utils/CanonicalizeAliases.h"
 #include "llvm/Transforms/Utils/Debugify.h"
 #include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
 #include "llvm/Transforms/Utils/NameAnonGlobals.h"
 #include "llvm/Transforms/Utils/SymbolRewriter.h"
 #include <memory>
@@ -1745,8 +1746,36 @@ void clang::EmbedBitcode(llvm::Module *M, const CodeGenOptions &CGOpts,
                          llvm::MemoryBufferRef Buf) {
   if (CGOpts.getEmbedBitcode() == CodeGenOptions::Embed_Off)
     return;
-  llvm::EmbedBitcodeInModule(
+  llvm::embedBitcodeInModule(
       *M, Buf, CGOpts.getEmbedBitcode() != CodeGenOptions::Embed_Marker,
       CGOpts.getEmbedBitcode() != CodeGenOptions::Embed_Bitcode,
       CGOpts.CmdArgs);
 }
+
+void clang::EmbedObject(llvm::Module *M, const CodeGenOptions &CGOpts,
+                        DiagnosticsEngine &Diags) {
+  if (CGOpts.OffloadObjects.empty())
+    return;
+
+  for (StringRef OffloadObject : CGOpts.OffloadObjects) {
+    if (OffloadObject.count(',') != 1) {
+      Diags.Report(Diags.getCustomDiagID(
+          DiagnosticsEngine::Error, "Invalid string pair for embedding '%0'"))
+          << OffloadObject;
+      return;
+    }
+    auto FilenameAndSection = OffloadObject.split(',');
+    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ObjectOrErr =
+        llvm::MemoryBuffer::getFileOrSTDIN(std::get<0>(FilenameAndSection));
+    if (std::error_code EC = ObjectOrErr.getError()) {
+      auto DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
+                                          "could not open '%0' for embedding");
+      Diags.Report(DiagID) << std::get<0>(FilenameAndSection);
+      return;
+    }
+
+    SmallString<128> SectionName(
+        {".llvm.offloading.", std::get<1>(FilenameAndSection)});
+    llvm::embedBufferInModule(*M, **ObjectOrErr, SectionName);
+  }
+}
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 2b7862e618bd..d071c7a5b4a4 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9777,6 +9777,18 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
     return Builder.CreateCall(F, {Arg0, Arg1});
   }

+  // Memory Operations (MOPS)
+  if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
+    Value *Dst = EmitScalarExpr(E->getArg(0));
+    Value *Val = EmitScalarExpr(E->getArg(1));
+    Value *Size = EmitScalarExpr(E->getArg(2));
+    Dst = Builder.CreatePointerCast(Dst, Int8PtrTy);
+    Val = Builder.CreateTrunc(Val, Int8Ty);
+    Size = Builder.CreateIntCast(Size, Int64Ty, false);
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
+  }
+
   // Memory Tagging Extensions (MTE) Intrinsics
   Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
   switch (BuiltinID) {
diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp
index 520e119ada26..76b90924750c 100644
--- a/clang/lib/CodeGen/CGClass.cpp
+++ b/clang/lib/CodeGen/CGClass.cpp
@@ -162,7 +162,8 @@ CodeGenFunction::EmitCXXMemberDataPointerAddress(const Expr *E, Address base,
       CGM.getDynamicOffsetAlignment(base.getAlignment(),
                             memberPtrType->getClass()->getAsCXXRecordDecl(),
                                     memberAlign);
-  return Address(ptr, memberAlign);
+  return Address(ptr, ConvertTypeForMem(memberPtrType->getPointeeType()),
+                 memberAlign);
 }

 CharUnits CodeGenModule::computeNonVirtualBaseClassOffset(
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 0fb7ec26a85e..bb5d18b74894 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -150,7 +150,7 @@ Address CodeGenFunction::CreateMemTemp(QualType Ty, CharUnits Align,
     Result = Address(
         Builder.CreateBitCast(Result.getPointer(), VectorTy->getPointerTo()),
-        Result.getAlignment());
+        VectorTy, Result.getAlignment());
   }
   return Result;
 }
diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp
index 0968afd82064..73b05690537d 100644
--- a/clang/lib/CodeGen/CGExprAgg.cpp
+++ b/clang/lib/CodeGen/CGExprAgg.cpp
@@ -1834,8 +1834,8 @@ void AggExprEmitter::VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E,
     // at the end of each iteration.
     CodeGenFunction::RunCleanupsScope CleanupsScope(CGF);
     CodeGenFunction::ArrayInitLoopExprScope Scope(CGF, index);
-    LValue elementLV =
-        CGF.MakeAddrLValue(Address(element, elementAlign), elementType);
+    LValue elementLV = CGF.MakeAddrLValue(
+        Address(element, llvmElementType, elementAlign), elementType);

     if (InnerLoop) {
       // If the subexpression is an ArrayInitLoopExpr, share its cleanup.
diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp
index 0571c498c377..f06d21861740 100644
--- a/clang/lib/CodeGen/CGExprCXX.cpp
+++ b/clang/lib/CodeGen/CGExprCXX.cpp
@@ -1108,10 +1108,10 @@ void CodeGenFunction::EmitNewArrayInitializer(
       StoreAnyExprIntoOneUnit(*this, ILE->getInit(i),
                               ILE->getInit(i)->getType(), CurPtr,
                               AggValueSlot::DoesNotOverlap);
-      CurPtr = Address(Builder.CreateInBoundsGEP(CurPtr.getElementType(),
-                                                 CurPtr.getPointer(),
-                                                 Builder.getSize(1),
-                                                 "array.exp.next"),
+      CurPtr = Address(Builder.CreateInBoundsGEP(
+                           CurPtr.getElementType(), CurPtr.getPointer(),
+                           Builder.getSize(1), "array.exp.next"),
+                       CurPtr.getElementType(),
                        StartAlign.alignmentAtOffset((i + 1) * ElementSize));
     }

diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp
index cf1f2e0eab92..ac4b4d1308ab 100644
--- a/clang/lib/CodeGen/CGExprConstant.cpp
+++ b/clang/lib/CodeGen/CGExprConstant.cpp
@@ -851,6 +851,7 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD,
 }

 llvm::Constant *ConstStructBuilder::Finalize(QualType Type) {
+  Type = Type.getNonReferenceType();
   RecordDecl *RD = Type->castAs<RecordType>()->getDecl();
   llvm::Type *ValTy = CGM.getTypes().ConvertType(Type);
   return Builder.build(ValTy, RD->hasFlexibleArrayMember());
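Several of the CodeGen hunks above pass an explicit element type to `Address`, part of the move toward LLVM's opaque pointers, where the pointee type can no longer be recovered from the pointer value itself. The pattern, sketched with placeholder variable names:

    // Carry the element type alongside the pointer and alignment instead of
    // deriving it from the pointer's (soon to be opaque) pointee type.
    Address Addr(PtrValue, ElementTy, Alignment);
    llvm::Type *Ty = Addr.getElementType(); // remains available later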
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index e09ea5e01b1a..2d5511336851 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -1125,20 +1125,25 @@ void CGOpenMPRuntimeGPU::createOffloadEntry(llvm::Constant *ID,
                                             llvm::GlobalValue::LinkageTypes) {
   // TODO: Add support for global variables on the device after declare target
   // support.
-  if (!isa<llvm::Function>(Addr))
+  llvm::Function *Fn = dyn_cast<llvm::Function>(Addr);
+  if (!Fn)
     return;
+
   llvm::Module &M = CGM.getModule();
   llvm::LLVMContext &Ctx = CGM.getLLVMContext();

-  // Get "nvvm.annotations" metadata node
+  // Get "nvvm.annotations" metadata node.
   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");

   llvm::Metadata *MDVals[] = {
-      llvm::ConstantAsMetadata::get(Addr), llvm::MDString::get(Ctx, "kernel"),
+      llvm::ConstantAsMetadata::get(Fn), llvm::MDString::get(Ctx, "kernel"),
       llvm::ConstantAsMetadata::get(
           llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
-  // Append metadata to nvvm.annotations
+  // Append metadata to nvvm.annotations.
   MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
+
+  // Add a function attribute for the kernel.
+  Fn->addFnAttr(llvm::Attribute::get(Ctx, "kernel"));
 }

 void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction(
@@ -1198,7 +1203,8 @@ CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM)
     llvm_unreachable("OpenMP can only handle device code.");

   llvm::OpenMPIRBuilder &OMPBuilder = getOMPBuilder();
-  if (CGM.getLangOpts().OpenMPTargetNewRuntime) {
+  if (CGM.getLangOpts().OpenMPTargetNewRuntime &&
+      !CGM.getLangOpts().OMPHostIRFile.empty()) {
     OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPTargetDebug,
                                 "__omp_rtl_debug_kind");
     OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPTeamSubscription,
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 520483bc08b6..9e939bb545ad 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -385,7 +385,7 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) {
         cast<OMPTargetTeamsDistributeSimdDirective>(*S));
     break;
   case Stmt::OMPInteropDirectiveClass:
-    llvm_unreachable("Interop directive not supported yet.");
+    EmitOMPInteropDirective(cast<OMPInteropDirective>(*S));
     break;
   case Stmt::OMPDispatchDirectiveClass:
     llvm_unreachable("Dispatch directive not supported yet.");
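With the directive wired into `EmitStmt`, OpenMP 5.1 `interop` constructs now reach the lowering in `EmitOMPInteropDirective` below instead of asserting. A hedged source-level example; it assumes an OpenMP-5.1-capable -fopenmp setup with offloading configured:

    #include <omp.h>

    void use_interop() {
      omp_interop_t Obj = omp_interop_none;
      // Lowered through OMPBuilder.createOMPInteropInit(...).
      #pragma omp interop init(targetsync : Obj) device(0)
      // ... hand Obj to a foreign runtime here ...
      // Lowered through OMPBuilder.createOMPInteropDestroy(...).
      #pragma omp interop destroy(Obj)
    }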
!(S.getSingleClause<OMPInitClause>() || + S.getSingleClause<OMPDestroyClause>() || + S.getSingleClause<OMPUseClause>())) && + "OMPNowaitClause clause is used separately in OMPInteropDirective."); + + if (const auto *C = S.getSingleClause<OMPInitClause>()) { + llvm::Value *InteropvarPtr = + EmitLValue(C->getInteropVar()).getPointer(*this); + llvm::omp::OMPInteropType InteropType = llvm::omp::OMPInteropType::Unknown; + if (C->getIsTarget()) { + InteropType = llvm::omp::OMPInteropType::Target; + } else { + assert(C->getIsTargetSync() && "Expected interop-type target/targetsync"); + InteropType = llvm::omp::OMPInteropType::TargetSync; + } + OMPBuilder.createOMPInteropInit(Builder, InteropvarPtr, InteropType, Device, + NumDependences, DependenceAddress, + S.hasClausesOfKind<OMPNowaitClause>()); + } else if (const auto *C = S.getSingleClause<OMPDestroyClause>()) { + llvm::Value *InteropvarPtr = + EmitLValue(C->getInteropVar()).getPointer(*this); + OMPBuilder.createOMPInteropDestroy(Builder, InteropvarPtr, Device, + NumDependences, DependenceAddress, + S.hasClausesOfKind<OMPNowaitClause>()); + } else if (const auto *C = S.getSingleClause<OMPUseClause>()) { + llvm::Value *InteropvarPtr = + EmitLValue(C->getInteropVar()).getPointer(*this); + OMPBuilder.createOMPInteropUse(Builder, InteropvarPtr, Device, + NumDependences, DependenceAddress, + S.hasClausesOfKind<OMPNowaitClause>()); + } +} + static void emitTargetTeamsDistributeParallelForRegion( CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S, PrePostActionTy &Action) { diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp index b72b16cf2b5f..c2c508dedb09 100644 --- a/clang/lib/CodeGen/CodeGenAction.cpp +++ b/clang/lib/CodeGen/CodeGenAction.cpp @@ -1134,6 +1134,7 @@ void CodeGenAction::ExecuteAction() { TheModule->setTargetTriple(TargetOpts.Triple); } + EmbedObject(TheModule.get(), CodeGenOpts, Diagnostics); EmbedBitcode(TheModule.get(), CodeGenOpts, *MainFile); LLVMContext &Ctx = TheModule->getContext(); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 6db888dcec08..df99cd9a1b79 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3562,6 +3562,7 @@ public: void EmitOMPTargetTeamsDistributeSimdDirective( const OMPTargetTeamsDistributeSimdDirective &S); void EmitOMPGenericLoopDirective(const OMPGenericLoopDirective &S); + void EmitOMPInteropDirective(const OMPInteropDirective &S); /// Emit device code for the target directive. 
static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM, diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index d534cf182f5a..2346176a1562 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -731,6 +731,7 @@ void CodeGenModule::Release() { "tag-stack-memory-buildattr", 1); if (Arch == llvm::Triple::thumb || Arch == llvm::Triple::thumbeb || + Arch == llvm::Triple::arm || Arch == llvm::Triple::armeb || Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_32 || Arch == llvm::Triple::aarch64_be) { getModule().addModuleFlag(llvm::Module::Error, "branch-target-enforcement", @@ -742,11 +743,9 @@ void CodeGenModule::Release() { getModule().addModuleFlag(llvm::Module::Error, "sign-return-address-all", LangOpts.isSignReturnAddressScopeAll()); - if (Arch != llvm::Triple::thumb && Arch != llvm::Triple::thumbeb) { - getModule().addModuleFlag(llvm::Module::Error, - "sign-return-address-with-bkey", - !LangOpts.isSignReturnAddressWithAKey()); - } + getModule().addModuleFlag(llvm::Module::Error, + "sign-return-address-with-bkey", + !LangOpts.isSignReturnAddressWithAKey()); } if (!CodeGenOpts.MemoryProfileOutput.empty()) { diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index e803022508a4..1fcd5d4d808a 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -394,13 +394,6 @@ private: llvm::MapVector<GlobalDecl, StringRef> MangledDeclNames; llvm::StringMap<GlobalDecl, llvm::BumpPtrAllocator> Manglings; - // An ordered map of canonical GlobalDecls paired with the cpu-index for - // cpu-specific name manglings. - llvm::MapVector<std::pair<GlobalDecl, unsigned>, StringRef> - CPUSpecificMangledDeclNames; - llvm::StringMap<std::pair<GlobalDecl, unsigned>, llvm::BumpPtrAllocator> - CPUSpecificManglings; - /// Global annotations. std::vector<llvm::Constant*> Annotations; diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index fb81169003fc..8a0150218a7a 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -5563,8 +5563,8 @@ public: TargetInfo::BranchProtectionInfo BPI; StringRef Error; - (void)CGM.getTarget().validateBranchProtection(Attr.BranchProtection, - BPI, Error); + (void)CGM.getTarget().validateBranchProtection( + Attr.BranchProtection, Attr.Architecture, BPI, Error); assert(Error.empty()); auto *Fn = cast<llvm::Function>(GV); @@ -6377,17 +6377,36 @@ public: if (!Attr.BranchProtection.empty()) { TargetInfo::BranchProtectionInfo BPI; StringRef DiagMsg; - (void)CGM.getTarget().validateBranchProtection(Attr.BranchProtection, - BPI, DiagMsg); - - static const char *SignReturnAddrStr[] = {"none", "non-leaf", "all"}; - assert(static_cast<unsigned>(BPI.SignReturnAddr) <= 2 && - "Unexpected SignReturnAddressScopeKind"); - Fn->addFnAttr("sign-return-address", - SignReturnAddrStr[static_cast<int>(BPI.SignReturnAddr)]); - - Fn->addFnAttr("branch-target-enforcement", - BPI.BranchTargetEnforcement ? "true" : "false"); + StringRef Arch = Attr.Architecture.empty() + ? 
CGM.getTarget().getTargetOpts().CPU + : Attr.Architecture; + if (!CGM.getTarget().validateBranchProtection(Attr.BranchProtection, + Arch, BPI, DiagMsg)) { + CGM.getDiags().Report( + D->getLocation(), + diag::warn_target_unsupported_branch_protection_attribute) + << Arch; + } else { + static const char *SignReturnAddrStr[] = {"none", "non-leaf", "all"}; + assert(static_cast<unsigned>(BPI.SignReturnAddr) <= 2 && + "Unexpected SignReturnAddressScopeKind"); + Fn->addFnAttr( + "sign-return-address", + SignReturnAddrStr[static_cast<int>(BPI.SignReturnAddr)]); + + Fn->addFnAttr("branch-target-enforcement", + BPI.BranchTargetEnforcement ? "true" : "false"); + } + } else if (CGM.getLangOpts().BranchTargetEnforcement || + CGM.getLangOpts().hasSignReturnAddress()) { + // If the Branch Protection attribute is missing, validate the target + // Architecture attribute against Branch Protection command line + // settings. + if (!CGM.getTarget().isBranchProtectionSupportedArch(Attr.Architecture)) + CGM.getDiags().Report( + D->getLocation(), + diag::warn_target_unsupported_branch_protection_attribute) + << Attr.Architecture; } } @@ -8285,12 +8304,14 @@ public: // Check if global/static variable is defined in address space // 1~6 (__flash, __flash1, __flash2, __flash3, __flash4, __flash5) // but not constant. - LangAS AS = D->getType().getAddressSpace(); - if (isTargetAddressSpace(AS) && 1 <= toTargetAddressSpace(AS) && - toTargetAddressSpace(AS) <= 6 && !D->getType().isConstQualified()) - CGM.getDiags().Report(D->getLocation(), - diag::err_verify_nonconst_addrspace) - << "__flash*"; + if (D) { + LangAS AS = D->getType().getAddressSpace(); + if (isTargetAddressSpace(AS) && 1 <= toTargetAddressSpace(AS) && + toTargetAddressSpace(AS) <= 6 && !D->getType().isConstQualified()) + CGM.getDiags().Report(D->getLocation(), + diag::err_verify_nonconst_addrspace) + << "__flash*"; + } return TargetCodeGenInfo::getGlobalVarAddressSpace(CGM, D); } diff --git a/clang/lib/Driver/Action.cpp b/clang/lib/Driver/Action.cpp index e2d2f6c22de0..eb08bfe9cde5 100644 --- a/clang/lib/Driver/Action.cpp +++ b/clang/lib/Driver/Action.cpp @@ -43,6 +43,8 @@ const char *Action::getClassName(ActionClass AC) { return "clang-offload-unbundler"; case OffloadWrapperJobClass: return "clang-offload-wrapper"; + case LinkerWrapperJobClass: + return "clang-linker-wrapper"; case StaticLibJobClass: return "static-lib-linker"; } @@ -418,6 +420,12 @@ OffloadWrapperJobAction::OffloadWrapperJobAction(ActionList &Inputs, types::ID Type) : JobAction(OffloadWrapperJobClass, Inputs, Type) {} +void LinkerWrapperJobAction::anchor() {} + +LinkerWrapperJobAction::LinkerWrapperJobAction(ActionList &Inputs, + types::ID Type) + : JobAction(LinkerWrapperJobClass, Inputs, Type) {} + void StaticLibJobAction::anchor() {} StaticLibJobAction::StaticLibJobAction(ActionList &Inputs, types::ID Type) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 2e4ebc10e9ba..3bfddeefc7b2 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -103,39 +103,58 @@ using namespace clang; using namespace llvm::opt; static llvm::Optional<llvm::Triple> -getHIPOffloadTargetTriple(const Driver &D, const ArgList &Args) { - if (Args.hasArg(options::OPT_offload_EQ)) { - auto HIPOffloadTargets = Args.getAllArgValues(options::OPT_offload_EQ); +getOffloadTargetTriple(const Driver &D, const ArgList &Args) { + auto OffloadTargets = Args.getAllArgValues(options::OPT_offload_EQ); + // Offload compilation flow does not support multiple targets for now. 
We + // need the HIPActionBuilder (and possibly the CudaActionBuilder{,Base}too) + // to support multiple tool chains first. + switch (OffloadTargets.size()) { + default: + D.Diag(diag::err_drv_only_one_offload_target_supported); + return llvm::None; + case 0: + D.Diag(diag::err_drv_invalid_or_unsupported_offload_target) << ""; + return llvm::None; + case 1: + break; + } + return llvm::Triple(OffloadTargets[0]); +} - // HIP compilation flow does not support multiple targets for now. We need - // the HIPActionBuilder (and possibly the CudaActionBuilder{,Base}too) to - // support multiple tool chains first. - switch (HIPOffloadTargets.size()) { - default: - D.Diag(diag::err_drv_only_one_offload_target_supported_in) << "HIP"; - return llvm::None; - case 0: - D.Diag(diag::err_drv_invalid_or_unsupported_offload_target) << ""; - return llvm::None; - case 1: - break; - } - llvm::Triple TT(HIPOffloadTargets[0]); - if (TT.getArch() == llvm::Triple::amdgcn && - TT.getVendor() == llvm::Triple::AMD && - TT.getOS() == llvm::Triple::AMDHSA) - return TT; - if (TT.getArch() == llvm::Triple::spirv64 && - TT.getVendor() == llvm::Triple::UnknownVendor && - TT.getOS() == llvm::Triple::UnknownOS) +static llvm::Optional<llvm::Triple> +getNVIDIAOffloadTargetTriple(const Driver &D, const ArgList &Args, + const llvm::Triple &HostTriple) { + if (!Args.hasArg(options::OPT_offload_EQ)) { + return llvm::Triple(HostTriple.isArch64Bit() ? "nvptx64-nvidia-cuda" + : "nvptx-nvidia-cuda"); + } + auto TT = getOffloadTargetTriple(D, Args); + if (TT && (TT->getArch() == llvm::Triple::spirv32 || + TT->getArch() == llvm::Triple::spirv64)) { + if (Args.hasArg(options::OPT_emit_llvm)) return TT; - D.Diag(diag::err_drv_invalid_or_unsupported_offload_target) - << HIPOffloadTargets[0]; + D.Diag(diag::err_drv_cuda_offload_only_emit_bc); return llvm::None; } - - static const llvm::Triple T("amdgcn-amd-amdhsa"); // Default HIP triple. - return T; + D.Diag(diag::err_drv_invalid_or_unsupported_offload_target) << TT->str(); + return llvm::None; +} +static llvm::Optional<llvm::Triple> +getHIPOffloadTargetTriple(const Driver &D, const ArgList &Args) { + if (!Args.hasArg(options::OPT_offload_EQ)) { + return llvm::Triple("amdgcn-amd-amdhsa"); // Default HIP triple. + } + auto TT = getOffloadTargetTriple(D, Args); + if (!TT) + return llvm::None; + if (TT->getArch() == llvm::Triple::amdgcn && + TT->getVendor() == llvm::Triple::AMD && + TT->getOS() == llvm::Triple::AMDHSA) + return TT; + if (TT->getArch() == llvm::Triple::spirv64) + return TT; + D.Diag(diag::err_drv_invalid_or_unsupported_offload_target) << TT->str(); + return llvm::None; } // static @@ -719,17 +738,17 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, if (IsCuda) { const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>(); const llvm::Triple &HostTriple = HostTC->getTriple(); - StringRef DeviceTripleStr; auto OFK = Action::OFK_Cuda; - DeviceTripleStr = - HostTriple.isArch64Bit() ? "nvptx64-nvidia-cuda" : "nvptx-nvidia-cuda"; - llvm::Triple CudaTriple(DeviceTripleStr); + auto CudaTriple = + getNVIDIAOffloadTargetTriple(*this, C.getInputArgs(), HostTriple); + if (!CudaTriple) + return; // Use the CUDA and host triples as the key into the ToolChains map, // because the device toolchain we create depends on both. 
- auto &CudaTC = ToolChains[CudaTriple.str() + "/" + HostTriple.str()]; + auto &CudaTC = ToolChains[CudaTriple->str() + "/" + HostTriple.str()]; if (!CudaTC) { CudaTC = std::make_unique<toolchains::CudaToolChain>( - *this, CudaTriple, *HostTC, C.getInputArgs(), OFK); + *this, *CudaTriple, *HostTC, C.getInputArgs(), OFK); } C.addOffloadDeviceToolChain(CudaTC.get(), OFK); } else if (IsHIP) { @@ -773,21 +792,9 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, if (HasValidOpenMPRuntime) { llvm::StringMap<const char *> FoundNormalizedTriples; for (const char *Val : OpenMPTargets->getValues()) { - llvm::Triple TT(Val); + llvm::Triple TT(ToolChain::getOpenMPTriple(Val)); std::string NormalizedName = TT.normalize(); - // We want to expand the shortened versions of the triples passed in to - // the values used for the bitcode libraries for convenience. - if (TT.getVendor() == llvm::Triple::UnknownVendor || - TT.getOS() == llvm::Triple::UnknownOS) { - if (TT.getArch() == llvm::Triple::nvptx) - TT = llvm::Triple("nvptx-nvidia-cuda"); - else if (TT.getArch() == llvm::Triple::nvptx64) - TT = llvm::Triple("nvptx64-nvidia-cuda"); - else if (TT.getArch() == llvm::Triple::amdgcn) - TT = llvm::Triple("amdgcn-amd-amdhsa"); - } - // Make sure we don't have a duplicate triple. auto Duplicate = FoundNormalizedTriples.find(NormalizedName); if (Duplicate != FoundNormalizedTriples.end()) { @@ -3823,6 +3830,11 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, // Builder to be used to build offloading actions. OffloadingActionBuilder OffloadBuilder(C, Args, Inputs); + // Offload kinds active for this compilation. + unsigned OffloadKinds = Action::OFK_None; + if (C.hasOffloadToolChain<Action::OFK_OpenMP>()) + OffloadKinds |= Action::OFK_OpenMP; + // Construct the actions to perform. HeaderModulePrecompileJobAction *HeaderModuleAction = nullptr; ActionList LinkerInputs; @@ -3843,14 +3855,16 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, // Use the current host action in any of the offloading actions, if // required. - if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg)) - break; + if (!Args.hasArg(options::OPT_fopenmp_new_driver)) + if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg)) + break; for (phases::ID Phase : PL) { // Add any offload action the host action depends on. - Current = OffloadBuilder.addDeviceDependencesToHostAction( - Current, InputArg, Phase, PL.back(), FullPL); + if (!Args.hasArg(options::OPT_fopenmp_new_driver)) + Current = OffloadBuilder.addDeviceDependencesToHostAction( + Current, InputArg, Phase, PL.back(), FullPL); if (!Current) break; @@ -3883,6 +3897,11 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, break; } + // Try to build the offloading actions and add the result as a dependency + // to the host. + if (Args.hasArg(options::OPT_fopenmp_new_driver)) + Current = BuildOffloadingActions(C, Args, I, Current); + // FIXME: Should we include any prior module file outputs as inputs of // later actions in the same command line? @@ -3900,8 +3919,9 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, // Use the current host action in any of the offloading actions, if // required. 
- if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg)) - break; + if (!Args.hasArg(options::OPT_fopenmp_new_driver)) + if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg)) + break; if (Current->getType() == types::TY_Nothing) break; @@ -3912,7 +3932,11 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, Actions.push_back(Current); // Add any top level actions generated for offloading. - OffloadBuilder.appendTopLevelActions(Actions, Current, InputArg); + if (!Args.hasArg(options::OPT_fopenmp_new_driver)) + OffloadBuilder.appendTopLevelActions(Actions, Current, InputArg); + else if (Current) + Current->propagateHostOffloadInfo(OffloadKinds, + /*BoundArch=*/nullptr); } // Add a link action if necessary. @@ -3924,16 +3948,23 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, } if (!LinkerInputs.empty()) { - if (Action *Wrapper = OffloadBuilder.makeHostLinkAction()) - LinkerInputs.push_back(Wrapper); + if (!Args.hasArg(options::OPT_fopenmp_new_driver)) + if (Action *Wrapper = OffloadBuilder.makeHostLinkAction()) + LinkerInputs.push_back(Wrapper); Action *LA; // Check if this Linker Job should emit a static library. if (ShouldEmitStaticLibrary(Args)) { LA = C.MakeAction<StaticLibJobAction>(LinkerInputs, types::TY_Image); + } else if (Args.hasArg(options::OPT_fopenmp_new_driver) && + OffloadKinds != Action::OFK_None) { + LA = C.MakeAction<LinkerWrapperJobAction>(LinkerInputs, types::TY_Image); + LA->propagateHostOffloadInfo(OffloadKinds, + /*BoundArch=*/nullptr); } else { LA = C.MakeAction<LinkJobAction>(LinkerInputs, types::TY_Image); } - LA = OffloadBuilder.processHostLinkAction(LA); + if (!Args.hasArg(options::OPT_fopenmp_new_driver)) + LA = OffloadBuilder.processHostLinkAction(LA); Actions.push_back(LA); } @@ -4019,6 +4050,68 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, Args.ClaimAllArgs(options::OPT_cuda_compile_host_device); } +Action *Driver::BuildOffloadingActions(Compilation &C, + llvm::opt::DerivedArgList &Args, + const InputTy &Input, + Action *HostAction) const { + if (!isa<CompileJobAction>(HostAction)) + return HostAction; + + SmallVector<const ToolChain *, 2> ToolChains; + ActionList DeviceActions; + + types::ID InputType = Input.first; + const Arg *InputArg = Input.second; + + auto OpenMPTCRange = C.getOffloadToolChains<Action::OFK_OpenMP>(); + for (auto TI = OpenMPTCRange.first, TE = OpenMPTCRange.second; TI != TE; ++TI) + ToolChains.push_back(TI->second); + + for (unsigned I = 0; I < ToolChains.size(); ++I) + DeviceActions.push_back(C.MakeAction<InputAction>(*InputArg, InputType)); + + if (DeviceActions.empty()) + return HostAction; + + auto PL = types::getCompilationPhases(*this, Args, InputType); + + for (phases::ID Phase : PL) { + if (Phase == phases::Link) { + assert(Phase == PL.back() && "linking must be final compilation step."); + break; + } + + auto TC = ToolChains.begin(); + for (Action *&A : DeviceActions) { + A = ConstructPhaseAction(C, Args, Phase, A, Action::OFK_OpenMP); + + if (isa<CompileJobAction>(A)) { + HostAction->setCannotBeCollapsedWithNextDependentAction(); + OffloadAction::HostDependence HDep( + *HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(), + /*BoundArch=*/nullptr, Action::OFK_OpenMP); + OffloadAction::DeviceDependences DDep; + DDep.add(*A, **TC, /*BoundArch=*/nullptr, Action::OFK_OpenMP); + A = C.MakeAction<OffloadAction>(HDep, DDep); + } + ++TC; + } + } + + OffloadAction::DeviceDependences DDeps; + + auto TC = ToolChains.begin(); + for (Action *A : 
DeviceActions) { + DDeps.add(*A, **TC, /*BoundArch=*/nullptr, Action::OFK_OpenMP); + TC++; + } + + OffloadAction::HostDependence HDep( + *HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(), + /*BoundArch=*/nullptr, DDeps); + return C.MakeAction<OffloadAction>(HDep, DDeps); +} + Action *Driver::ConstructPhaseAction( Compilation &C, const ArgList &Args, phases::ID Phase, Action *Input, Action::OffloadKind TargetDeviceOffloadKind) const { @@ -4110,6 +4203,12 @@ Action *Driver::ConstructPhaseAction( Args.hasArg(options::OPT_S) ? types::TY_LTO_IR : types::TY_LTO_BC; return C.MakeAction<BackendJobAction>(Input, Output); } + if (isUsingLTO(/* IsOffload */ true) && + TargetDeviceOffloadKind == Action::OFK_OpenMP) { + types::ID Output = + Args.hasArg(options::OPT_S) ? types::TY_LTO_IR : types::TY_LTO_BC; + return C.MakeAction<BackendJobAction>(Input, Output); + } if (Args.hasArg(options::OPT_emit_llvm) || (TargetDeviceOffloadKind == Action::OFK_HIP && Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, @@ -4181,7 +4280,7 @@ void Driver::BuildJobs(Compilation &C) const { ArchNames.insert(A->getValue()); // Set of (Action, canonical ToolChain triple) pairs we've built jobs for. - std::map<std::pair<const Action *, std::string>, InputInfo> CachedResults; + std::map<std::pair<const Action *, std::string>, InputInfoList> CachedResults; for (Action *A : C.getActions()) { // If we are linking an image for multiple archs then the linker wants // -arch_multiple and -final_output <final image name>. Unfortunately, this @@ -4638,10 +4737,11 @@ static std::string GetTriplePlusArchString(const ToolChain *TC, return TriplePlusArch; } -InputInfo Driver::BuildJobsForAction( +InputInfoList Driver::BuildJobsForAction( Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch, bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput, - std::map<std::pair<const Action *, std::string>, InputInfo> &CachedResults, + std::map<std::pair<const Action *, std::string>, InputInfoList> + &CachedResults, Action::OffloadKind TargetDeviceOffloadKind) const { std::pair<const Action *, std::string> ActionTC = { A, GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)}; @@ -4649,17 +4749,18 @@ InputInfo Driver::BuildJobsForAction( if (CachedResult != CachedResults.end()) { return CachedResult->second; } - InputInfo Result = BuildJobsForActionNoCache( + InputInfoList Result = BuildJobsForActionNoCache( C, A, TC, BoundArch, AtTopLevel, MultipleArchs, LinkingOutput, CachedResults, TargetDeviceOffloadKind); CachedResults[ActionTC] = Result; return Result; } -InputInfo Driver::BuildJobsForActionNoCache( +InputInfoList Driver::BuildJobsForActionNoCache( Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch, bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput, - std::map<std::pair<const Action *, std::string>, InputInfo> &CachedResults, + std::map<std::pair<const Action *, std::string>, InputInfoList> + &CachedResults, Action::OffloadKind TargetDeviceOffloadKind) const { llvm::PrettyStackTraceString CrashInfo("Building compilation jobs"); @@ -4697,7 +4798,7 @@ InputInfo Driver::BuildJobsForActionNoCache( // If there is a single device option, just generate the job for it. 
if (OA->hasSingleDeviceDependence()) { - InputInfo DevA; + InputInfoList DevA; OA->doOnEachDeviceDependence([&](Action *DepA, const ToolChain *DepTC, const char *DepBoundArch) { DevA = @@ -4715,7 +4816,7 @@ InputInfo Driver::BuildJobsForActionNoCache( OA->doOnEachDependence( /*IsHostDependence=*/BuildingForOffloadDevice, [&](Action *DepA, const ToolChain *DepTC, const char *DepBoundArch) { - OffloadDependencesInputInfo.push_back(BuildJobsForAction( + OffloadDependencesInputInfo.append(BuildJobsForAction( C, DepA, DepTC, DepBoundArch, /*AtTopLevel=*/false, /*MultipleArchs*/ !!DepBoundArch, LinkingOutput, CachedResults, DepA->getOffloadingDeviceKind())); @@ -4724,6 +4825,17 @@ InputInfo Driver::BuildJobsForActionNoCache( A = BuildingForOffloadDevice ? OA->getSingleDeviceDependence(/*DoNotConsiderHostActions=*/true) : OA->getHostDependence(); + + // We may have already built this action as a part of the offloading + // toolchain, return the cached input if so. + std::pair<const Action *, std::string> ActionTC = { + OA->getHostDependence(), + GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)}; + if (CachedResults.find(ActionTC) != CachedResults.end()) { + InputInfoList Inputs = CachedResults[ActionTC]; + Inputs.append(OffloadDependencesInputInfo); + return Inputs; + } } if (const InputAction *IA = dyn_cast<InputAction>(A)) { @@ -4733,9 +4845,9 @@ InputInfo Driver::BuildJobsForActionNoCache( Input.claim(); if (Input.getOption().matches(options::OPT_INPUT)) { const char *Name = Input.getValue(); - return InputInfo(A, Name, /* _BaseInput = */ Name); + return {InputInfo(A, Name, /* _BaseInput = */ Name)}; } - return InputInfo(A, &Input, /* _BaseInput = */ ""); + return {InputInfo(A, &Input, /* _BaseInput = */ "")}; } if (const BindArchAction *BAA = dyn_cast<BindArchAction>(A)) { @@ -4765,7 +4877,7 @@ InputInfo Driver::BuildJobsForActionNoCache( const Tool *T = TS.getTool(Inputs, CollapsedOffloadActions); if (!T) - return InputInfo(); + return {InputInfo()}; if (BuildingForOffloadDevice && A->getOffloadingDeviceKind() == Action::OFK_OpenMP) { @@ -4792,7 +4904,7 @@ InputInfo Driver::BuildJobsForActionNoCache( cast<OffloadAction>(OA)->doOnEachDependence( /*IsHostDependence=*/BuildingForOffloadDevice, [&](Action *DepA, const ToolChain *DepTC, const char *DepBoundArch) { - OffloadDependencesInputInfo.push_back(BuildJobsForAction( + OffloadDependencesInputInfo.append(BuildJobsForAction( C, DepA, DepTC, DepBoundArch, /* AtTopLevel */ false, /*MultipleArchs=*/!!DepBoundArch, LinkingOutput, CachedResults, DepA->getOffloadingDeviceKind())); @@ -4806,7 +4918,7 @@ InputInfo Driver::BuildJobsForActionNoCache( // FIXME: Clean this up. 
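// BuildJobsForAction above (now returning an InputInfoList) is a memoized
// recursion keyed on (Action, triple+arch) pairs. A minimal standalone shape
// of that caching pattern, with deliberately simplified types; none of these
// names are the Driver's real signatures:
#include <map>
#include <string>
#include <utility>
#include <vector>

using ActionKey = std::pair<const void *, std::string>;
using JobList = std::vector<std::string>;

static JobList buildJobsMemo(const void *Action, const std::string &TCKey,
                             std::map<ActionKey, JobList> &Cache) {
  auto It = Cache.find({Action, TCKey});
  if (It != Cache.end())
    return It->second; // jobs for this (action, toolchain) already exist
  JobList Result;      // the real driver recurses over the action's inputs
  Cache[{Action, TCKey}] = Result;
  return Result;
}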
bool SubJobAtTopLevel = AtTopLevel && (isa<DsymutilJobAction>(A) || isa<VerifyJobAction>(A)); - InputInfos.push_back(BuildJobsForAction( + InputInfos.append(BuildJobsForAction( C, Input, TC, BoundArch, SubJobAtTopLevel, MultipleArchs, LinkingOutput, CachedResults, A->getOffloadingDeviceKind())); } @@ -4890,8 +5002,8 @@ InputInfo Driver::BuildJobsForActionNoCache( Arch = BoundArch; CachedResults[{A, GetTriplePlusArchString(UI.DependentToolChain, Arch, - UI.DependentOffloadKind)}] = - CurI; + UI.DependentOffloadKind)}] = { + CurI}; } // Now that we have all the results generated, select the one that should be @@ -4900,9 +5012,9 @@ InputInfo Driver::BuildJobsForActionNoCache( A, GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)}; assert(CachedResults.find(ActionTC) != CachedResults.end() && "Result does not exist??"); - Result = CachedResults[ActionTC]; + Result = CachedResults[ActionTC].front(); } else if (JA->getType() == types::TY_Nothing) - Result = InputInfo(A, BaseInput); + Result = {InputInfo(A, BaseInput)}; else { // We only have to generate a prefix for the host if this is not a top-level // action. @@ -4955,7 +5067,7 @@ InputInfo Driver::BuildJobsForActionNoCache( C.getArgsForToolChain(TC, BoundArch, JA->getOffloadingDeviceKind()), LinkingOutput); } - return Result; + return {Result}; } const char *Driver::getDefaultImageName() const { diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 5fef1fb2ee5a..d657d21bfcdb 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -327,6 +327,12 @@ Tool *ToolChain::getOffloadWrapper() const { return OffloadWrapper.get(); } +Tool *ToolChain::getLinkerWrapper() const { + if (!LinkerWrapper) + LinkerWrapper.reset(new tools::LinkerWrapper(*this, getLink())); + return LinkerWrapper.get(); +} + Tool *ToolChain::getTool(Action::ActionClass AC) const { switch (AC) { case Action::AssembleJobClass: @@ -365,6 +371,8 @@ Tool *ToolChain::getTool(Action::ActionClass AC) const { case Action::OffloadWrapperJobClass: return getOffloadWrapper(); + case Action::LinkerWrapperJobClass: + return getLinkerWrapper(); } llvm_unreachable("Invalid tool kind."); @@ -1129,8 +1137,10 @@ llvm::opt::DerivedArgList *ToolChain::TranslateOpenMPTargetArgs( A->getOption().matches(options::OPT_Xopenmp_target); if (A->getOption().matches(options::OPT_Xopenmp_target_EQ)) { + llvm::Triple TT(getOpenMPTriple(A->getValue(0))); + // Passing device args: -Xopenmp-target=<triple> -opt=val. - if (A->getValue(0) == getTripleString()) + if (TT.getTriple() == getTripleString()) Index = Args.getBaseArgs().MakeIndex(A->getValue(1)); else continue; diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp index 6899f9360da5..d7cf41e4b660 100644 --- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp @@ -285,6 +285,10 @@ void AMDGPUOpenMPToolChain::addClangTargetOptions( if (DriverArgs.hasArg(options::OPT_nogpulib)) return; + // Link the bitcode library late if we're using device LTO. 
+ if (getDriver().isUsingLTO(/* IsOffload */ true)) + return; + std::string BitcodeSuffix; if (DriverArgs.hasFlag(options::OPT_fopenmp_target_new_runtime, options::OPT_fno_openmp_target_new_runtime, true)) diff --git a/clang/lib/Driver/ToolChains/AVR.cpp b/clang/lib/Driver/ToolChains/AVR.cpp index a66cae8b4d6b..2cf16cf9fdb4 100644 --- a/clang/lib/Driver/ToolChains/AVR.cpp +++ b/clang/lib/Driver/ToolChains/AVR.cpp @@ -379,6 +379,11 @@ void AVRToolChain::addClangTargetOptions( if (!DriverArgs.hasFlag(options::OPT_fuse_init_array, options::OPT_fno_use_init_array, false)) CC1Args.push_back("-fno-use-init-array"); + // Use `-fno-use-cxa-atexit` as default, since avr-libc does not support + // `__cxa_atexit()`. + if (!DriverArgs.hasFlag(options::OPT_fuse_cxa_atexit, + options::OPT_fno_use_cxa_atexit, false)) + CC1Args.push_back("-fno-use-cxa-atexit"); } Tool *AVRToolChain::buildLinker() const { diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 4386e395bc6c..7aac977209eb 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -1627,7 +1627,7 @@ void RenderARMABI(const Driver &D, const llvm::Triple &Triple, } } -static void CollectARMPACBTIOptions(const Driver &D, const ArgList &Args, +static void CollectARMPACBTIOptions(const ToolChain &TC, const ArgList &Args, ArgStringList &CmdArgs, bool isAArch64) { const Arg *A = isAArch64 ? Args.getLastArg(options::OPT_msign_return_address_EQ, @@ -1636,6 +1636,12 @@ static void CollectARMPACBTIOptions(const Driver &D, const ArgList &Args, if (!A) return; + const Driver &D = TC.getDriver(); + const llvm::Triple &Triple = TC.getEffectiveTriple(); + if (!(isAArch64 || (Triple.isArmT32() && Triple.isArmMClass()))) + D.Diag(diag::warn_target_unsupported_branch_protection_option) + << Triple.getArchName(); + StringRef Scope, Key; bool IndirectBranches; @@ -1713,8 +1719,7 @@ void Clang::AddARMTargetArgs(const llvm::Triple &Triple, const ArgList &Args, AddAAPCSVolatileBitfieldArgs(Args, CmdArgs); // Enable/disable return address signing and indirect branch targets. - CollectARMPACBTIOptions(getToolChain().getDriver(), Args, CmdArgs, - false /*isAArch64*/); + CollectARMPACBTIOptions(getToolChain(), Args, CmdArgs, false /*isAArch64*/); } void Clang::RenderTargetOptions(const llvm::Triple &EffectiveTriple, @@ -1841,8 +1846,7 @@ void Clang::AddAArch64TargetArgs(const ArgList &Args, } // Enable/disable return address signing and indirect branch targets. - CollectARMPACBTIOptions(getToolChain().getDriver(), Args, CmdArgs, - true /*isAArch64*/); + CollectARMPACBTIOptions(getToolChain(), Args, CmdArgs, true /*isAArch64*/); // Handle -msve_vector_bits=<bits> if (Arg *A = Args.getLastArg(options::OPT_msve_vector_bits_EQ)) { @@ -4347,6 +4351,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, bool IsHIP = JA.isOffloading(Action::OFK_HIP); bool IsHIPDevice = JA.isDeviceOffloading(Action::OFK_HIP); bool IsOpenMPDevice = JA.isDeviceOffloading(Action::OFK_OpenMP); + bool IsOpenMPHost = JA.isHostOffloading(Action::OFK_OpenMP); bool IsHeaderModulePrecompile = isa<HeaderModulePrecompileJobAction>(JA); bool IsDeviceOffloadAction = !(JA.isDeviceOffloading(Action::OFK_None) || JA.isDeviceOffloading(Action::OFK_Host)); @@ -4365,6 +4370,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, IsHeaderModulePrecompile ? 
HeaderModuleInput : Inputs[0]; InputInfoList ModuleHeaderInputs; + InputInfoList OpenMPHostInputs; const InputInfo *CudaDeviceInput = nullptr; const InputInfo *OpenMPDeviceInput = nullptr; for (const InputInfo &I : Inputs) { @@ -4383,6 +4389,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CudaDeviceInput = &I; } else if (IsOpenMPDevice && !OpenMPDeviceInput) { OpenMPDeviceInput = &I; + } else if (IsOpenMPHost) { + OpenMPHostInputs.push_back(I); } else { llvm_unreachable("unexpectedly given multiple inputs"); } @@ -4611,7 +4619,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (JA.getType() == types::TY_LLVM_BC) CmdArgs.push_back("-emit-llvm-uselists"); - if (IsUsingLTO) { + if (IsUsingLTO && !Args.hasArg(options::OPT_fopenmp_new_driver)) { // Only AMDGPU supports device-side LTO. if (IsDeviceOffloadAction && !Triple.isAMDGPU()) { D.Diag(diag::err_drv_unsupported_opt_for_target) @@ -6262,7 +6270,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (!Args.hasFlag( options::OPT_fuse_cxa_atexit, options::OPT_fno_use_cxa_atexit, !RawTriple.isOSAIX() && !RawTriple.isOSWindows() && - TC.getArch() != llvm::Triple::xcore && ((RawTriple.getVendor() != llvm::Triple::MipsTechnologies) || RawTriple.hasEnvironment())) || KernelOrKext) @@ -6890,6 +6897,25 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, } } + // Host-side OpenMP offloading receives the device object files and embeds them + // in a named section including the associated target triple and architecture. + if (IsOpenMPHost && !OpenMPHostInputs.empty()) { + auto InputFile = OpenMPHostInputs.begin(); + auto OpenMPTCs = C.getOffloadToolChains<Action::OFK_OpenMP>(); + for (auto TI = OpenMPTCs.first, TE = OpenMPTCs.second; TI != TE; + ++TI, ++InputFile) { + const ToolChain *TC = TI->second; + const ArgList &TCArgs = C.getArgsForToolChain(TC, "", Action::OFK_OpenMP); + StringRef File = + C.getArgs().MakeArgString(TC->getInputFilename(*InputFile)); + StringRef InputName = Clang::getBaseInputStem(Args, Inputs); + + CmdArgs.push_back(Args.MakeArgString( + "-fembed-offload-object=" + File + "," + TC->getTripleString() + "." + + TCArgs.getLastArgValue(options::OPT_march_EQ) + "." + InputName)); + } + } + if (Triple.isAMDGPU()) { handleAMDGPUCodeObjectVersionOptions(D, Args, CmdArgs); @@ -8116,3 +8142,122 @@ void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA, Args.MakeArgString(getToolChain().GetProgramPath(getShortName())), CmdArgs, Inputs, Output)); } + +void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, + const InputInfoList &Inputs, + const ArgList &Args, + const char *LinkingOutput) const { + ArgStringList CmdArgs; + + if (getToolChain().getDriver().isUsingLTO(/* IsOffload */ true)) { + // Pass in target features for each toolchain. + auto OpenMPTCRange = C.getOffloadToolChains<Action::OFK_OpenMP>(); + for (auto &I : + llvm::make_range(OpenMPTCRange.first, OpenMPTCRange.second)) { + const ToolChain *TC = I.second; + const ArgList &TCArgs = C.getArgsForToolChain(TC, "", Action::OFK_OpenMP); + ArgStringList FeatureArgs; + TC->addClangTargetOptions(TCArgs, FeatureArgs, Action::OFK_OpenMP); + auto FeatureIt = llvm::find(FeatureArgs, "-target-feature"); + CmdArgs.push_back(Args.MakeArgString( + "-target-feature=" + TC->getTripleString() + "=" + *(FeatureIt + 1))); + } + + // Pass in the bitcode library to be linked during LTO. 
+ for (auto &I : llvm::make_range(OpenMPTCRange.first, OpenMPTCRange.second)) { + const ToolChain *TC = I.second; + const Driver &D = TC->getDriver(); + const ArgList &TCArgs = C.getArgsForToolChain(TC, "", Action::OFK_OpenMP); + StringRef Arch = TCArgs.getLastArgValue(options::OPT_march_EQ); + + std::string BitcodeSuffix; + if (TCArgs.hasFlag(options::OPT_fopenmp_target_new_runtime, + options::OPT_fno_openmp_target_new_runtime, true)) + BitcodeSuffix += "new-"; + if (TC->getTriple().isNVPTX()) + BitcodeSuffix += "nvptx-"; + else if (TC->getTriple().isAMDGPU()) + BitcodeSuffix += "amdgpu-"; + BitcodeSuffix += Arch; + + ArgStringList BitcodeLibrary; + addOpenMPDeviceRTL(D, TCArgs, BitcodeLibrary, BitcodeSuffix, + TC->getTriple()); + + if (!BitcodeLibrary.empty()) + CmdArgs.push_back( + Args.MakeArgString("-target-library=" + TC->getTripleString() + + "-" + Arch + "=" + BitcodeLibrary.back())); + } + + // Pass in the optimization level to use for LTO. + if (const Arg *A = Args.getLastArg(options::OPT_O_Group)) { + StringRef OOpt; + if (A->getOption().matches(options::OPT_O4) || + A->getOption().matches(options::OPT_Ofast)) + OOpt = "3"; + else if (A->getOption().matches(options::OPT_O)) { + OOpt = A->getValue(); + if (OOpt == "g") + OOpt = "1"; + else if (OOpt == "s" || OOpt == "z") + OOpt = "2"; + } else if (A->getOption().matches(options::OPT_O0)) + OOpt = "0"; + if (!OOpt.empty()) + CmdArgs.push_back(Args.MakeArgString(Twine("-opt-level=O") + OOpt)); + } + } + + // Construct the link job so we can wrap around it. + Linker->ConstructJob(C, JA, Output, Inputs, Args, LinkingOutput); + const auto &LinkCommand = C.getJobs().getJobs().back(); + + CmdArgs.push_back("-host-triple"); + CmdArgs.push_back(Args.MakeArgString(getToolChain().getTripleString())); + if (Args.hasArg(options::OPT_v)) + CmdArgs.push_back("-v"); + + // Add debug information if present. + if (const Arg *A = Args.getLastArg(options::OPT_g_Group)) { + const Option &Opt = A->getOption(); + if (Opt.matches(options::OPT_gN_Group)) { + if (Opt.matches(options::OPT_gline_directives_only) || + Opt.matches(options::OPT_gline_tables_only)) + CmdArgs.push_back("-gline-directives-only"); + } else + CmdArgs.push_back("-g"); + } + + for (const auto &A : Args.getAllArgValues(options::OPT_Xcuda_ptxas)) + CmdArgs.push_back(Args.MakeArgString("-ptxas-args=" + A)); + + // Forward remarks passes to the LLVM backend in the wrapper. + if (const Arg *A = Args.getLastArg(options::OPT_Rpass_EQ)) + CmdArgs.push_back( + Args.MakeArgString(Twine("-pass-remarks=") + A->getValue())); + if (const Arg *A = Args.getLastArg(options::OPT_Rpass_missed_EQ)) + CmdArgs.push_back( + Args.MakeArgString(Twine("-pass-remarks-missed=") + A->getValue())); + if (const Arg *A = Args.getLastArg(options::OPT_Rpass_analysis_EQ)) + CmdArgs.push_back( + Args.MakeArgString(Twine("-pass-remarks-analysis=") + A->getValue())); + if (Args.getLastArg(options::OPT_save_temps_EQ)) + CmdArgs.push_back("-save-temps"); + + // Add the linker arguments to be forwarded by the wrapper. + CmdArgs.push_back("-linker-path"); + CmdArgs.push_back(LinkCommand->getExecutable()); + CmdArgs.push_back("--"); + for (const char *LinkArg : LinkCommand->getArguments()) + CmdArgs.push_back(LinkArg); + + const char *Exec = + Args.MakeArgString(getToolChain().GetProgramPath("clang-linker-wrapper")); + + // Replace the executable and arguments of the link job with the + // wrapper. 
+ LinkCommand->replaceExecutable(Exec); + LinkCommand->replaceArguments(CmdArgs); +} diff --git a/clang/lib/Driver/ToolChains/Clang.h b/clang/lib/Driver/ToolChains/Clang.h index 013cd2341e17..79407c9884d5 100644 --- a/clang/lib/Driver/ToolChains/Clang.h +++ b/clang/lib/Driver/ToolChains/Clang.h @@ -170,6 +170,21 @@ public: const char *LinkingOutput) const override; }; +/// Linker wrapper tool. +class LLVM_LIBRARY_VISIBILITY LinkerWrapper final : public Tool { + const Tool *Linker; + +public: + LinkerWrapper(const ToolChain &TC, const Tool *Linker) + : Tool("Offload::Linker", "linker", TC), Linker(Linker) {} + + bool hasIntegratedCPP() const override { return false; } + void ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, const InputInfoList &Inputs, + const llvm::opt::ArgList &TCArgs, + const char *LinkingOutput) const override; +}; + } // end namespace tools } // end namespace driver diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 1d30090ca21c..6364cd133e0b 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -645,6 +645,22 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args, /*IsLTO=*/true); } +void tools::addOpenMPRuntimeSpecificRPath(const ToolChain &TC, + const ArgList &Args, + ArgStringList &CmdArgs) { + + if (Args.hasFlag(options::OPT_fopenmp_implicit_rpath, + options::OPT_fno_openmp_implicit_rpath, true)) { + // Default to clang lib / lib64 folder, i.e. the same location as device + // runtime + SmallString<256> DefaultLibPath = + llvm::sys::path::parent_path(TC.getDriver().Dir); + llvm::sys::path::append(DefaultLibPath, Twine("lib") + CLANG_LIBDIR_SUFFIX); + CmdArgs.push_back("-rpath"); + CmdArgs.push_back(Args.MakeArgString(DefaultLibPath)); + } +} + void tools::addArchSpecificRPath(const ToolChain &TC, const ArgList &Args, ArgStringList &CmdArgs) { // Enable -frtlib-add-rpath by default for the case of VE. @@ -702,6 +718,9 @@ bool tools::addOpenMPRuntime(ArgStringList &CmdArgs, const ToolChain &TC, addArchSpecificRPath(TC, Args, CmdArgs); + if (RTKind == Driver::OMPRT_OMP) + addOpenMPRuntimeSpecificRPath(TC, Args, CmdArgs); + return true; } @@ -826,16 +845,16 @@ collectSanitizerRuntimes(const ToolChain &TC, const ArgList &Args, if (SanArgs.needsStatsRt() && SanArgs.linkRuntimes()) StaticRuntimes.push_back("stats_client"); + // Always link the static runtime regardless of DSO or executable. + if (SanArgs.needsAsanRt()) + HelperStaticRuntimes.push_back("asan_static"); + // Collect static runtimes. if (Args.hasArg(options::OPT_shared)) { // Don't link static runtimes into DSOs. return; } - // Always link the static runtime for executable. - if (SanArgs.needsAsanRt()) - HelperStaticRuntimes.push_back("asan_static"); - // Each static runtime that has a DSO counterpart above is excluded below, // but runtimes that exist only as static are not affected by needsSharedRt. 
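// addOpenMPRuntimeSpecificRPath above derives the implicit rpath from the
// directory holding the clang binary. A standalone sketch of the same path
// computation, assuming CLANG_LIBDIR_SUFFIX is typically "" or "64";
// defaultOpenMPRPath is an illustrative helper, not a clang API:
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Path.h"
#include <string>

static std::string defaultOpenMPRPath(llvm::StringRef ClangBinDir,
                                      llvm::StringRef LibdirSuffix) {
  // Parent of the bin directory, then "lib<suffix>", mirroring the
  // DefaultLibPath computation in addOpenMPRuntimeSpecificRPath.
  llvm::SmallString<256> Path(llvm::sys::path::parent_path(ClangBinDir));
  llvm::sys::path::append(Path, "lib" + std::string(LibdirSuffix));
  return std::string(Path.str());
}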
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.h b/clang/lib/Driver/ToolChains/CommonArgs.h index 00291a3681c8..646fa76949b7 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.h +++ b/clang/lib/Driver/ToolChains/CommonArgs.h @@ -106,6 +106,9 @@ void AddAssemblerKPIC(const ToolChain &ToolChain, const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs); +void addOpenMPRuntimeSpecificRPath(const ToolChain &TC, + const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs); void addArchSpecificRPath(const ToolChain &TC, const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs); /// Returns true, if an OpenMP runtime has been added. diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 7324339efaa6..4a9f6d4c4e3e 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -744,6 +744,10 @@ void CudaToolChain::addClangTargetOptions( return; } + // Link the bitcode library late if we're using device LTO. + if (getDriver().isUsingLTO(/* IsOffload */ true)) + return; + std::string BitcodeSuffix; if (DriverArgs.hasFlag(options::OPT_fopenmp_target_new_runtime, options::OPT_fno_openmp_target_new_runtime, true)) diff --git a/clang/lib/Driver/ToolChains/XCore.cpp b/clang/lib/Driver/ToolChains/XCore.cpp index 7e74f6374050..29fa82aec0a9 100644 --- a/clang/lib/Driver/ToolChains/XCore.cpp +++ b/clang/lib/Driver/ToolChains/XCore.cpp @@ -130,6 +130,10 @@ void XCoreToolChain::addClangTargetOptions(const ArgList &DriverArgs, ArgStringList &CC1Args, Action::OffloadKind) const { CC1Args.push_back("-nostdsysteminc"); + // Set `-fno-use-cxa-atexit` to default. + if (!DriverArgs.hasFlag(options::OPT_fuse_cxa_atexit, + options::OPT_fno_use_cxa_atexit, false)) + CC1Args.push_back("-fno-use-cxa-atexit"); } void XCoreToolChain::AddClangCXXStdlibIncludeArgs( diff --git a/clang/lib/Format/BreakableToken.cpp b/clang/lib/Format/BreakableToken.cpp index 5d03c9811e1b..f68d802c1f95 100644 --- a/clang/lib/Format/BreakableToken.cpp +++ b/clang/lib/Format/BreakableToken.cpp @@ -254,8 +254,8 @@ unsigned BreakableStringLiteral::getRemainingLength(unsigned LineIndex, unsigned Offset, unsigned StartColumn) const { return UnbreakableTailLength + Postfix.size() + - encoding::columnWidthWithTabs(Line.substr(Offset, StringRef::npos), - StartColumn, Style.TabWidth, Encoding); + encoding::columnWidthWithTabs(Line.substr(Offset), StartColumn, + Style.TabWidth, Encoding); } unsigned BreakableStringLiteral::getContentStartColumn(unsigned LineIndex, @@ -539,31 +539,30 @@ unsigned BreakableBlockComment::getRangeLength(unsigned LineIndex, unsigned Offset, StringRef::size_type Length, unsigned StartColumn) const { + return encoding::columnWidthWithTabs( + Content[LineIndex].substr(Offset, Length), StartColumn, Style.TabWidth, + Encoding); +} + +unsigned BreakableBlockComment::getRemainingLength(unsigned LineIndex, + unsigned Offset, + unsigned StartColumn) const { unsigned LineLength = - encoding::columnWidthWithTabs(Content[LineIndex].substr(Offset, Length), - StartColumn, Style.TabWidth, Encoding); - // FIXME: This should go into getRemainingLength instead, but we currently - // break tests when putting it there. Investigate how to fix those tests. - // The last line gets a "*/" postfix. + UnbreakableTailLength + + getRangeLength(LineIndex, Offset, StringRef::npos, StartColumn); if (LineIndex + 1 == Lines.size()) { LineLength += 2; // We never need a decoration when breaking just the trailing "*/" postfix. 
- // Note that checking that Length == 0 is not enough, since Length could - // also be StringRef::npos. - if (Content[LineIndex].substr(Offset, StringRef::npos).empty()) { - LineLength -= Decoration.size(); + bool HasRemainingText = Offset < Content[LineIndex].size(); + if (!HasRemainingText) { + bool HasDecoration = Lines[LineIndex].ltrim().startswith(Decoration); + if (HasDecoration) + LineLength -= Decoration.size(); } } return LineLength; } -unsigned BreakableBlockComment::getRemainingLength(unsigned LineIndex, - unsigned Offset, - unsigned StartColumn) const { - return UnbreakableTailLength + - getRangeLength(LineIndex, Offset, StringRef::npos, StartColumn); -} - unsigned BreakableBlockComment::getContentStartColumn(unsigned LineIndex, bool Break) const { if (Break) diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index b66584652bc8..45a4d23557f7 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -1817,8 +1817,8 @@ unsigned ContinuationIndenter::reformatRawStringLiteral( ContentStartsOnNewline || (NewCode->find('\n') != std::string::npos); if (IsMultiline) { // Break before further function parameters on all levels. - for (unsigned i = 0, e = State.Stack.size(); i != e; ++i) - State.Stack[i].BreakBeforeParameter = true; + for (ParenState &Paren : State.Stack) + Paren.BreakBeforeParameter = true; } return Fixes.second + PrefixExcessCharacters * Style.PenaltyExcessCharacter; } @@ -1826,8 +1826,8 @@ unsigned ContinuationIndenter::reformatRawStringLiteral( unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current, LineState &State) { // Break before further function parameters on all levels. - for (unsigned i = 0, e = State.Stack.size(); i != e; ++i) - State.Stack[i].BreakBeforeParameter = true; + for (ParenState &Paren : State.Stack) + Paren.BreakBeforeParameter = true; unsigned ColumnsUsed = State.Column; // We can only affect layout of the first and the last line, so the penalty @@ -2380,8 +2380,8 @@ ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, // the next parameter on all levels, so that the next parameter is clearly // visible. Line comments already introduce a break. 
if (Current.isNot(TT_LineComment)) { - for (unsigned i = 0, e = State.Stack.size(); i != e; ++i) - State.Stack[i].BreakBeforeParameter = true; + for (ParenState &Paren : State.Stack) + Paren.BreakBeforeParameter = true; } if (Current.is(TT_BlockComment)) diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index 04e2915e3af6..dd4755c2227e 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -44,6 +44,7 @@ #include <algorithm> #include <memory> #include <mutex> +#include <numeric> #include <string> #include <unordered_map> @@ -532,11 +533,9 @@ template <> struct MappingTraits<FormatStyle> { IO.mapOptional("Language", Style.Language); if (IO.outputting()) { - StringRef StylesArray[] = {"LLVM", "Google", "Chromium", "Mozilla", - "WebKit", "GNU", "Microsoft"}; - ArrayRef<StringRef> Styles(StylesArray); - for (size_t i = 0, e = Styles.size(); i < e; ++i) { - StringRef StyleName(Styles[i]); + StringRef Styles[] = {"LLVM", "Google", "Chromium", "Mozilla", + "WebKit", "GNU", "Microsoft"}; + for (StringRef StyleName : Styles) { FormatStyle PredefinedStyle; if (getPredefinedStyle(StyleName, Style.Language, &PredefinedStyle) && Style == PredefinedStyle) { @@ -1681,10 +1680,10 @@ std::error_code parseConfiguration(llvm::MemoryBufferRef Config, // configuration (which can only be at slot 0) after it. FormatStyle::FormatStyleSet StyleSet; bool LanguageFound = false; - for (int i = Styles.size() - 1; i >= 0; --i) { - if (Styles[i].Language != FormatStyle::LK_None) - StyleSet.Add(Styles[i]); - if (Styles[i].Language == Language) + for (const FormatStyle &Style : llvm::reverse(Styles)) { + if (Style.Language != FormatStyle::LK_None) + StyleSet.Add(Style); + if (Style.Language == Language) LanguageFound = true; } if (!LanguageFound) { @@ -1890,9 +1889,8 @@ public: tooling::Replacements Result; deriveLocalStyle(AnnotatedLines); AffectedRangeMgr.computeAffectedLines(AnnotatedLines); - for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { - Annotator.calculateFormattingInformation(*AnnotatedLines[i]); - } + for (AnnotatedLine *Line : AnnotatedLines) + Annotator.calculateFormattingInformation(*Line); Annotator.setCommentLineLevels(AnnotatedLines); WhitespaceManager Whitespaces( @@ -1962,10 +1960,10 @@ private: deriveLocalStyle(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) { bool HasBinPackedFunction = false; bool HasOnePerLineFunction = false; - for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { - if (!AnnotatedLines[i]->First->Next) + for (AnnotatedLine *Line : AnnotatedLines) { + if (!Line->First->Next) continue; - FormatToken *Tok = AnnotatedLines[i]->First->Next; + FormatToken *Tok = Line->First->Next; while (Tok->Next) { if (Tok->is(PPK_BinPacked)) HasBinPackedFunction = true; @@ -2524,9 +2522,8 @@ static void sortCppIncludes(const FormatStyle &Style, if (!affectsRange(Ranges, IncludesBeginOffset, IncludesEndOffset)) return; SmallVector<unsigned, 16> Indices; - for (unsigned i = 0, e = Includes.size(); i != e; ++i) { - Indices.push_back(i); - } + Indices.resize(Includes.size()); + std::iota(Indices.begin(), Indices.end(), 0); if (Style.SortIncludes == FormatStyle::SI_CaseInsensitive) { llvm::stable_sort(Indices, [&](unsigned LHSI, unsigned RHSI) { @@ -2678,6 +2675,15 @@ tooling::Replacements sortCppIncludes(const FormatStyle &Style, StringRef Code, if (!FormattingOff && !MergeWithNextLine) { if (IncludeRegex.match(Line, &Matches)) { StringRef IncludeName = Matches[2]; + if (Line.contains("/*") && !Line.contains("*/")) { + // #include 
with a start of a block comment, but without the end. + // Need to keep all the lines until the end of the comment together. + // FIXME: This is somehow simplified check that probably does not work + // correctly if there are multiple comments on a line. + Pos = Code.find("*/", SearchFrom); + Line = Code.substr( + Prev, (Pos != StringRef::npos ? Pos + 2 : Code.size()) - Prev); + } int Category = Categories.getIncludePriority( IncludeName, /*CheckMainHeader=*/!MainIncludeFound && FirstIncludeBlock); @@ -2718,7 +2724,7 @@ static unsigned findJavaImportGroup(const FormatStyle &Style, unsigned LongestMatchIndex = UINT_MAX; unsigned LongestMatchLength = 0; for (unsigned I = 0; I < Style.JavaImportGroups.size(); I++) { - std::string GroupPrefix = Style.JavaImportGroups[I]; + const std::string &GroupPrefix = Style.JavaImportGroups[I]; if (ImportIdentifier.startswith(GroupPrefix) && GroupPrefix.length() > LongestMatchLength) { LongestMatchIndex = I; @@ -2743,13 +2749,16 @@ static void sortJavaImports(const FormatStyle &Style, unsigned ImportsBlockSize = ImportsEndOffset - ImportsBeginOffset; if (!affectsRange(Ranges, ImportsBeginOffset, ImportsEndOffset)) return; + SmallVector<unsigned, 16> Indices; + Indices.resize(Imports.size()); + std::iota(Indices.begin(), Indices.end(), 0); + SmallVector<unsigned, 16> JavaImportGroups; - for (unsigned i = 0, e = Imports.size(); i != e; ++i) { - Indices.push_back(i); - JavaImportGroups.push_back( - findJavaImportGroup(Style, Imports[i].Identifier)); - } + JavaImportGroups.reserve(Imports.size()); + for (const JavaImportDirective &Import : Imports) + JavaImportGroups.push_back(findJavaImportGroup(Style, Import.Identifier)); + bool StaticImportAfterNormalImport = Style.SortJavaStaticImport == FormatStyle::SJSIO_After; llvm::sort(Indices, [&](unsigned LHSI, unsigned RHSI) { diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index a64329802ee3..f116a89ac644 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -123,6 +123,34 @@ namespace format { TYPE(CSharpGenericTypeConstraintComma) \ TYPE(Unknown) +/// Sorted operators that can follow a C variable. +static const std::vector<clang::tok::TokenKind> COperatorsFollowingVar = [] { + std::vector<clang::tok::TokenKind> ReturnVal = { + tok::l_square, tok::r_square, + tok::l_paren, tok::r_paren, + tok::r_brace, tok::period, + tok::ellipsis, tok::ampamp, + tok::ampequal, tok::star, + tok::starequal, tok::plus, + tok::plusplus, tok::plusequal, + tok::minus, tok::arrow, + tok::minusminus, tok::minusequal, + tok::exclaim, tok::exclaimequal, + tok::slash, tok::slashequal, + tok::percent, tok::percentequal, + tok::less, tok::lessless, + tok::lessequal, tok::lesslessequal, + tok::greater, tok::greatergreater, + tok::greaterequal, tok::greatergreaterequal, + tok::caret, tok::caretequal, + tok::pipe, tok::pipepipe, + tok::pipeequal, tok::question, + tok::semi, tok::equal, + tok::equalequal, tok::comma}; + assert(std::is_sorted(ReturnVal.begin(), ReturnVal.end())); + return ReturnVal; +}(); + /// Determines the semantic type of a syntactic token, e.g. whether "<" is a /// template opener or binary operator. 
enum TokenType : uint8_t { diff --git a/clang/lib/Format/NamespaceEndCommentsFixer.cpp b/clang/lib/Format/NamespaceEndCommentsFixer.cpp index 0c34c6126c21..9fb6c5142672 100644 --- a/clang/lib/Format/NamespaceEndCommentsFixer.cpp +++ b/clang/lib/Format/NamespaceEndCommentsFixer.cpp @@ -210,8 +210,8 @@ std::pair<tooling::Replacements, unsigned> NamespaceEndCommentsFixer::analyze( // Spin through the lines and ensure we have balanced braces. int Braces = 0; - for (size_t I = 0, E = AnnotatedLines.size(); I != E; ++I) { - FormatToken *Tok = AnnotatedLines[I]->First; + for (AnnotatedLine *Line : AnnotatedLines) { + FormatToken *Tok = Line->First; while (Tok) { Braces += Tok->is(tok::l_brace) ? 1 : Tok->is(tok::r_brace) ? -1 : 0; Tok = Tok->Next; diff --git a/clang/lib/Format/SortJavaScriptImports.cpp b/clang/lib/Format/SortJavaScriptImports.cpp index e4107525a7ff..71326163f45a 100644 --- a/clang/lib/Format/SortJavaScriptImports.cpp +++ b/clang/lib/Format/SortJavaScriptImports.cpp @@ -133,7 +133,10 @@ class JavaScriptImportSorter : public TokenAnalyzer { public: JavaScriptImportSorter(const Environment &Env, const FormatStyle &Style) : TokenAnalyzer(Env, Style), - FileContents(Env.getSourceManager().getBufferData(Env.getFileID())) {} + FileContents(Env.getSourceManager().getBufferData(Env.getFileID())) { + // FormatToken.Tok starts out in an uninitialized state. + invalidToken.Tok.startToken(); + } std::pair<tooling::Replacements, unsigned> analyze(TokenAnnotator &Annotator, @@ -232,7 +235,6 @@ private: if (!Current || Current == LineEnd->Next) { // Set the current token to an invalid token, so that further parsing on // this line fails. - invalidToken.Tok.setKind(tok::unknown); Current = &invalidToken; } } @@ -510,7 +512,6 @@ private: while (Current->is(tok::identifier)) { nextToken(); if (Current->is(tok::semi)) { - nextToken(); return true; } if (!Current->is(tok::period)) diff --git a/clang/lib/Format/TokenAnalyzer.cpp b/clang/lib/Format/TokenAnalyzer.cpp index d0754e0c1112..2bd5a1fd6230 100644 --- a/clang/lib/Format/TokenAnalyzer.cpp +++ b/clang/lib/Format/TokenAnalyzer.cpp @@ -113,12 +113,13 @@ std::pair<tooling::Replacements, unsigned> TokenAnalyzer::process() { assert(UnwrappedLines.rbegin()->empty()); unsigned Penalty = 0; for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE; ++Run) { + const auto &Lines = UnwrappedLines[Run]; LLVM_DEBUG(llvm::dbgs() << "Run " << Run << "...\n"); SmallVector<AnnotatedLine *, 16> AnnotatedLines; TokenAnnotator Annotator(Style, Lex.getKeywords()); - for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) { - AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i])); + for (const UnwrappedLine &Line : Lines) { + AnnotatedLines.push_back(new AnnotatedLine(Line)); Annotator.annotate(*AnnotatedLines.back()); } @@ -130,9 +131,8 @@ std::pair<tooling::Replacements, unsigned> TokenAnalyzer::process() { for (const tooling::Replacement &Fix : RunResult.first) llvm::dbgs() << Fix.toString() << "\n"; }); - for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { - delete AnnotatedLines[i]; - } + for (AnnotatedLine *Line : AnnotatedLines) + delete Line; Penalty += RunResult.second; for (const auto &R : RunResult.first) { diff --git a/clang/lib/Format/TokenAnnotator.h b/clang/lib/Format/TokenAnnotator.h index ecd9dbb0f864..96e03967ff60 100644 --- a/clang/lib/Format/TokenAnnotator.h +++ b/clang/lib/Format/TokenAnnotator.h @@ -66,9 +66,8 @@ public: } ~AnnotatedLine() { - for (unsigned i = 0, e = Children.size(); i != e; ++i) { - 
delete Children[i]; - } + for (AnnotatedLine *Child : Children) + delete Child; FormatToken *Current = First; while (Current) { Current->Children.clear(); diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp index 0172a224335c..01c151fec132 100644 --- a/clang/lib/Format/UnwrappedLineFormatter.cpp +++ b/clang/lib/Format/UnwrappedLineFormatter.cpp @@ -100,10 +100,27 @@ private: if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || Style.isCSharp()) return 0; - if (RootToken.isAccessSpecifier(false) || - RootToken.isObjCAccessSpecifier() || - (RootToken.isOneOf(Keywords.kw_signals, Keywords.kw_qsignals) && - RootToken.Next && RootToken.Next->is(tok::colon))) { + + auto IsAccessModifier = [this, &RootToken]() { + if (RootToken.isAccessSpecifier(Style.isCpp())) + return true; + else if (RootToken.isObjCAccessSpecifier()) + return true; + // Handle Qt signals. + else if ((RootToken.isOneOf(Keywords.kw_signals, Keywords.kw_qsignals) && + RootToken.Next && RootToken.Next->is(tok::colon))) + return true; + else if (RootToken.Next && + RootToken.Next->isOneOf(Keywords.kw_slots, Keywords.kw_qslots) && + RootToken.Next->Next && RootToken.Next->Next->is(tok::colon)) + return true; + // Handle malformed access specifier e.g. 'private' without trailing ':'. + else if (!RootToken.Next && RootToken.isAccessSpecifier(false)) + return true; + return false; + }; + + if (IsAccessModifier()) { // The AccessModifierOffset may be overridden by IndentAccessModifiers, // in which case we take a negative value of the IndentWidth to simulate // the upper indent level. diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index 35be2fa3eb62..642679128409 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -687,9 +687,9 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty()); // Assume other blocks for all unclosed opening braces. - for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { - if (LBraceStack[i]->is(BK_Unknown)) - LBraceStack[i]->setBlockKind(BK_Block); + for (FormatToken *LBrace : LBraceStack) { + if (LBrace->is(BK_Unknown)) + LBrace->setBlockKind(BK_Block); } FormatTok = Tokens->setPosition(StoredPosition); @@ -2708,14 +2708,25 @@ void UnwrappedLineParser::parseSwitch() { } void UnwrappedLineParser::parseAccessSpecifier() { + FormatToken *AccessSpecifierCandidate = FormatTok; nextToken(); // Understand Qt's slots. if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) nextToken(); // Otherwise, we don't know what it is, and we'd better keep the next token. - if (FormatTok->Tok.is(tok::colon)) + if (FormatTok->Tok.is(tok::colon)) { nextToken(); - addUnwrappedLine(); + addUnwrappedLine(); + } else if (!FormatTok->Tok.is(tok::coloncolon) && + !std::binary_search(COperatorsFollowingVar.begin(), + COperatorsFollowingVar.end(), + FormatTok->Tok.getKind())) { + // Not a variable name nor namespace name. + addUnwrappedLine(); + } else if (AccessSpecifierCandidate) { + // Consider the access specifier to be a C identifier. 
+ AccessSpecifierCandidate->Tok.setKind(tok::identifier); + } } void UnwrappedLineParser::parseConcept() { diff --git a/clang/lib/Format/UsingDeclarationsSorter.cpp b/clang/lib/Format/UsingDeclarationsSorter.cpp index 5608a5a75953..bf5307260c0b 100644 --- a/clang/lib/Format/UsingDeclarationsSorter.cpp +++ b/clang/lib/Format/UsingDeclarationsSorter.cpp @@ -188,10 +188,10 @@ std::pair<tooling::Replacements, unsigned> UsingDeclarationsSorter::analyze( AffectedRangeMgr.computeAffectedLines(AnnotatedLines); tooling::Replacements Fixes; SmallVector<UsingDeclaration, 4> UsingDeclarations; - for (size_t I = 0, E = AnnotatedLines.size(); I != E; ++I) { - const auto *FirstTok = AnnotatedLines[I]->First; - if (AnnotatedLines[I]->InPPDirective || - !AnnotatedLines[I]->startsWith(tok::kw_using) || FirstTok->Finalized) { + for (const AnnotatedLine *Line : AnnotatedLines) { + const auto *FirstTok = Line->First; + if (Line->InPPDirective || !Line->startsWith(tok::kw_using) || + FirstTok->Finalized) { endUsingDeclarationBlock(&UsingDeclarations, SourceMgr, &Fixes); continue; } @@ -204,7 +204,7 @@ std::pair<tooling::Replacements, unsigned> UsingDeclarationsSorter::analyze( endUsingDeclarationBlock(&UsingDeclarations, SourceMgr, &Fixes); continue; } - UsingDeclarations.push_back(UsingDeclaration(AnnotatedLines[I], Label)); + UsingDeclarations.push_back(UsingDeclaration(Line, Label)); } endUsingDeclarationBlock(&UsingDeclarations, SourceMgr, &Fixes); return {Fixes, 0}; diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp index 0d2e507ac587..4c130abd83c3 100644 --- a/clang/lib/Format/WhitespaceManager.cpp +++ b/clang/lib/Format/WhitespaceManager.cpp @@ -344,6 +344,10 @@ AlignTokenSequence(const FormatStyle &Style, unsigned Start, unsigned End, if (Changes[ScopeStart - 1].Tok->is(TT_FunctionDeclarationName)) return true; + // Lambda. + if (Changes[ScopeStart - 1].Tok->is(TT_LambdaLBrace)) + return false; + // Continued function declaration if (ScopeStart > Start + 1 && Changes[ScopeStart - 2].Tok->is(TT_FunctionDeclarationName)) @@ -352,8 +356,13 @@ AlignTokenSequence(const FormatStyle &Style, unsigned Start, unsigned End, // Continued function call if (ScopeStart > Start + 1 && Changes[ScopeStart - 2].Tok->is(tok::identifier) && - Changes[ScopeStart - 1].Tok->is(tok::l_paren)) + Changes[ScopeStart - 1].Tok->is(tok::l_paren) && + Changes[ScopeStart].Tok->isNot(TT_LambdaLSquare)) { + if (Changes[i].Tok->MatchingParen && + Changes[i].Tok->MatchingParen->is(TT_LambdaLBrace)) + return false; return Style.BinPackArguments; + } // Ternary operator if (Changes[i].Tok->is(TT_ConditionalExpr)) @@ -372,8 +381,15 @@ AlignTokenSequence(const FormatStyle &Style, unsigned Start, unsigned End, if (ScopeStart > Start + 1 && Changes[ScopeStart - 2].Tok->isNot(tok::identifier) && Changes[ScopeStart - 1].Tok->is(tok::l_brace) && - Changes[i].Tok->isNot(tok::r_brace)) + Changes[i].Tok->isNot(tok::r_brace)) { + for (unsigned OuterScopeStart : llvm::reverse(ScopeStack)) { + // Lambda. + if (OuterScopeStart > Start && + Changes[OuterScopeStart - 1].Tok->is(TT_LambdaLBrace)) + return false; + } return true; + } return false; }; @@ -1014,7 +1030,7 @@ void WhitespaceManager::alignArrayInitializersRightJustified( // Now go through and fixup the spaces. 
auto *CellIter = Cells.begin(); - for (auto i = 0U; i < CellDescs.CellCount; i++, ++CellIter) { + for (auto i = 0U; i < CellDescs.CellCount; ++i, ++CellIter) { unsigned NetWidth = 0U; if (isSplitCell(*CellIter)) NetWidth = getNetWidth(Cells.begin(), CellIter, CellDescs.InitialSpaces); @@ -1331,8 +1347,13 @@ void WhitespaceManager::storeReplacement(SourceRange Range, StringRef Text) { void WhitespaceManager::appendNewlineText(std::string &Text, unsigned Newlines) { - for (unsigned i = 0; i < Newlines; ++i) - Text.append(UseCRLF ? "\r\n" : "\n"); + if (UseCRLF) { + Text.reserve(Text.size() + 2 * Newlines); + for (unsigned i = 0; i < Newlines; ++i) + Text.append("\r\n"); + } else { + Text.append(Newlines, '\n'); + } } void WhitespaceManager::appendEscapedNewlineText( diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 7f1ce3da7e7e..553a0b31c0ab 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -3560,6 +3560,8 @@ void CompilerInvocation::GenerateLangArgs(const LangOptions &Opts, GenerateArg(Args, OPT_fclang_abi_compat_EQ, "11.0", SA); else if (Opts.getClangABICompat() == LangOptions::ClangABI::Ver12) GenerateArg(Args, OPT_fclang_abi_compat_EQ, "12.0", SA); + else if (Opts.getClangABICompat() == LangOptions::ClangABI::Ver13) + GenerateArg(Args, OPT_fclang_abi_compat_EQ, "13.0", SA); if (Opts.getSignReturnAddressScope() == LangOptions::SignReturnAddressScopeKind::All) @@ -4062,6 +4064,8 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args, Opts.setClangABICompat(LangOptions::ClangABI::Ver11); else if (Major <= 12) Opts.setClangABICompat(LangOptions::ClangABI::Ver12); + else if (Major <= 13) + Opts.setClangABICompat(LangOptions::ClangABI::Ver13); } else if (Ver != "latest") { Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << A->getValue(); diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index a9023a7a1171..e259ab47c558 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -25,6 +25,7 @@ #include "clang/Serialization/ASTReader.h" #include "llvm/ADT/APFloat.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" using namespace clang; static bool MacroBodyEndsInBackslash(StringRef MacroBody) { @@ -914,6 +915,13 @@ static void InitializePredefinedMacros(const TargetInfo &TI, Builder.defineMacro("__LONG_WIDTH__", Twine(TI.getLongWidth())); Builder.defineMacro("__LLONG_WIDTH__", Twine(TI.getLongLongWidth())); + size_t BitIntMaxWidth = TI.getMaxBitIntWidth(); + assert(BitIntMaxWidth <= llvm::IntegerType::MAX_INT_BITS && + "Target defined a max bit width larger than LLVM can support!"); + assert(BitIntMaxWidth >= TI.getLongLongWidth() && + "Target defined a max bit width smaller than the C standard allows!"); + Builder.defineMacro("__BITINT_MAXWIDTH__", Twine(BitIntMaxWidth)); + DefineTypeSize("__SCHAR_MAX__", TargetInfo::SignedChar, TI, Builder); DefineTypeSize("__SHRT_MAX__", TargetInfo::SignedShort, TI, Builder); DefineTypeSize("__INT_MAX__", TargetInfo::SignedInt, TI, Builder); diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h index 45fac248dadb..1cfc1403276d 100644 --- a/clang/lib/Headers/arm_acle.h +++ b/clang/lib/Headers/arm_acle.h @@ -730,6 +730,12 @@ __arm_st64bv0(void *__addr, data512_t __value) { #define __arm_mte_ptrdiff(__ptra, __ptrb) __builtin_arm_subp(__ptra, __ptrb) #endif +/* Memory Operations Intrinsics */ 
+#if __ARM_FEATURE_MOPS && __ARM_FEATURE_MEMORY_TAGGING +#define __arm_mops_memset_tag(__tagged_address, __value, __size) \ + __builtin_arm_mops_memset_tag(__tagged_address, __value, __size) +#endif + /* Transactional Memory Extension (TME) Intrinsics */ #if __ARM_FEATURE_TME diff --git a/clang/lib/Headers/float.h b/clang/lib/Headers/float.h index ed610b24aa10..c6a6cc08462d 100644 --- a/clang/lib/Headers/float.h +++ b/clang/lib/Headers/float.h @@ -14,10 +14,11 @@ * additional definitions provided for Windows. * For more details see http://msdn.microsoft.com/en-us/library/y0ybw9fy.aspx * - * Also fall back on Darwin to allow additional definitions and + * Also fall back on Darwin and AIX to allow additional definitions and * implementation-defined values. */ -#if (defined(__APPLE__) || (defined(__MINGW32__) || defined(_MSC_VER))) && \ +#if (defined(__APPLE__) || defined(__MINGW32__) || defined(_MSC_VER) || \ + defined(_AIX)) && \ __STDC_HOSTED__ && __has_include_next(<float.h>) /* Prior to Apple's 10.7 SDK, float.h SDK header used to apply an extra level @@ -37,7 +38,9 @@ # undef FLT_MANT_DIG # undef DBL_MANT_DIG # undef LDBL_MANT_DIG -# if __STDC_VERSION__ >= 199901L || !defined(__STRICT_ANSI__) || __cplusplus >= 201103L +# if __STDC_VERSION__ >= 199901L || !defined(__STRICT_ANSI__) || \ + __cplusplus >= 201103L || \ + (__STDC_HOSTED__ && defined(_AIX) && defined(_ALL_SOURCE)) # undef DECIMAL_DIG # endif # undef FLT_DIG @@ -64,7 +67,9 @@ # undef FLT_MIN # undef DBL_MIN # undef LDBL_MIN -# if __STDC_VERSION__ >= 201112L || !defined(__STRICT_ANSI__) || __cplusplus >= 201703L +# if __STDC_VERSION__ >= 201112L || !defined(__STRICT_ANSI__) || \ + __cplusplus >= 201703L || \ + (__STDC_HOSTED__ && defined(_AIX) && defined(_ALL_SOURCE)) # undef FLT_TRUE_MIN # undef DBL_TRUE_MIN # undef LDBL_TRUE_MIN @@ -87,7 +92,9 @@ #define DBL_MANT_DIG __DBL_MANT_DIG__ #define LDBL_MANT_DIG __LDBL_MANT_DIG__ -#if __STDC_VERSION__ >= 199901L || !defined(__STRICT_ANSI__) || __cplusplus >= 201103L +#if __STDC_VERSION__ >= 199901L || !defined(__STRICT_ANSI__) || \ + __cplusplus >= 201103L || \ + (__STDC_HOSTED__ && defined(_AIX) && defined(_ALL_SOURCE)) # define DECIMAL_DIG __DECIMAL_DIG__ #endif @@ -123,7 +130,9 @@ #define DBL_MIN __DBL_MIN__ #define LDBL_MIN __LDBL_MIN__ -#if __STDC_VERSION__ >= 201112L || !defined(__STRICT_ANSI__) || __cplusplus >= 201703L +#if __STDC_VERSION__ >= 201112L || !defined(__STRICT_ANSI__) || \ + __cplusplus >= 201703L || \ + (__STDC_HOSTED__ && defined(_AIX) && defined(_ALL_SOURCE)) # define FLT_TRUE_MIN __FLT_DENORM_MIN__ # define DBL_TRUE_MIN __DBL_DENORM_MIN__ # define LDBL_TRUE_MIN __LDBL_DENORM_MIN__ diff --git a/clang/lib/Headers/limits.h b/clang/lib/Headers/limits.h index c2d3a7cf4353..cfd23a219ee5 100644 --- a/clang/lib/Headers/limits.h +++ b/clang/lib/Headers/limits.h @@ -78,6 +78,8 @@ #define LONG_WIDTH __LONG_WIDTH__ #define ULLONG_WIDTH __LLONG_WIDTH__ #define LLONG_WIDTH __LLONG_WIDTH__ + +#define BITINT_MAXWIDTH __BITINT_MAXWIDTH__ #endif #ifdef __CHAR_UNSIGNED__ /* -funsigned-char */ diff --git a/clang/lib/Headers/opencl-c-base.h b/clang/lib/Headers/opencl-c-base.h index 06b78da63e69..ad276dc0f6aa 100644 --- a/clang/lib/Headers/opencl-c-base.h +++ b/clang/lib/Headers/opencl-c-base.h @@ -72,6 +72,12 @@ #endif // defined(__SPIR__) #endif // (__OPENCL_CPP_VERSION__ == 202100 || __OPENCL_C_VERSION__ == 300) +#if !defined(__opencl_c_generic_address_space) +// Internal feature macro to provide named (global, local, private) address +// space overloads for builtin functions 
that take a pointer argument. +#define __opencl_c_named_address_space_builtins 1 +#endif // !defined(__opencl_c_generic_address_space) + // built-in scalar data types: /** diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h index 8fde2fa29899..059a2ec2371b 100644 --- a/clang/lib/Headers/opencl-c.h +++ b/clang/lib/Headers/opencl-c.h @@ -7285,7 +7285,9 @@ half4 __ovld fract(half4 x, half4 *iptr); half8 __ovld fract(half8 x, half8 *iptr); half16 __ovld fract(half16 x, half16 *iptr); #endif //cl_khr_fp16 -#else +#endif //defined(__opencl_c_generic_address_space) + +#if defined(__opencl_c_named_address_space_builtins) float __ovld fract(float x, __global float *iptr); float2 __ovld fract(float2 x, __global float2 *iptr); float3 __ovld fract(float3 x, __global float3 *iptr); @@ -7344,7 +7346,7 @@ half4 __ovld fract(half4 x, __private half4 *iptr); half8 __ovld fract(half8 x, __private half8 *iptr); half16 __ovld fract(half16 x, __private half16 *iptr); #endif //cl_khr_fp16 -#endif //defined(__opencl_c_generic_address_space) +#endif //defined(__opencl_c_named_address_space_builtins) /** * Extract mantissa and exponent from x. For each @@ -7375,7 +7377,9 @@ half4 __ovld frexp(half4 x, int4 *exp); half8 __ovld frexp(half8 x, int8 *exp); half16 __ovld frexp(half16 x, int16 *exp); #endif //cl_khr_fp16 -#else +#endif //defined(__opencl_c_generic_address_space) + +#if defined(__opencl_c_named_address_space_builtins) float __ovld frexp(float x, __global int *exp); float2 __ovld frexp(float2 x, __global int2 *exp); float3 __ovld frexp(float3 x, __global int3 *exp); @@ -7434,7 +7438,7 @@ half4 __ovld frexp(half4 x, __private int4 *exp); half8 __ovld frexp(half8 x, __private int8 *exp); half16 __ovld frexp(half16 x, __private int16 *exp); #endif //cl_khr_fp16 -#endif //defined(__opencl_c_generic_address_space) +#endif //defined(__opencl_c_named_address_space_builtins) /** * Compute the value of the square root of x^2 + y^2 @@ -7582,7 +7586,9 @@ half4 __ovld lgamma_r(half4 x, int4 *signp); half8 __ovld lgamma_r(half8 x, int8 *signp); half16 __ovld lgamma_r(half16 x, int16 *signp); #endif //cl_khr_fp16 -#else +#endif //defined(__opencl_c_generic_address_space) + +#if defined(__opencl_c_named_address_space_builtins) float __ovld lgamma_r(float x, __global int *signp); float2 __ovld lgamma_r(float2 x, __global int2 *signp); float3 __ovld lgamma_r(float3 x, __global int3 *signp); @@ -7641,7 +7647,7 @@ half4 __ovld lgamma_r(half4 x, __private int4 *signp); half8 __ovld lgamma_r(half8 x, __private int8 *signp); half16 __ovld lgamma_r(half16 x, __private int16 *signp); #endif //cl_khr_fp16 -#endif //defined(__opencl_c_generic_address_space) +#endif //defined(__opencl_c_named_address_space_builtins) /** * Compute natural logarithm. @@ -7888,7 +7894,9 @@ half4 __ovld modf(half4 x, half4 *iptr); half8 __ovld modf(half8 x, half8 *iptr); half16 __ovld modf(half16 x, half16 *iptr); #endif //cl_khr_fp16 -#else +#endif //defined(__opencl_c_generic_address_space) + +#if defined(__opencl_c_named_address_space_builtins) float __ovld modf(float x, __global float *iptr); float2 __ovld modf(float2 x, __global float2 *iptr); float3 __ovld modf(float3 x, __global float3 *iptr); @@ -7947,7 +7955,7 @@ half4 __ovld modf(half4 x, __private half4 *iptr); half8 __ovld modf(half8 x, __private half8 *iptr); half16 __ovld modf(half16 x, __private half16 *iptr); #endif //cl_khr_fp16 -#endif //defined(__opencl_c_generic_address_space) +#endif //defined(__opencl_c_named_address_space_builtins) /** * Returns a quiet NaN. 
The nancode may be placed @@ -8147,9 +8155,10 @@ half3 __ovld remquo(half3 x, half3 y, int3 *quo); half4 __ovld remquo(half4 x, half4 y, int4 *quo); half8 __ovld remquo(half8 x, half8 y, int8 *quo); half16 __ovld remquo(half16 x, half16 y, int16 *quo); - #endif //cl_khr_fp16 -#else +#endif //defined(__opencl_c_generic_address_space) + +#if defined(__opencl_c_named_address_space_builtins) float __ovld remquo(float x, float y, __global int *quo); float2 __ovld remquo(float2 x, float2 y, __global int2 *quo); float3 __ovld remquo(float3 x, float3 y, __global int3 *quo); @@ -8208,7 +8217,7 @@ half4 __ovld remquo(half4 x, half4 y, __private int4 *quo); half8 __ovld remquo(half8 x, half8 y, __private int8 *quo); half16 __ovld remquo(half16 x, half16 y, __private int16 *quo); #endif //cl_khr_fp16 -#endif //defined(__opencl_c_generic_address_space) +#endif //defined(__opencl_c_named_address_space_builtins) /** * Round to integral value (using round to nearest * even rounding mode) in floating-point format. @@ -8372,7 +8381,9 @@ half4 __ovld sincos(half4 x, half4 *cosval); half8 __ovld sincos(half8 x, half8 *cosval); half16 __ovld sincos(half16 x, half16 *cosval); #endif //cl_khr_fp16 -#else +#endif //defined(__opencl_c_generic_address_space) + +#if defined(__opencl_c_named_address_space_builtins) float __ovld sincos(float x, __global float *cosval); float2 __ovld sincos(float2 x, __global float2 *cosval); float3 __ovld sincos(float3 x, __global float3 *cosval); @@ -8431,7 +8442,7 @@ half4 __ovld sincos(half4 x, __private half4 *cosval); half8 __ovld sincos(half8 x, __private half8 *cosval); half16 __ovld sincos(half16 x, __private half16 *cosval); #endif //cl_khr_fp16 -#endif //defined(__opencl_c_generic_address_space) +#endif //defined(__opencl_c_named_address_space_builtins) /** * Compute hyperbolic sine. 
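These opencl-c.h hunks all apply the same mechanical rewrite, so the shape is worth seeing once in isolation. A condensed sketch, with the overload lists abbreviated to a single function (`fract` stands in for each affected builtin): previously the named-address-space declarations lived in the `#else` branch of the generic-address-space guard, so they vanished whenever the generic address space was available; afterwards they hang off their own feature macro and the two sets are declared independently.

/* Before: named-AS overloads unavailable once generic AS is present. */
#if defined(__opencl_c_generic_address_space)
float __ovld fract(float x, float *iptr);
#else
float __ovld fract(float x, __global float *iptr);
#endif

/* After: each overload set is guarded by its own feature macro. */
#if defined(__opencl_c_generic_address_space)
float __ovld fract(float x, float *iptr);
#endif //defined(__opencl_c_generic_address_space)

#if defined(__opencl_c_named_address_space_builtins)
float __ovld fract(float x, __global float *iptr);
float __ovld fract(float x, __local float *iptr);
float __ovld fract(float x, __private float *iptr);
#endif //defined(__opencl_c_named_address_space_builtins)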
@@ -11315,7 +11326,9 @@ half4 __ovld __purefn vload4(size_t offset, const half *p); half8 __ovld __purefn vload8(size_t offset, const half *p); half16 __ovld __purefn vload16(size_t offset, const half *p); #endif //cl_khr_fp16 -#else +#endif //defined(__opencl_c_generic_address_space) + +#if defined(__opencl_c_named_address_space_builtins) char2 __ovld __purefn vload2(size_t offset, const __global char *p); uchar2 __ovld __purefn vload2(size_t offset, const __global uchar *p); short2 __ovld __purefn vload2(size_t offset, const __global short *p); @@ -11490,7 +11503,7 @@ half4 __ovld __purefn vload4(size_t offset, const __private half *p); half8 __ovld __purefn vload8(size_t offset, const __private half *p); half16 __ovld __purefn vload16(size_t offset, const __private half *p); #endif //cl_khr_fp16 -#endif //defined(__opencl_c_generic_address_space) +#endif //defined(__opencl_c_named_address_space_builtins) #if defined(__opencl_c_generic_address_space) void __ovld vstore2(char2 data, size_t offset, char *p); @@ -11553,7 +11566,9 @@ void __ovld vstore4(half4 data, size_t offset, half *p); void __ovld vstore8(half8 data, size_t offset, half *p); void __ovld vstore16(half16 data, size_t offset, half *p); #endif //cl_khr_fp16 -#else +#endif //defined(__opencl_c_generic_address_space) + +#if defined(__opencl_c_named_address_space_builtins) void __ovld vstore2(char2 data, size_t offset, __global char *p); void __ovld vstore2(uchar2 data, size_t offset, __global uchar *p); void __ovld vstore2(short2 data, size_t offset, __global short *p); @@ -11726,7 +11741,7 @@ void __ovld vstore4(half4 data, size_t offset, __private half *p); void __ovld vstore8(half8 data, size_t offset, __private half *p); void __ovld vstore16(half16 data, size_t offset, __private half *p); #endif //cl_khr_fp16 -#endif //defined(__opencl_c_generic_address_space) +#endif //defined(__opencl_c_named_address_space_builtins) /** * Read sizeof (half) bytes of data from address @@ -11739,11 +11754,13 @@ void __ovld vstore16(half16 data, size_t offset, __private half *p); float __ovld __purefn vload_half(size_t offset, const __constant half *p); #if defined(__opencl_c_generic_address_space) float __ovld __purefn vload_half(size_t offset, const half *p); -#else +#endif //defined(__opencl_c_generic_address_space) + +#if defined(__opencl_c_named_address_space_builtins) float __ovld __purefn vload_half(size_t offset, const __global half *p); float __ovld __purefn vload_half(size_t offset, const __local half *p); float __ovld __purefn vload_half(size_t offset, const __private half *p); -#endif //defined(__opencl_c_generic_address_space) +#endif //defined(__opencl_c_named_address_space_builtins) /** * Read sizeof (halfn) bytes of data from address @@ -11764,7 +11781,9 @@ float3 __ovld __purefn vload_half3(size_t offset, const half *p); float4 __ovld __purefn vload_half4(size_t offset, const half *p); float8 __ovld __purefn vload_half8(size_t offset, const half *p); float16 __ovld __purefn vload_half16(size_t offset, const half *p); -#else +#endif //defined(__opencl_c_generic_address_space) + +#if defined(__opencl_c_named_address_space_builtins) float2 __ovld __purefn vload_half2(size_t offset, const __global half *p); float3 __ovld __purefn vload_half3(size_t offset, const __global half *p); float4 __ovld __purefn vload_half4(size_t offset, const __global half *p); @@ -11780,7 +11799,7 @@ float3 __ovld __purefn vload_half3(size_t offset, const __private half *p); float4 __ovld __purefn vload_half4(size_t offset, const __private half *p); 
float8 __ovld __purefn vload_half8(size_t offset, const __private half *p); float16 __ovld __purefn vload_half16(size_t offset, const __private half *p); -#endif //defined(__opencl_c_generic_address_space) +#endif //defined(__opencl_c_named_address_space_builtins) /** * The float value given by data is first @@ -11806,7 +11825,9 @@ void __ovld vstore_half_rtz(double data, size_t offset, half *p); void __ovld vstore_half_rtp(double data, size_t offset, half *p); void __ovld vstore_half_rtn(double data, size_t offset, half *p); #endif //cl_khr_fp64 -#else +#endif //defined(__opencl_c_generic_address_space) + +#if defined(__opencl_c_named_address_space_builtins) void __ovld vstore_half(float data, size_t offset, __global half *p); void __ovld vstore_half_rte(float data, size_t offset, __global half *p); void __ovld vstore_half_rtz(float data, size_t offset, __global half *p); @@ -11839,7 +11860,7 @@ void __ovld vstore_half_rtz(double data, size_t offset, __private half *p); void __ovld vstore_half_rtp(double data, size_t offset, __private half *p); void __ovld vstore_half_rtn(double data, size_t offset, __private half *p); #endif //cl_khr_fp64 -#endif //defined(__opencl_c_generic_address_space) +#endif //defined(__opencl_c_named_address_space_builtins) /** * The floatn value given by data is converted to @@ -11905,7 +11926,9 @@ void __ovld vstore_half4_rtn(double4 data, size_t offset, half *p); void __ovld vstore_half8_rtn(double8 data, size_t offset, half *p); void __ovld vstore_half16_rtn(double16 data, size_t offset, half *p); #endif //cl_khr_fp64 -#else +#endif //defined(__opencl_c_generic_address_space) + +#if defined(__opencl_c_named_address_space_builtins) void __ovld vstore_half2(float2 data, size_t offset, __global half *p); void __ovld vstore_half3(float3 data, size_t offset, __global half *p); void __ovld vstore_half4(float4 data, size_t offset, __global half *p); @@ -12058,7 +12081,7 @@ void __ovld vstore_half4_rtn(double4 data, size_t offset, __private half *p); void __ovld vstore_half8_rtn(double8 data, size_t offset, __private half *p); void __ovld vstore_half16_rtn(double16 data, size_t offset, __private half *p); #endif //cl_khr_fp64 -#endif //defined(__opencl_c_generic_address_space) +#endif //defined(__opencl_c_named_address_space_builtins) /** * For n = 1, 2, 4, 8 and 16 read sizeof (halfn) @@ -12084,7 +12107,9 @@ float3 __ovld __purefn vloada_half3(size_t offset, const half *p); float4 __ovld __purefn vloada_half4(size_t offset, const half *p); float8 __ovld __purefn vloada_half8(size_t offset, const half *p); float16 __ovld __purefn vloada_half16(size_t offset, const half *p); -#else +#endif //defined(__opencl_c_generic_address_space) + +#if defined(__opencl_c_named_address_space_builtins) float2 __ovld __purefn vloada_half2(size_t offset, const __global half *p); float3 __ovld __purefn vloada_half3(size_t offset, const __global half *p); float4 __ovld __purefn vloada_half4(size_t offset, const __global half *p); @@ -12100,7 +12125,7 @@ float3 __ovld __purefn vloada_half3(size_t offset, const __private half *p); float4 __ovld __purefn vloada_half4(size_t offset, const __private half *p); float8 __ovld __purefn vloada_half8(size_t offset, const __private half *p); float16 __ovld __purefn vloada_half16(size_t offset, const __private half *p); -#endif //defined(__opencl_c_generic_address_space) +#endif //defined(__opencl_c_named_address_space_builtins) /** * The floatn value given by data is converted to @@ -12180,8 +12205,9 @@ void __ovld vstorea_half4_rtn(double4 data, 
size_t offset, half *p); void __ovld vstorea_half8_rtn(double8 data, size_t offset, half *p); void __ovld vstorea_half16_rtn(double16 data, size_t offset, half *p); #endif //cl_khr_fp64 +#endif //defined(__opencl_c_generic_address_space) -#else +#if defined(__opencl_c_named_address_space_builtins) void __ovld vstorea_half2(float2 data, size_t offset, __global half *p); void __ovld vstorea_half3(float3 data, size_t offset, __global half *p); void __ovld vstorea_half4(float4 data, size_t offset, __global half *p); @@ -12363,7 +12389,7 @@ void __ovld vstorea_half4_rtn(double4 data,size_t offset, __private half *p); void __ovld vstorea_half8_rtn(double8 data,size_t offset, __private half *p); void __ovld vstorea_half16_rtn(double16 data,size_t offset, __private half *p); #endif //cl_khr_fp64 -#endif //defined(__opencl_c_generic_address_space) +#endif //defined(__opencl_c_named_address_space_builtins) // OpenCL v1.1 s6.11.8, v1.2 s6.12.8, v2.0 s6.13.8 - Synchronization Functions @@ -18513,6 +18539,8 @@ int __ovld arm_dot_acc_sat(char4 a, char4 b, int c); // Disable any extensions we may have enabled previously. #pragma OPENCL EXTENSION all : disable +#undef __opencl_c_named_address_space_builtins + #undef __cnfn #undef __ovld #endif //_OPENCL_H_ diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 89e89c7c1f17..a180bba365cf 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -2378,8 +2378,9 @@ bool Lexer::SkipLineComment(Token &Result, const char *CurPtr, bool &TokAtPhysicalStartOfLine) { // If Line comments aren't explicitly enabled for this language, emit an // extension warning. - if (!LangOpts.LineComment && !isLexingRawMode()) { - Diag(BufferPtr, diag::ext_line_comment); + if (!LangOpts.LineComment) { + if (!isLexingRawMode()) // There's no PP in raw mode, so can't emit diags. + Diag(BufferPtr, diag::ext_line_comment); // Mark them enabled so we only emit one warning for this translation // unit. 
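The Lexer.cpp hunk above is a behavioral fix hiding in a refactor: under the old combined condition, raw-mode lexing skipped the whole block, including the code that marks line comments as enabled; the new nesting keeps the diagnostic out of raw mode while letting the marking run in both. A self-contained sketch of the corrected shape, with illustrative stand-ins rather than the real lexer state:

#include <cstdio>

static bool LineCommentsEnabled = false; // stands in for LangOpts.LineComment

static void skipLineComment(bool RawMode) {
  if (!LineCommentsEnabled) {
    if (!RawMode) // no preprocessor exists in raw mode, so no diagnostics
      std::puts("warning: // comments are a C99 extension");
    LineCommentsEnabled = true; // now runs in raw mode too: warn only once
  }
}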
diff --git a/clang/lib/Sema/OpenCLBuiltins.td b/clang/lib/Sema/OpenCLBuiltins.td index df2f206041c1..cd704ba2df13 100644 --- a/clang/lib/Sema/OpenCLBuiltins.td +++ b/clang/lib/Sema/OpenCLBuiltins.td @@ -85,6 +85,8 @@ def FuncExtKhrMipmapImageWrites : FunctionExtension<"cl_khr_mipmap_imag def FuncExtKhrGlMsaaSharing : FunctionExtension<"cl_khr_gl_msaa_sharing">; def FuncExtKhrGlMsaaSharingReadWrite : FunctionExtension<"cl_khr_gl_msaa_sharing __opencl_c_read_write_images">; +def FuncExtOpenCLCGenericAddressSpace : FunctionExtension<"__opencl_c_generic_address_space">; +def FuncExtOpenCLCNamedAddressSpaceBuiltins : FunctionExtension<"__opencl_c_named_address_space_builtins">; def FuncExtOpenCLCPipes : FunctionExtension<"__opencl_c_pipes">; def FuncExtOpenCLCWGCollectiveFunctions : FunctionExtension<"__opencl_c_work_group_collective_functions">; def FuncExtOpenCLCReadWriteImages : FunctionExtension<"__opencl_c_read_write_images">; @@ -591,10 +593,10 @@ multiclass MathWithPointer<list<AddressSpace> addrspaces> { } } -let MaxVersion = CL20 in { +let Extension = FuncExtOpenCLCNamedAddressSpaceBuiltins in { defm : MathWithPointer<[GlobalAS, LocalAS, PrivateAS]>; } -let MinVersion = CL20 in { +let Extension = FuncExtOpenCLCGenericAddressSpace in { defm : MathWithPointer<[GenericAS]>; } @@ -840,10 +842,10 @@ multiclass VloadVstore<list<AddressSpace> addrspaces, bit defStores> { } } -let MaxVersion = CL20 in { +let Extension = FuncExtOpenCLCNamedAddressSpaceBuiltins in { defm : VloadVstore<[GlobalAS, LocalAS, PrivateAS], 1>; } -let MinVersion = CL20 in { +let Extension = FuncExtOpenCLCGenericAddressSpace in { defm : VloadVstore<[GenericAS], 1>; } // vload with constant address space is available regardless of version. @@ -874,10 +876,10 @@ multiclass VloadVstoreHalf<list<AddressSpace> addrspaces, bit defStores> { } } -let MaxVersion = CL20 in { +let Extension = FuncExtOpenCLCNamedAddressSpaceBuiltins in { defm : VloadVstoreHalf<[GlobalAS, LocalAS, PrivateAS], 1>; } -let MinVersion = CL20 in { +let Extension = FuncExtOpenCLCGenericAddressSpace in { defm : VloadVstoreHalf<[GenericAS], 1>; } // vload_half and vloada_half with constant address space are available regardless of version. diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index 20b4a9a5d4e6..7b57c8da4e9c 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -2554,39 +2554,38 @@ static bool IsCPUDispatchCPUSpecificMultiVersion(const Expr *E) { bool Sema::tryToRecoverWithCall(ExprResult &E, const PartialDiagnostic &PD, bool ForceComplain, bool (*IsPlausibleResult)(QualType)) { - if (isSFINAEContext()) { - // If this is a SFINAE context, don't try anything that might trigger ADL - // prematurely. - return false; - } SourceLocation Loc = E.get()->getExprLoc(); SourceRange Range = E.get()->getSourceRange(); - - QualType ZeroArgCallTy; UnresolvedSet<4> Overloads; - if (tryExprAsCall(*E.get(), ZeroArgCallTy, Overloads) && - !ZeroArgCallTy.isNull() && - (!IsPlausibleResult || IsPlausibleResult(ZeroArgCallTy))) { - // At this point, we know E is potentially callable with 0 - // arguments and that it returns something of a reasonable type, - // so we can emit a fixit and carry on pretending that E was - // actually a CallExpr. - SourceLocation ParenInsertionLoc = getLocForEndOfToken(Range.getEnd()); - bool IsMV = IsCPUDispatchCPUSpecificMultiVersion(E.get()); - Diag(Loc, PD) << /*zero-arg*/ 1 << IsMV << Range - << (IsCallableWithAppend(E.get()) - ? 
FixItHint::CreateInsertion(ParenInsertionLoc, "()") - : FixItHint()); - if (!IsMV) - notePlausibleOverloads(*this, Loc, Overloads, IsPlausibleResult); - - // FIXME: Try this before emitting the fixit, and suppress diagnostics - // while doing so. - E = BuildCallExpr(nullptr, E.get(), Range.getEnd(), None, - Range.getEnd().getLocWithOffset(1)); - return true; - } + // If this is a SFINAE context, don't try anything that might trigger ADL + // prematurely. + if (!isSFINAEContext()) { + QualType ZeroArgCallTy; + if (tryExprAsCall(*E.get(), ZeroArgCallTy, Overloads) && + !ZeroArgCallTy.isNull() && + (!IsPlausibleResult || IsPlausibleResult(ZeroArgCallTy))) { + // At this point, we know E is potentially callable with 0 + // arguments and that it returns something of a reasonable type, + // so we can emit a fixit and carry on pretending that E was + // actually a CallExpr. + SourceLocation ParenInsertionLoc = getLocForEndOfToken(Range.getEnd()); + bool IsMV = IsCPUDispatchCPUSpecificMultiVersion(E.get()); + Diag(Loc, PD) << /*zero-arg*/ 1 << IsMV << Range + << (IsCallableWithAppend(E.get()) + ? FixItHint::CreateInsertion(ParenInsertionLoc, + "()") + : FixItHint()); + if (!IsMV) + notePlausibleOverloads(*this, Loc, Overloads, IsPlausibleResult); + + // FIXME: Try this before emitting the fixit, and suppress diagnostics + // while doing so. + E = BuildCallExpr(nullptr, E.get(), Range.getEnd(), None, + Range.getEnd().getLocWithOffset(1)); + return true; + } + } if (!ForceComplain) return false; bool IsMV = IsCPUDispatchCPUSpecificMultiVersion(E.get()); diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp index 59601c5ce79d..efa38554bc83 100644 --- a/clang/lib/Sema/SemaCUDA.cpp +++ b/clang/lib/Sema/SemaCUDA.cpp @@ -590,6 +590,8 @@ bool HasAllowedCUDADeviceStaticInitializer(Sema &S, VarDecl *VD, }; auto IsConstantInit = [&](const Expr *Init) { assert(Init); + ASTContext::CUDAConstantEvalContextRAII EvalCtx(S.Context, + /*NoWrongSidedVars=*/true); return Init->isConstantInitializer(S.Context, VD->getType()->isReferenceType()); }; diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index c8fb36b8311a..dfbf4cdc89cb 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -3987,7 +3987,7 @@ bool Sema::CheckRISCVBuiltinFunctionCall(const TargetInfo &TI, } if (!HasFeature) { - std::string FeatureStrs = ""; + std::string FeatureStrs; for (StringRef OF : ReqOpFeatures) { // If the feature is 64-bit, alter the string so it will print better in // the diagnostic. 
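The Sema.cpp restructuring above moves the SFINAE check inward: the old early `return false` skipped everything, including the `ForceComplain` path below it, whereas the new shape fences off only the ADL-triggering zero-argument-call recovery. Reduced to its control-flow skeleton, with hypothetical helpers in place of the real Clang calls:

static bool tryZeroArgCallFixit() { return false; } // hypothetical recovery
static void emitCannotBeCalledDiagnostic() {}       // hypothetical diagnostic

static bool tryToRecover(bool InSFINAEContext, bool ForceComplain) {
  // Only the recovery that might trigger ADL is suppressed under SFINAE.
  if (!InSFINAEContext && tryZeroArgCallFixit())
    return true;
  if (!ForceComplain)
    return false;
  emitCannotBeCalledDiagnostic(); // now reachable even in a SFINAE context
  return true;
}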
diff --git a/clang/lib/Sema/SemaCoroutine.cpp b/clang/lib/Sema/SemaCoroutine.cpp index e7e60b7e7daf..cd3ae62ebbe2 100644 --- a/clang/lib/Sema/SemaCoroutine.cpp +++ b/clang/lib/Sema/SemaCoroutine.cpp @@ -810,7 +810,7 @@ ExprResult Sema::ActOnCoawaitExpr(Scope *S, SourceLocation Loc, Expr *E) { checkSuspensionContext(*this, Loc, "co_await"); - if (E->getType()->isPlaceholderType()) { + if (E->hasPlaceholderType()) { ExprResult R = CheckPlaceholderExpr(E); if (R.isInvalid()) return ExprError(); E = R.get(); @@ -828,7 +828,7 @@ ExprResult Sema::BuildUnresolvedCoawaitExpr(SourceLocation Loc, Expr *E, if (!FSI) return ExprError(); - if (E->getType()->isPlaceholderType()) { + if (E->hasPlaceholderType()) { ExprResult R = CheckPlaceholderExpr(E); if (R.isInvalid()) return ExprError(); @@ -866,7 +866,7 @@ ExprResult Sema::BuildResolvedCoawaitExpr(SourceLocation Loc, Expr *E, if (!Coroutine) return ExprError(); - if (E->getType()->isPlaceholderType()) { + if (E->hasPlaceholderType()) { ExprResult R = CheckPlaceholderExpr(E); if (R.isInvalid()) return ExprError(); E = R.get(); @@ -927,7 +927,7 @@ ExprResult Sema::BuildCoyieldExpr(SourceLocation Loc, Expr *E) { if (!Coroutine) return ExprError(); - if (E->getType()->isPlaceholderType()) { + if (E->hasPlaceholderType()) { ExprResult R = CheckPlaceholderExpr(E); if (R.isInvalid()) return ExprError(); E = R.get(); @@ -970,8 +970,8 @@ StmtResult Sema::BuildCoreturnStmt(SourceLocation Loc, Expr *E, if (!FSI) return StmtError(); - if (E && E->getType()->isPlaceholderType() && - !E->getType()->isSpecificPlaceholderType(BuiltinType::Overload)) { + if (E && E->hasPlaceholderType() && + !E->hasPlaceholderType(BuiltinType::Overload)) { ExprResult R = CheckPlaceholderExpr(E); if (R.isInvalid()) return StmtError(); E = R.get(); diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 3252671991b7..cbd9df4d6a7b 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -5703,6 +5703,13 @@ static bool RebuildDeclaratorInCurrentInstantiation(Sema &S, Declarator &D, return false; } +/// Returns true if the declaration is declared in a system header or from a +/// system macro. +static bool isFromSystemHeader(SourceManager &SM, const Decl *D) { + return SM.isInSystemHeader(D->getLocation()) || + SM.isInSystemMacro(D->getLocation()); +} + void Sema::warnOnReservedIdentifier(const NamedDecl *D) { // Avoid warning twice on the same identifier, and don't warn on redeclaration // of system decl. @@ -5710,9 +5717,10 @@ void Sema::warnOnReservedIdentifier(const NamedDecl *D) { return; ReservedIdentifierStatus Status = D->isReserved(getLangOpts()); if (Status != ReservedIdentifierStatus::NotReserved && - !Context.getSourceManager().isInSystemHeader(D->getLocation())) + !isFromSystemHeader(Context.getSourceManager(), D)) { Diag(D->getLocation(), diag::warn_reserved_extern_symbol) << D << static_cast<int>(Status); + } } Decl *Sema::ActOnDeclarator(Scope *S, Declarator &D) { @@ -14188,6 +14196,9 @@ ShouldWarnAboutMissingPrototype(const FunctionDecl *FD, if (!FD->isGlobal()) return false; + if (!FD->isExternallyVisible()) + return false; + // Don't warn about C++ member functions. 
if (isa<CXXMethodDecl>(FD)) return false; diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index f04236ab96c3..e76e7c608e0c 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -3393,7 +3393,8 @@ bool Sema::checkTargetAttr(SourceLocation LiteralLoc, StringRef AttrStr) { if (ParsedAttrs.BranchProtection.empty()) return false; if (!Context.getTargetInfo().validateBranchProtection( - ParsedAttrs.BranchProtection, BPI, DiagMsg)) { + ParsedAttrs.BranchProtection, ParsedAttrs.Architecture, BPI, + DiagMsg)) { if (DiagMsg.empty()) return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) << Unsupported << None << "branch-protection" << Target; diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 7de43705c2b1..85553eccde83 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -497,7 +497,7 @@ SourceRange Sema::getExprRange(Expr *E) const { /// DefaultFunctionArrayConversion (C99 6.3.2.1p3, C99 6.3.2.1p4). ExprResult Sema::DefaultFunctionArrayConversion(Expr *E, bool Diagnose) { // Handle any placeholder expressions which made it here. - if (E->getType()->isPlaceholderType()) { + if (E->hasPlaceholderType()) { ExprResult result = CheckPlaceholderExpr(E); if (result.isInvalid()) return ExprError(); E = result.get(); @@ -621,7 +621,7 @@ static void DiagnoseDirectIsaAccess(Sema &S, const ObjCIvarRefExpr *OIRE, ExprResult Sema::DefaultLvalueConversion(Expr *E) { // Handle any placeholder expressions which made it here. - if (E->getType()->isPlaceholderType()) { + if (E->hasPlaceholderType()) { ExprResult result = CheckPlaceholderExpr(E); if (result.isInvalid()) return ExprError(); E = result.get(); @@ -4685,7 +4685,7 @@ ExprResult Sema::ActOnArraySubscriptExpr(Scope *S, Expr *base, SourceLocation lbLoc, Expr *idx, SourceLocation rbLoc) { if (base && !base->getType().isNull() && - base->getType()->isSpecificPlaceholderType(BuiltinType::OMPArraySection) + base->hasPlaceholderType(BuiltinType::OMPArraySection)) return ActOnOMPArraySectionExpr(base, lbLoc, idx, SourceLocation(), SourceLocation(), /*Length*/ nullptr, /*Stride=*/nullptr, rbLoc); @@ -4711,8 +4711,7 @@ Sema::ActOnArraySubscriptExpr(Scope *S, Expr *base, SourceLocation lbLoc, }; // The matrix subscript operator ([][]) is considered a single operator. // Separating the index expressions by parentheses is not allowed. 
- if (base->getType()->isSpecificPlaceholderType( - BuiltinType::IncompleteMatrixIdx) && + if (base->hasPlaceholderType(BuiltinType::IncompleteMatrixIdx) && !isa<MatrixSubscriptExpr>(base)) { Diag(base->getExprLoc(), diag::err_matrix_separate_incomplete_index) << SourceRange(base->getBeginLoc(), rbLoc); @@ -4944,9 +4943,8 @@ ExprResult Sema::ActOnOMPArraySectionExpr(Expr *Base, SourceLocation LBLoc, SourceLocation ColonLocSecond, Expr *Length, Expr *Stride, SourceLocation RBLoc) { - if (Base->getType()->isPlaceholderType() && - !Base->getType()->isSpecificPlaceholderType( - BuiltinType::OMPArraySection)) { + if (Base->hasPlaceholderType() && + !Base->hasPlaceholderType(BuiltinType::OMPArraySection)) { ExprResult Result = CheckPlaceholderExpr(Base); if (Result.isInvalid()) return ExprError(); @@ -5114,8 +5112,7 @@ ExprResult Sema::ActOnOMPArraySectionExpr(Expr *Base, SourceLocation LBLoc, } } - if (!Base->getType()->isSpecificPlaceholderType( - BuiltinType::OMPArraySection)) { + if (!Base->hasPlaceholderType(BuiltinType::OMPArraySection)) { ExprResult Result = DefaultFunctionArrayLvalueConversion(Base); if (Result.isInvalid()) return ExprError(); @@ -5130,7 +5127,7 @@ ExprResult Sema::ActOnOMPArrayShapingExpr(Expr *Base, SourceLocation LParenLoc, SourceLocation RParenLoc, ArrayRef<Expr *> Dims, ArrayRef<SourceRange> Brackets) { - if (Base->getType()->isPlaceholderType()) { + if (Base->hasPlaceholderType()) { ExprResult Result = CheckPlaceholderExpr(Base); if (Result.isInvalid()) return ExprError(); @@ -5155,7 +5152,7 @@ ExprResult Sema::ActOnOMPArrayShapingExpr(Expr *Base, SourceLocation LParenLoc, SmallVector<Expr *, 4> NewDims; bool ErrorFound = false; for (Expr *Dim : Dims) { - if (Dim->getType()->isPlaceholderType()) { + if (Dim->hasPlaceholderType()) { ExprResult Result = CheckPlaceholderExpr(Dim); if (Result.isInvalid()) { ErrorFound = true; @@ -13653,7 +13650,7 @@ QualType Sema::CheckAddressOfOperand(ExprResult &OrigOp, SourceLocation OpLoc) { if (OrigOp.get()->isTypeDependent()) return Context.DependentTy; - assert(!OrigOp.get()->getType()->isPlaceholderType()); + assert(!OrigOp.get()->hasPlaceholderType()); // Make sure to ignore parentheses in subsequent checks Expr *op = OrigOp.get()->IgnoreParens(); diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index b34b744d7312..7ce125f5ef82 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -564,7 +564,7 @@ ExprResult Sema::BuildCXXTypeId(QualType TypeInfoType, SourceLocation RParenLoc) { bool WasEvaluated = false; if (E && !E->isTypeDependent()) { - if (E->getType()->isPlaceholderType()) { + if (E->hasPlaceholderType()) { ExprResult result = CheckPlaceholderExpr(E); if (result.isInvalid()) return ExprError(); E = result.get(); @@ -5704,7 +5704,7 @@ ExprResult Sema::BuildExpressionTrait(ExpressionTrait ET, SourceLocation RParen) { if (Queried->isTypeDependent()) { // Delay type-checking for type-dependent expressions. 
- } else if (Queried->getType()->isPlaceholderType()) { + } else if (Queried->hasPlaceholderType()) { ExprResult PE = CheckPlaceholderExpr(Queried); if (PE.isInvalid()) return ExprError(); return BuildExpressionTrait(ET, KWLoc, PE.get(), RParen); @@ -5720,8 +5720,7 @@ QualType Sema::CheckPointerToMemberOperands(ExprResult &LHS, ExprResult &RHS, ExprValueKind &VK, SourceLocation Loc, bool isIndirect) { - assert(!LHS.get()->getType()->isPlaceholderType() && - !RHS.get()->getType()->isPlaceholderType() && + assert(!LHS.get()->hasPlaceholderType() && !RHS.get()->hasPlaceholderType() && "placeholders should have been weeded out by now"); // The LHS undergoes lvalue conversions if this is ->*, and undergoes the diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index ae91a6470471..a500ad4f0220 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -5327,6 +5327,8 @@ static CapturedStmt *buildDistanceFunc(Sema &Actions, QualType LogicalTy, IntegerLiteral *Zero = IntegerLiteral::Create( Ctx, llvm::APInt(Ctx.getIntWidth(LogicalTy), 0), LogicalTy, {}); + IntegerLiteral *One = IntegerLiteral::Create( + Ctx, llvm::APInt(Ctx.getIntWidth(LogicalTy), 1), LogicalTy, {}); Expr *Dist; if (Rel == BO_NE) { // When using a != comparison, the increment can be +1 or -1. This can be @@ -5381,18 +5383,25 @@ static CapturedStmt *buildDistanceFunc(Sema &Actions, QualType LogicalTy, if (Rel == BO_LE || Rel == BO_GE) { // Add one to the range if the relational operator is inclusive. - Range = AssertSuccess(Actions.BuildBinOp( - nullptr, {}, BO_Add, Range, - Actions.ActOnIntegerConstant(SourceLocation(), 1).get())); + Range = + AssertSuccess(Actions.BuildBinOp(nullptr, {}, BO_Add, Range, One)); } - // Divide by the absolute step amount. + // Divide by the absolute step amount. If the range is not a multiple of + // the step size, rounding-up the effective upper bound ensures that the + // last iteration is included. + // Note that the rounding-up may cause an overflow in a temporary that + // could be avoided, but would have occurred in a C-style for-loop as well. Expr *Divisor = BuildVarRef(NewStep); if (Rel == BO_GE || Rel == BO_GT) Divisor = AssertSuccess(Actions.BuildUnaryOp(nullptr, {}, UO_Minus, Divisor)); + Expr *DivisorMinusOne = + AssertSuccess(Actions.BuildBinOp(nullptr, {}, BO_Sub, Divisor, One)); + Expr *RangeRoundUp = AssertSuccess( + Actions.BuildBinOp(nullptr, {}, BO_Add, Range, DivisorMinusOne)); Dist = AssertSuccess( - Actions.BuildBinOp(nullptr, {}, BO_Div, Range, Divisor)); + Actions.BuildBinOp(nullptr, {}, BO_Div, RangeRoundUp, Divisor)); // If there is not at least one iteration, the range contains garbage. Fix // to zero in this case. 
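The arithmetic in the buildDistanceFunc change is easiest to check with concrete numbers. For a loop header like `for (i = 0; i <= 7; i += 3)` the iterations are i = 0, 3, 6: the inclusive bound makes the range 8, and the old truncating division 8 / 3 reported only 2 iterations, dropping i == 6. Rounding the range up by step - 1 before dividing fixes that; a minimal sketch:

// Trip count via round-up division: (Range + (Step - 1)) / Step.
static unsigned tripCount(unsigned Range, unsigned Step) {
  return (Range + (Step - 1)) / Step; // (8 + 2) / 3 == 3 iterations, not 2
}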
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 483247aaa7c5..3fa192cedfa3 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -14320,7 +14320,8 @@ ExprResult Sema::BuildCallToMemberFunction(Scope *S, Expr *MemExprE, FoundDecl = MemExpr->getFoundDecl(); Qualifier = MemExpr->getQualifier(); UnbridgedCasts.restore(); - } else if (auto *UnresExpr = dyn_cast<UnresolvedMemberExpr>(NakedMemExpr)) { + } else { + UnresolvedMemberExpr *UnresExpr = cast<UnresolvedMemberExpr>(NakedMemExpr); Qualifier = UnresExpr->getQualifier(); QualType ObjectType = UnresExpr->getBaseType(); @@ -14433,9 +14434,7 @@ ExprResult Sema::BuildCallToMemberFunction(Scope *S, Expr *MemExprE, } MemExpr = cast<MemberExpr>(MemExprE->IgnoreParens()); - } else - // Unimaged NakedMemExpr type. - return ExprError(); + } QualType ResultType = Method->getReturnType(); ExprValueKind VK = Expr::getValueKindForType(ResultType); diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index 7c6bb4c8a5f8..6de486be8f16 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -1943,6 +1943,9 @@ TemplateInstantiator::TransformExprRequirement(concepts::ExprRequirement *Req) { if (ExprInst.isInvalid()) return nullptr; ExprResult TransExprRes = TransformExpr(E); + if (!TransExprRes.isInvalid() && !Trap.hasErrorOccurred() && + TransExprRes.get()->hasPlaceholderType()) + TransExprRes = SemaRef.CheckPlaceholderExpr(TransExprRes.get()); if (TransExprRes.isInvalid() || Trap.hasErrorOccurred()) TransExpr = createSubstDiag(SemaRef, Info, [&](llvm::raw_ostream &OS) { E->printPretty(OS, nullptr, SemaRef.getPrintingPolicy()); diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 959f4903b030..ab47e9f03eaf 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -2257,7 +2257,7 @@ QualType Sema::BuildBitIntType(bool IsUnsigned, Expr *BitWidth, if (ICE.isInvalid()) return QualType(); - int64_t NumBits = Bits.getSExtValue(); + size_t NumBits = Bits.getZExtValue(); if (!IsUnsigned && NumBits < 2) { Diag(Loc, diag::err_bit_int_bad_size) << 0; return QualType(); @@ -2268,9 +2268,10 @@ QualType Sema::BuildBitIntType(bool IsUnsigned, Expr *BitWidth, return QualType(); } - if (NumBits > llvm::IntegerType::MAX_INT_BITS) { + const TargetInfo &TI = getASTContext().getTargetInfo(); + if (NumBits > TI.getMaxBitIntWidth()) { Diag(Loc, diag::err_bit_int_max_size) - << IsUnsigned << llvm::IntegerType::MAX_INT_BITS; + << IsUnsigned << static_cast<uint64_t>(TI.getMaxBitIntWidth()); return QualType(); } diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index e43b3ca968eb..5c37fcaaea13 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -12494,6 +12494,8 @@ TreeTransform<Derived>::TransformExprRequirement(concepts::ExprRequirement *Req) TransExpr = Req->getExprSubstitutionDiagnostic(); else { ExprResult TransExprRes = getDerived().TransformExpr(Req->getExpr()); + if (TransExprRes.isUsable() && TransExprRes.get()->hasPlaceholderType()) + TransExprRes = SemaRef.CheckPlaceholderExpr(TransExprRes.get()); if (TransExprRes.isInvalid()) return nullptr; TransExpr = TransExprRes.get(); diff --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc index 0544b6b2ef71..62054a6a3df5 100644 --- a/compiler-rt/include/profile/InstrProfData.inc +++ 
b/compiler-rt/include/profile/InstrProfData.inc @@ -660,6 +660,8 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, * generated profile, and 0 if this is a Clang FE generated profile. * 1 in bit 57 indicates there are context-sensitive records in the profile. * The 59th bit indicates whether to use debug info to correlate profiles. + * The 60th bit indicates single byte coverage instrumentation. + * The 61st bit indicates function entry instrumentation only. */ #define VARIANT_MASKS_ALL 0xff00000000000000ULL #define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL) @@ -667,6 +669,8 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, #define VARIANT_MASK_CSIR_PROF (0x1ULL << 57) #define VARIANT_MASK_INSTR_ENTRY (0x1ULL << 58) #define VARIANT_MASK_DBG_CORRELATE (0x1ULL << 59) +#define VARIANT_MASK_BYTE_COVERAGE (0x1ULL << 60) +#define VARIANT_MASK_FUNCTION_ENTRY_ONLY (0x1ULL << 61) #define INSTR_PROF_RAW_VERSION_VAR __llvm_profile_raw_version #define INSTR_PROF_PROFILE_RUNTIME_VAR __llvm_profile_runtime #define INSTR_PROF_PROFILE_COUNTER_BIAS_VAR __llvm_profile_counter_bias diff --git a/compiler-rt/include/profile/MemProfData.inc b/compiler-rt/include/profile/MemProfData.inc index d64227e4ba31..20f8308645c1 100644 --- a/compiler-rt/include/profile/MemProfData.inc +++ b/compiler-rt/include/profile/MemProfData.inc @@ -20,11 +20,10 @@ * \*===----------------------------------------------------------------------===*/ - #ifdef _MSC_VER -#define PACKED(__decl__) __pragma(pack(push,1)) __decl__ __pragma(pack(pop)) +#define PACKED(...) __pragma(pack(push,1)) __VA_ARGS__ __pragma(pack(pop)) #else -#define PACKED(__decl__) __decl__ __attribute__((__packed__)) +#define PACKED(...) __VA_ARGS__ __attribute__((__packed__)) #endif // A 64-bit magic number to uniquely identify the raw binary memprof profile file. @@ -47,14 +46,106 @@ PACKED(struct Header { uint64_t StackOffset; }); + // A struct describing the information necessary to describe a /proc/maps // segment entry for a particular binary/library identified by its build id. PACKED(struct SegmentEntry { uint64_t Start; uint64_t End; uint64_t Offset; - uint8_t BuildId[32]; + // This field is unused until sanitizer procmaps support for build ids for + // Linux-Elf is implemented. + uint8_t BuildId[32] = {0}; + + SegmentEntry(uint64_t S, uint64_t E, uint64_t O) : + Start(S), End(E), Offset(O) {} + + SegmentEntry(const SegmentEntry& S) { + Start = S.Start; + End = S.End; + Offset = S.Offset; + } + + SegmentEntry& operator=(const SegmentEntry& S) { + Start = S.Start; + End = S.End; + Offset = S.Offset; + return *this; + } + + bool operator==(const SegmentEntry& S) const { + return Start == S.Start && + End == S.End && + Offset == S.Offset; + } }); + +// A struct representing the heap allocation characteristics of a particular +// runtime context. This struct is shared between the compiler-rt runtime and +// the raw profile reader. The indexed format uses a separate, self-describing +// backwards compatible format. +PACKED(struct MemInfoBlock { + uint32_t alloc_count; + uint64_t total_access_count, min_access_count, max_access_count; + uint64_t total_size; + uint32_t min_size, max_size; + uint32_t alloc_timestamp, dealloc_timestamp; + uint64_t total_lifetime; + uint32_t min_lifetime, max_lifetime; + uint32_t alloc_cpu_id, dealloc_cpu_id; + uint32_t num_migrated_cpu; + + // Only compared to prior deallocated object currently. 
+ uint32_t num_lifetime_overlaps; + uint32_t num_same_alloc_cpu; + uint32_t num_same_dealloc_cpu; + + uint64_t data_type_id; // TODO: hash of type name + + MemInfoBlock() : alloc_count(0) {} + + MemInfoBlock(uint32_t size, uint64_t access_count, uint32_t alloc_timestamp, + uint32_t dealloc_timestamp, uint32_t alloc_cpu, uint32_t dealloc_cpu) + : alloc_count(1), total_access_count(access_count), + min_access_count(access_count), max_access_count(access_count), + total_size(size), min_size(size), max_size(size), + alloc_timestamp(alloc_timestamp), dealloc_timestamp(dealloc_timestamp), + total_lifetime(dealloc_timestamp - alloc_timestamp), + min_lifetime(total_lifetime), max_lifetime(total_lifetime), + alloc_cpu_id(alloc_cpu), dealloc_cpu_id(dealloc_cpu), + num_lifetime_overlaps(0), num_same_alloc_cpu(0), + num_same_dealloc_cpu(0) { + num_migrated_cpu = alloc_cpu_id != dealloc_cpu_id; + } + + void Merge(const MemInfoBlock &newMIB) { + alloc_count += newMIB.alloc_count; + + total_access_count += newMIB.total_access_count; + min_access_count = newMIB.min_access_count < min_access_count ? newMIB.min_access_count : min_access_count; + max_access_count = newMIB.max_access_count > max_access_count ? newMIB.max_access_count : max_access_count; + + total_size += newMIB.total_size; + min_size = newMIB.min_size < min_size ? newMIB.min_size : min_size; + max_size = newMIB.max_size > max_size ? newMIB.max_size : max_size; + + total_lifetime += newMIB.total_lifetime; + min_lifetime = newMIB.min_lifetime < min_lifetime ? newMIB.min_lifetime : min_lifetime; + max_lifetime = newMIB.max_lifetime > max_lifetime ? newMIB.max_lifetime : max_lifetime; + + // We know newMIB was deallocated later, so we just need to check whether + // it was allocated before the last one was deallocated. + num_lifetime_overlaps += newMIB.alloc_timestamp < dealloc_timestamp; + alloc_timestamp = newMIB.alloc_timestamp; + dealloc_timestamp = newMIB.dealloc_timestamp; + + num_same_alloc_cpu += alloc_cpu_id == newMIB.alloc_cpu_id; + num_same_dealloc_cpu += dealloc_cpu_id == newMIB.dealloc_cpu_id; + alloc_cpu_id = newMIB.alloc_cpu_id; + dealloc_cpu_id = newMIB.dealloc_cpu_id; + } +}); + } // namespace memprof } // namespace llvm diff --git a/compiler-rt/include/sanitizer/common_interface_defs.h b/compiler-rt/include/sanitizer/common_interface_defs.h index 692b8f70c969..ba58ad46f32d 100644 --- a/compiler-rt/include/sanitizer/common_interface_defs.h +++ b/compiler-rt/include/sanitizer/common_interface_defs.h @@ -211,6 +211,15 @@ void __sanitizer_symbolize_pc(void *pc, const char *fmt, char *out_buf, // Same as __sanitizer_symbolize_pc, but for data section (i.e. globals). void __sanitizer_symbolize_global(void *data_ptr, const char *fmt, char *out_buf, size_t out_buf_size); +// Determine the return address. +#if !defined(_MSC_VER) || defined(__clang__) +#define __sanitizer_return_address() \ + __builtin_extract_return_addr(__builtin_return_address(0)) +#else +extern "C" void *_ReturnAddress(void); +#pragma intrinsic(_ReturnAddress) +#define __sanitizer_return_address() _ReturnAddress() +#endif /// Sets the callback to be called immediately before death on error. 
/// diff --git a/compiler-rt/lib/builtins/floatsisf.c b/compiler-rt/lib/builtins/floatsisf.c index fe060407755b..c01f81e41e8e 100644 --- a/compiler-rt/lib/builtins/floatsisf.c +++ b/compiler-rt/lib/builtins/floatsisf.c @@ -17,7 +17,7 @@ #include "int_lib.h" -COMPILER_RT_ABI fp_t __floatsisf(int a) { +COMPILER_RT_ABI fp_t __floatsisf(si_int a) { const int aWidth = sizeof a * CHAR_BIT; @@ -33,7 +33,7 @@ COMPILER_RT_ABI fp_t __floatsisf(int a) { } // Exponent of (fp_t)a is the width of abs(a). - const int exponent = (aWidth - 1) - __builtin_clz(a); + const int exponent = (aWidth - 1) - clzsi(a); rep_t result; // Shift a into the significand field, rounding if it is a right-shift diff --git a/compiler-rt/lib/builtins/floatsitf.c b/compiler-rt/lib/builtins/floatsitf.c index f56063f368d9..80a4ef08fb0e 100644 --- a/compiler-rt/lib/builtins/floatsitf.c +++ b/compiler-rt/lib/builtins/floatsitf.c @@ -16,7 +16,7 @@ #include "fp_lib.h" #if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) -COMPILER_RT_ABI fp_t __floatsitf(int a) { +COMPILER_RT_ABI fp_t __floatsitf(si_int a) { const int aWidth = sizeof a * CHAR_BIT; @@ -26,14 +26,14 @@ COMPILER_RT_ABI fp_t __floatsitf(int a) { // All other cases begin by extracting the sign and absolute value of a rep_t sign = 0; - unsigned aAbs = (unsigned)a; + su_int aAbs = (su_int)a; if (a < 0) { sign = signBit; - aAbs = ~(unsigned)a + 1U; + aAbs = ~(su_int)a + (su_int)1U; } // Exponent of (fp_t)a is the width of abs(a). - const int exponent = (aWidth - 1) - __builtin_clz(aAbs); + const int exponent = (aWidth - 1) - clzsi(aAbs); rep_t result; // Shift a into the significand field and clear the implicit bit. diff --git a/compiler-rt/lib/builtins/floatunsisf.c b/compiler-rt/lib/builtins/floatunsisf.c index 33a1b5ae2a63..ec062b5943e9 100644 --- a/compiler-rt/lib/builtins/floatunsisf.c +++ b/compiler-rt/lib/builtins/floatunsisf.c @@ -17,7 +17,7 @@ #include "int_lib.h" -COMPILER_RT_ABI fp_t __floatunsisf(unsigned int a) { +COMPILER_RT_ABI fp_t __floatunsisf(su_int a) { const int aWidth = sizeof a * CHAR_BIT; @@ -26,7 +26,7 @@ COMPILER_RT_ABI fp_t __floatunsisf(unsigned int a) { return fromRep(0); // Exponent of (fp_t)a is the width of abs(a). - const int exponent = (aWidth - 1) - __builtin_clz(a); + const int exponent = (aWidth - 1) - clzsi(a); rep_t result; // Shift a into the significand field, rounding if it is a right-shift diff --git a/compiler-rt/lib/builtins/floatunsitf.c b/compiler-rt/lib/builtins/floatunsitf.c index a4bf0f65fe1c..7ba1fb6000dc 100644 --- a/compiler-rt/lib/builtins/floatunsitf.c +++ b/compiler-rt/lib/builtins/floatunsitf.c @@ -16,7 +16,7 @@ #include "fp_lib.h" #if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) -COMPILER_RT_ABI fp_t __floatunsitf(unsigned int a) { +COMPILER_RT_ABI fp_t __floatunsitf(su_int a) { const int aWidth = sizeof a * CHAR_BIT; @@ -25,7 +25,7 @@ COMPILER_RT_ABI fp_t __floatunsitf(unsigned int a) { return fromRep(0); // Exponent of (fp_t)a is the width of abs(a). - const int exponent = (aWidth - 1) - __builtin_clz(a); + const int exponent = (aWidth - 1) - clzsi(a); rep_t result; // Shift a into the significand field and clear the implicit bit. 
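The four float-conversion hunks above all replace `__builtin_clz` with `clzsi`: the builtin is defined for `unsigned int`, while these routines compute in terms of `su_int`, whose width need not match `int` on every target, and `clzsi` (from int_lib.h) is compiler-rt's width-matched count-leading-zeros helper. A self-contained sketch of the exponent computation it feeds, assuming a 32-bit `su_int` and with a plain loop standing in for `clzsi`:

typedef unsigned int su_int; // assumed 32 bits wide in this sketch

static int clzsi_sketch(su_int a) { // illustrative stand-in for clzsi
  int n = 0;
  for (su_int mask = (su_int)1 << 31; mask != 0 && (a & mask) == 0; mask >>= 1)
    ++n;
  return n; // leading-zero count; callers guarantee a != 0
}

static int exponentOf(su_int aAbs) {
  const int aWidth = 32; // sizeof(su_int) * CHAR_BIT in the real code
  return (aWidth - 1) - clzsi_sketch(aAbs); // width of abs(a), minus one
}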
diff --git a/compiler-rt/lib/builtins/fp_extend.h b/compiler-rt/lib/builtins/fp_extend.h index aad4436730dd..eee4722bf90e 100644 --- a/compiler-rt/lib/builtins/fp_extend.h +++ b/compiler-rt/lib/builtins/fp_extend.h @@ -33,9 +33,9 @@ static __inline int src_rep_t_clz(src_rep_t a) { return __builtin_clzl(a); #else if (a & REP_C(0xffffffff00000000)) - return __builtin_clz(a >> 32); + return clzsi(a >> 32); else - return 32 + __builtin_clz(a & REP_C(0xffffffff)); + return 32 + clzsi(a & REP_C(0xffffffff)); #endif } diff --git a/compiler-rt/lib/builtins/udivmoddi4.c b/compiler-rt/lib/builtins/udivmoddi4.c index ca17b36ce585..123e5fb05f8c 100644 --- a/compiler-rt/lib/builtins/udivmoddi4.c +++ b/compiler-rt/lib/builtins/udivmoddi4.c @@ -82,7 +82,7 @@ COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int *rem) { r.s.high = n.s.high & (d.s.high - 1); *rem = r.all; } - return n.s.high >> __builtin_ctz(d.s.high); + return n.s.high >> ctzsi(d.s.high); } // K K // --- @@ -112,7 +112,7 @@ COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int *rem) { *rem = n.s.low & (d.s.low - 1); if (d.s.low == 1) return n.all; - sr = __builtin_ctz(d.s.low); + sr = ctzsi(d.s.low); q.s.high = n.s.high >> sr; q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); return q.all; diff --git a/compiler-rt/lib/hwasan/hwasan_memintrinsics.cpp b/compiler-rt/lib/hwasan/hwasan_memintrinsics.cpp index fab017aae60b..ea7f5ce40b07 100644 --- a/compiler-rt/lib/hwasan/hwasan_memintrinsics.cpp +++ b/compiler-rt/lib/hwasan/hwasan_memintrinsics.cpp @@ -40,5 +40,5 @@ void *__hwasan_memmove(void *to, const void *from, uptr size) { reinterpret_cast<uptr>(to), size); CheckAddressSized<ErrorAction::Recover, AccessType::Load>( reinterpret_cast<uptr>(from), size); - return memmove(UntagPtr(to), UntagPtr(from), size); + return memmove(to, from, size); } diff --git a/compiler-rt/lib/interception/interception_win.cpp b/compiler-rt/lib/interception/interception_win.cpp index 9289e06b88fc..10b893391f47 100644 --- a/compiler-rt/lib/interception/interception_win.cpp +++ b/compiler-rt/lib/interception/interception_win.cpp @@ -401,6 +401,7 @@ static uptr AllocateMemoryForTrampoline(uptr image_address, size_t size) { // The following prologues cannot be patched because of the short jump // jumping to the patching region. +#if SANITIZER_WINDOWS64 // ntdll!wcslen in Win11 // 488bc1 mov rax,rcx // 0fb710 movzx edx,word ptr [rax] @@ -422,6 +423,7 @@ static const u8 kPrologueWithShortJump2[] = { 0x4c, 0x8b, 0xc1, 0x8a, 0x01, 0x48, 0xff, 0xc1, 0x84, 0xc0, 0x75, 0xf7, }; +#endif // Returns 0 on error. 
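The __udivmoddi4 hunks above apply the same portability move (ctzsi in place of __builtin_ctz) in the fast path where the divisor is a power of two: the quotient is then a right shift by the divisor's bit index and the remainder is the masked-off low bits. A small standalone sketch of that shortcut, under the stated assumption that d is a nonzero power of two:

    #include <cstdint>

    uint32_t div_pow2(uint32_t n, uint32_t d, uint32_t *rem) {
      *rem = n & (d - 1);            // low bits below the divisor's single set bit
      return n >> __builtin_ctz(d);  // shift by log2(d)
    }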
static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) { diff --git a/compiler-rt/lib/memprof/memprof_allocator.cpp b/compiler-rt/lib/memprof/memprof_allocator.cpp index 0974b898666b..14e7bfe53534 100644 --- a/compiler-rt/lib/memprof/memprof_allocator.cpp +++ b/compiler-rt/lib/memprof/memprof_allocator.cpp @@ -15,11 +15,11 @@ #include "memprof_allocator.h" #include "memprof_mapping.h" -#include "memprof_meminfoblock.h" #include "memprof_mibmap.h" #include "memprof_rawprofile.h" #include "memprof_stack.h" #include "memprof_thread.h" +#include "profile/MemProfData.inc" #include "sanitizer_common/sanitizer_allocator_checks.h" #include "sanitizer_common/sanitizer_allocator_interface.h" #include "sanitizer_common/sanitizer_allocator_report.h" @@ -36,6 +36,42 @@ #include <time.h> namespace __memprof { +namespace { +using ::llvm::memprof::MemInfoBlock; + +void Print(const MemInfoBlock &M, const u64 id, bool print_terse) { + u64 p; + + if (print_terse) { + p = M.total_size * 100 / M.alloc_count; + Printf("MIB:%llu/%u/%llu.%02llu/%u/%u/", id, M.alloc_count, p / 100, + p % 100, M.min_size, M.max_size); + p = M.total_access_count * 100 / M.alloc_count; + Printf("%llu.%02llu/%llu/%llu/", p / 100, p % 100, M.min_access_count, + M.max_access_count); + p = M.total_lifetime * 100 / M.alloc_count; + Printf("%llu.%02llu/%u/%u/", p / 100, p % 100, M.min_lifetime, + M.max_lifetime); + Printf("%u/%u/%u/%u\n", M.num_migrated_cpu, M.num_lifetime_overlaps, + M.num_same_alloc_cpu, M.num_same_dealloc_cpu); + } else { + p = M.total_size * 100 / M.alloc_count; + Printf("Memory allocation stack id = %llu\n", id); + Printf("\talloc_count %u, size (ave/min/max) %llu.%02llu / %u / %u\n", + M.alloc_count, p / 100, p % 100, M.min_size, M.max_size); + p = M.total_access_count * 100 / M.alloc_count; + Printf("\taccess_count (ave/min/max): %llu.%02llu / %llu / %llu\n", p / 100, + p % 100, M.min_access_count, M.max_access_count); + p = M.total_lifetime * 100 / M.alloc_count; + Printf("\tlifetime (ave/min/max): %llu.%02llu / %u / %u\n", p / 100, + p % 100, M.min_lifetime, M.max_lifetime); + Printf("\tnum migrated: %u, num lifetime overlaps: %u, num same alloc " + "cpu: %u, num same dealloc_cpu: %u\n", + M.num_migrated_cpu, M.num_lifetime_overlaps, M.num_same_alloc_cpu, + M.num_same_dealloc_cpu); + } +} +} // namespace static int GetCpuId(void) { // _memprof_preinit is called via the preinit_array, which subsequently calls @@ -240,7 +276,7 @@ struct Allocator { static void PrintCallback(const uptr Key, LockedMemInfoBlock *const &Value, void *Arg) { SpinMutexLock(&Value->mutex); - Value->mib.Print(Key, bool(Arg)); + Print(Value->mib, Key, bool(Arg)); } void FinishAndWrite() { diff --git a/compiler-rt/lib/memprof/memprof_meminfoblock.h b/compiler-rt/lib/memprof/memprof_meminfoblock.h deleted file mode 100644 index 19e424435e79..000000000000 --- a/compiler-rt/lib/memprof/memprof_meminfoblock.h +++ /dev/null @@ -1,116 +0,0 @@ -#ifndef MEMPROF_MEMINFOBLOCK_H_ -#define MEMPROF_MEMINFOBLOCK_H_ - -#include "memprof_interface_internal.h" // For u32, u64 TODO: Move these out of the internal header. 
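The Print helper hoisted into memprof_allocator.cpp above formats averages without floating point (the sanitizer runtime's Printf has no FP support) by scaling to hundredths first: p = total * 100 / count, then printing p / 100 and p % 100. The same trick in isolation:

    #include <cstdint>
    #include <cstdio>

    void print_average(uint64_t total, uint64_t count) {
      uint64_t p = total * 100 / count;  // average scaled to hundredths
      std::printf("%llu.%02llu\n", (unsigned long long)(p / 100),
                  (unsigned long long)(p % 100));
    }
    // print_average(7, 2) prints "3.50".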
-#include "sanitizer_common/sanitizer_common.h" - -namespace __memprof { - -using __sanitizer::Printf; - -struct MemInfoBlock { - u32 alloc_count; - u64 total_access_count, min_access_count, max_access_count; - u64 total_size; - u32 min_size, max_size; - u32 alloc_timestamp, dealloc_timestamp; - u64 total_lifetime; - u32 min_lifetime, max_lifetime; - u32 alloc_cpu_id, dealloc_cpu_id; - u32 num_migrated_cpu; - - // Only compared to prior deallocated object currently. - u32 num_lifetime_overlaps; - u32 num_same_alloc_cpu; - u32 num_same_dealloc_cpu; - - u64 data_type_id; // TODO: hash of type name - - MemInfoBlock() : alloc_count(0) {} - - MemInfoBlock(u32 size, u64 access_count, u32 alloc_timestamp, - u32 dealloc_timestamp, u32 alloc_cpu, u32 dealloc_cpu) - : alloc_count(1), total_access_count(access_count), - min_access_count(access_count), max_access_count(access_count), - total_size(size), min_size(size), max_size(size), - alloc_timestamp(alloc_timestamp), dealloc_timestamp(dealloc_timestamp), - total_lifetime(dealloc_timestamp - alloc_timestamp), - min_lifetime(total_lifetime), max_lifetime(total_lifetime), - alloc_cpu_id(alloc_cpu), dealloc_cpu_id(dealloc_cpu), - num_lifetime_overlaps(0), num_same_alloc_cpu(0), - num_same_dealloc_cpu(0) { - num_migrated_cpu = alloc_cpu_id != dealloc_cpu_id; - } - - void Print(u64 id, bool print_terse) const { - u64 p; - - if (print_terse) { - p = total_size * 100 / alloc_count; - Printf("MIB:%llu/%u/%llu.%02llu/%u/%u/", id, alloc_count, p / 100, - p % 100, min_size, max_size); - p = total_access_count * 100 / alloc_count; - Printf("%llu.%02llu/%llu/%llu/", p / 100, p % 100, min_access_count, - max_access_count); - p = total_lifetime * 100 / alloc_count; - Printf("%llu.%02llu/%u/%u/", p / 100, p % 100, min_lifetime, - max_lifetime); - Printf("%u/%u/%u/%u\n", num_migrated_cpu, num_lifetime_overlaps, - num_same_alloc_cpu, num_same_dealloc_cpu); - } else { - p = total_size * 100 / alloc_count; - Printf("Memory allocation stack id = %llu\n", id); - Printf("\talloc_count %u, size (ave/min/max) %llu.%02llu / %u / %u\n", - alloc_count, p / 100, p % 100, min_size, max_size); - p = total_access_count * 100 / alloc_count; - Printf("\taccess_count (ave/min/max): %llu.%02llu / %llu / %llu\n", - p / 100, p % 100, min_access_count, max_access_count); - p = total_lifetime * 100 / alloc_count; - Printf("\tlifetime (ave/min/max): %llu.%02llu / %u / %u\n", p / 100, - p % 100, min_lifetime, max_lifetime); - Printf("\tnum migrated: %u, num lifetime overlaps: %u, num same alloc " - "cpu: %u, num same dealloc_cpu: %u\n", - num_migrated_cpu, num_lifetime_overlaps, num_same_alloc_cpu, - num_same_dealloc_cpu); - } - } - - static void printHeader() { - Printf("MIB:StackID/AllocCount/AveSize/MinSize/MaxSize/AveAccessCount/" - "MinAccessCount/MaxAccessCount/AveLifetime/MinLifetime/MaxLifetime/" - "NumMigratedCpu/NumLifetimeOverlaps/NumSameAllocCpu/" - "NumSameDeallocCpu\n"); - } - - void Merge(const MemInfoBlock &newMIB) { - alloc_count += newMIB.alloc_count; - - total_access_count += newMIB.total_access_count; - min_access_count = Min(min_access_count, newMIB.min_access_count); - max_access_count = Max(max_access_count, newMIB.max_access_count); - - total_size += newMIB.total_size; - min_size = Min(min_size, newMIB.min_size); - max_size = Max(max_size, newMIB.max_size); - - total_lifetime += newMIB.total_lifetime; - min_lifetime = Min(min_lifetime, newMIB.min_lifetime); - max_lifetime = Max(max_lifetime, newMIB.max_lifetime); - - // We know newMIB was deallocated later, so just 
need to check if it was - // allocated before last one deallocated. - num_lifetime_overlaps += newMIB.alloc_timestamp < dealloc_timestamp; - alloc_timestamp = newMIB.alloc_timestamp; - dealloc_timestamp = newMIB.dealloc_timestamp; - - num_same_alloc_cpu += alloc_cpu_id == newMIB.alloc_cpu_id; - num_same_dealloc_cpu += dealloc_cpu_id == newMIB.dealloc_cpu_id; - alloc_cpu_id = newMIB.alloc_cpu_id; - dealloc_cpu_id = newMIB.dealloc_cpu_id; - } - -} __attribute__((packed)); - -} // namespace __memprof - -#endif // MEMPROF_MEMINFOBLOCK_H_ diff --git a/compiler-rt/lib/memprof/memprof_mibmap.cpp b/compiler-rt/lib/memprof/memprof_mibmap.cpp index 47449cf9612b..32f0796c8f24 100644 --- a/compiler-rt/lib/memprof/memprof_mibmap.cpp +++ b/compiler-rt/lib/memprof/memprof_mibmap.cpp @@ -11,10 +11,12 @@ //===----------------------------------------------------------------------===// #include "memprof_mibmap.h" +#include "profile/MemProfData.inc" #include "sanitizer_common/sanitizer_allocator_internal.h" #include "sanitizer_common/sanitizer_mutex.h" namespace __memprof { +using ::llvm::memprof::MemInfoBlock; void InsertOrMerge(const uptr Id, const MemInfoBlock &Block, MIBMapTy &Map) { MIBMapTy::Handle h(&Map, static_cast<uptr>(Id), /*remove=*/false, diff --git a/compiler-rt/lib/memprof/memprof_mibmap.h b/compiler-rt/lib/memprof/memprof_mibmap.h index ed5dda174fe5..a7cd420464e8 100644 --- a/compiler-rt/lib/memprof/memprof_mibmap.h +++ b/compiler-rt/lib/memprof/memprof_mibmap.h @@ -1,7 +1,9 @@ #ifndef MEMPROF_MIBMAP_H_ #define MEMPROF_MIBMAP_H_ -#include "memprof_meminfoblock.h" +#include <stdint.h> + +#include "profile/MemProfData.inc" #include "sanitizer_common/sanitizer_addrhashmap.h" #include "sanitizer_common/sanitizer_mutex.h" @@ -9,7 +11,7 @@ namespace __memprof { struct LockedMemInfoBlock { __sanitizer::StaticSpinMutex mutex; - MemInfoBlock mib; + ::llvm::memprof::MemInfoBlock mib; }; // The MIB map stores a mapping from stack ids to MemInfoBlocks. @@ -17,7 +19,8 @@ typedef __sanitizer::AddrHashMap<LockedMemInfoBlock *, 200003> MIBMapTy; // Insert a new MemInfoBlock or merge with an existing block identified by the // stack id. -void InsertOrMerge(const uptr Id, const MemInfoBlock &Block, MIBMapTy &Map); +void InsertOrMerge(const uptr Id, const ::llvm::memprof::MemInfoBlock &Block, + MIBMapTy &Map); } // namespace __memprof diff --git a/compiler-rt/lib/memprof/memprof_rawprofile.cpp b/compiler-rt/lib/memprof/memprof_rawprofile.cpp index c4800a6df34c..f065e8dbcabc 100644 --- a/compiler-rt/lib/memprof/memprof_rawprofile.cpp +++ b/compiler-rt/lib/memprof/memprof_rawprofile.cpp @@ -2,7 +2,6 @@ #include <stdlib.h> #include <string.h> -#include "memprof_meminfoblock.h" #include "memprof_rawprofile.h" #include "profile/MemProfData.inc" #include "sanitizer_common/sanitizer_allocator_internal.h" @@ -16,6 +15,7 @@ namespace __memprof { using ::__sanitizer::Vector; +using ::llvm::memprof::MemInfoBlock; using SegmentEntry = ::llvm::memprof::SegmentEntry; using Header = ::llvm::memprof::Header; @@ -65,11 +65,8 @@ void SerializeSegmentsToBuffer(MemoryMappingLayoutBase &Layout, for (Layout.Reset(); Layout.Next(&segment);) { if (segment.IsReadable() && segment.IsExecutable()) { - SegmentEntry Entry{}; - Entry.Start = segment.start; - Entry.End = segment.end; - Entry.Offset = segment.offset; - memcpy(Entry.BuildId, segment.uuid, sizeof(segment.uuid)); + // TODO: Record segment.uuid when it is implemented for Linux-Elf. 
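InsertOrMerge in memprof_mibmap.cpp pairs each stack id with a LockedMemInfoBlock so concurrent allocations hitting the same stack fold their samples in under a lock. A rough model using std:: containers instead of the sanitizer's AddrHashMap; the surrounding names are illustrative, and only MemInfoBlock::Merge comes from the patch:

    #include <cstdint>
    #include <mutex>
    #include <unordered_map>
    #include "profile/MemProfData.inc"  // assumed to be on the include path

    using ::llvm::memprof::MemInfoBlock;

    std::mutex g_mu;
    std::unordered_map<uint64_t, MemInfoBlock> g_map;

    void insert_or_merge(uint64_t id, const MemInfoBlock &block) {
      std::lock_guard<std::mutex> lock(g_mu);
      auto it = g_map.find(id);
      if (it == g_map.end())
        g_map.emplace(id, block);  // first sample for this stack id
      else
        it->second.Merge(block);   // fold the new sample into the running stats
    }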
+ SegmentEntry Entry(segment.start, segment.end, segment.offset); memcpy(Ptr, &Entry, sizeof(SegmentEntry)); Ptr += sizeof(SegmentEntry); NumSegmentsRecorded++; diff --git a/compiler-rt/lib/memprof/tests/rawprofile.cpp b/compiler-rt/lib/memprof/tests/rawprofile.cpp index 829e18370737..6181d80fadf6 100644 --- a/compiler-rt/lib/memprof/tests/rawprofile.cpp +++ b/compiler-rt/lib/memprof/tests/rawprofile.cpp @@ -3,7 +3,6 @@ #include <cstdint> #include <memory> -#include "memprof/memprof_meminfoblock.h" #include "profile/MemProfData.inc" #include "sanitizer_common/sanitizer_common.h" #include "sanitizer_common/sanitizer_procmaps.h" @@ -14,13 +13,13 @@ namespace { -using ::__memprof::MemInfoBlock; using ::__memprof::MIBMapTy; using ::__memprof::SerializeToRawProfile; using ::__sanitizer::MemoryMappedSegment; using ::__sanitizer::MemoryMappingLayoutBase; using ::__sanitizer::StackDepotPut; using ::__sanitizer::StackTrace; +using ::llvm::memprof::MemInfoBlock; using ::testing::_; using ::testing::Action; using ::testing::DoAll; @@ -33,21 +32,21 @@ public: MOCK_METHOD(void, Reset, (), (override)); }; -u64 PopulateFakeMap(const MemInfoBlock &FakeMIB, uptr StackPCBegin, - MIBMapTy &FakeMap) { +uint64_t PopulateFakeMap(const MemInfoBlock &FakeMIB, uint64_t StackPCBegin, + MIBMapTy &FakeMap) { constexpr int kSize = 5; - uptr array[kSize]; + uint64_t array[kSize]; for (int i = 0; i < kSize; i++) { array[i] = StackPCBegin + i; } StackTrace St(array, kSize); - u32 Id = StackDepotPut(St); + uint32_t Id = StackDepotPut(St); InsertOrMerge(Id, FakeMIB, FakeMap); return Id; } -template <class T = u64> T Read(char *&Buffer) { +template <class T = uint64_t> T Read(char *&Buffer) { static_assert(std::is_pod<T>::value, "Must be a POD type."); assert(reinterpret_cast<size_t>(Buffer) % sizeof(T) == 0 && "Unaligned read!"); @@ -86,12 +85,12 @@ TEST(MemProf, Basic) { FakeMIB.alloc_count = 0x1; FakeMIB.total_access_count = 0x2; - u64 FakeIds[2]; + uint64_t FakeIds[2]; FakeIds[0] = PopulateFakeMap(FakeMIB, /*StackPCBegin=*/2, FakeMap); FakeIds[1] = PopulateFakeMap(FakeMIB, /*StackPCBegin=*/3, FakeMap); char *Ptr = nullptr; - u64 NumBytes = SerializeToRawProfile(FakeMap, Layout, Ptr); + uint64_t NumBytes = SerializeToRawProfile(FakeMap, Layout, Ptr); const char *Buffer = Ptr; ASSERT_GT(NumBytes, 0ULL); @@ -100,10 +99,10 @@ TEST(MemProf, Basic) { // Check the header. EXPECT_THAT(Read(Ptr), MEMPROF_RAW_MAGIC_64); EXPECT_THAT(Read(Ptr), MEMPROF_RAW_VERSION); - const u64 TotalSize = Read(Ptr); - const u64 SegmentOffset = Read(Ptr); - const u64 MIBOffset = Read(Ptr); - const u64 StackOffset = Read(Ptr); + const uint64_t TotalSize = Read(Ptr); + const uint64_t SegmentOffset = Read(Ptr); + const uint64_t MIBOffset = Read(Ptr); + const uint64_t StackOffset = Read(Ptr); // ============= Check sizes and padding. EXPECT_EQ(TotalSize, NumBytes); @@ -117,7 +116,7 @@ TEST(MemProf, Basic) { EXPECT_EQ(MIBOffset - SegmentOffset, 64ULL); EXPECT_EQ(MIBOffset, 112ULL); - // We expect 2 mib entry, 8b for the count and sizeof(u64) + + // We expect 2 mib entry, 8b for the count and sizeof(uint64_t) + // sizeof(MemInfoBlock) contains stack id + MeminfoBlock. EXPECT_EQ(StackOffset - MIBOffset, 8 + 2 * (8 + sizeof(MemInfoBlock))); @@ -129,19 +128,22 @@ TEST(MemProf, Basic) { EXPECT_GE(TotalSize - StackOffset, 8ULL + 2 * (8 + 8 + 5 * 8)); // ============= Check contents. + // The Uuid field is not yet populated on Linux-Elf by the sanitizer procmaps + // library, so we expect it to be filled with 0 for now. 
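For orientation while reading the header checks in this test: the raw profile begins with six u64 fields, read in order. A sketch mirroring those reads (the authoritative layout is the Header type in profile/MemProfData.inc, not this illustrative struct):

    #include <cstdint>

    struct RawProfileHeader {   // illustrative mirror, not the real type
      uint64_t Magic;           // MEMPROF_RAW_MAGIC_64
      uint64_t Version;         // MEMPROF_RAW_VERSION
      uint64_t TotalSize;       // size of the whole buffer in bytes
      uint64_t SegmentOffset;   // start of the segment section
      uint64_t MIBOffset;       // start of the MIB section
      uint64_t StackOffset;     // start of the stack trace section
    };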
unsigned char ExpectedSegmentBytes[64] = { - 0x01, 0, 0, 0, 0, 0, 0, 0, // Number of entries - 0x10, 0, 0, 0, 0, 0, 0, 0, // Start - 0x20, 0, 0, 0, 0, 0, 0, 0, // End - 0x10, 0, 0, 0, 0, 0, 0, 0, // Offset - 0x0C, 0x0, 0xF, 0xF, 0xE, 0xE, // Uuid + 0x01, 0, 0, 0, 0, 0, 0, 0, // Number of entries + 0x10, 0, 0, 0, 0, 0, 0, 0, // Start + 0x20, 0, 0, 0, 0, 0, 0, 0, // End + 0x10, 0, 0, 0, 0, 0, 0, 0, // Offset + 0x0, // Uuid }; EXPECT_EQ(memcmp(Buffer + SegmentOffset, ExpectedSegmentBytes, 64), 0); // Check that the number of entries is 2. - EXPECT_EQ(*reinterpret_cast<const u64 *>(Buffer + MIBOffset), 2ULL); + EXPECT_EQ(*reinterpret_cast<const uint64_t *>(Buffer + MIBOffset), 2ULL); // Check that stack id is set. - EXPECT_EQ(*reinterpret_cast<const u64 *>(Buffer + MIBOffset + 8), FakeIds[0]); + EXPECT_EQ(*reinterpret_cast<const uint64_t *>(Buffer + MIBOffset + 8), + FakeIds[0]); // Only check a few fields of the first MemInfoBlock. unsigned char ExpectedMIBBytes[sizeof(MemInfoBlock)] = { @@ -159,9 +161,9 @@ TEST(MemProf, Basic) { 0); // Check that the number of entries is 2. - EXPECT_EQ(*reinterpret_cast<const u64 *>(Buffer + StackOffset), 2ULL); + EXPECT_EQ(*reinterpret_cast<const uint64_t *>(Buffer + StackOffset), 2ULL); // Check that the 1st stack id is set. - EXPECT_EQ(*reinterpret_cast<const u64 *>(Buffer + StackOffset + 8), + EXPECT_EQ(*reinterpret_cast<const uint64_t *>(Buffer + StackOffset + 8), FakeIds[0]); // Contents are num pcs, value of each pc - 1. unsigned char ExpectedStackBytes[2][6 * 8] = { @@ -184,7 +186,7 @@ TEST(MemProf, Basic) { // Check that the 2nd stack id is set. EXPECT_EQ( - *reinterpret_cast<const u64 *>(Buffer + StackOffset + 8 + 6 * 8 + 8), + *reinterpret_cast<const uint64_t *>(Buffer + StackOffset + 8 + 6 * 8 + 8), FakeIds[1]); EXPECT_EQ(memcmp(Buffer + StackOffset + 16 + 6 * 8 + 8, ExpectedStackBytes[1], diff --git a/compiler-rt/lib/msan/msan_interceptors.cpp b/compiler-rt/lib/msan/msan_interceptors.cpp index d1b858930a7f..5317af6982a0 100644 --- a/compiler-rt/lib/msan/msan_interceptors.cpp +++ b/compiler-rt/lib/msan/msan_interceptors.cpp @@ -666,7 +666,7 @@ INTERCEPTOR(int, fstat, int fd, void *buf) { #define MSAN_MAYBE_INTERCEPT_FSTAT #endif -#if !SANITIZER_FREEBSD && !SANITIZER_NETBSD +#if SANITIZER_GLIBC INTERCEPTOR(int, __fxstat, int magic, int fd, void *buf) { ENSURE_MSAN_INITED(); int res = REAL(__fxstat)(magic, fd, buf); @@ -679,7 +679,7 @@ INTERCEPTOR(int, __fxstat, int magic, int fd, void *buf) { #define MSAN_MAYBE_INTERCEPT___FXSTAT #endif -#if !SANITIZER_FREEBSD && !SANITIZER_NETBSD +#if SANITIZER_GLIBC INTERCEPTOR(int, __fxstat64, int magic, int fd, void *buf) { ENSURE_MSAN_INITED(); int res = REAL(__fxstat64)(magic, fd, buf); @@ -704,7 +704,7 @@ INTERCEPTOR(int, fstatat, int fd, char *pathname, void *buf, int flags) { # define MSAN_MAYBE_INTERCEPT_FSTATAT #endif -#if !SANITIZER_FREEBSD && !SANITIZER_NETBSD +#if SANITIZER_GLIBC INTERCEPTOR(int, __fxstatat, int magic, int fd, char *pathname, void *buf, int flags) { ENSURE_MSAN_INITED(); @@ -717,7 +717,7 @@ INTERCEPTOR(int, __fxstatat, int magic, int fd, char *pathname, void *buf, # define MSAN_MAYBE_INTERCEPT___FXSTATAT #endif -#if !SANITIZER_FREEBSD && !SANITIZER_NETBSD +#if SANITIZER_GLIBC INTERCEPTOR(int, __fxstatat64, int magic, int fd, char *pathname, void *buf, int flags) { ENSURE_MSAN_INITED(); diff --git a/compiler-rt/lib/profile/InstrProfiling.c b/compiler-rt/lib/profile/InstrProfiling.c index 557c0da2dbae..ead5e9330734 100644 --- a/compiler-rt/lib/profile/InstrProfiling.c +++ 
b/compiler-rt/lib/profile/InstrProfiling.c @@ -45,7 +45,9 @@ COMPILER_RT_VISIBILITY void __llvm_profile_reset_counters(void) { char *I = __llvm_profile_begin_counters(); char *E = __llvm_profile_end_counters(); - memset(I, 0, E - I); + char ResetValue = + (__llvm_profile_get_version() & VARIANT_MASK_BYTE_COVERAGE) ? 0xFF : 0; + memset(I, ResetValue, E - I); const __llvm_profile_data *DataBegin = __llvm_profile_begin_data(); const __llvm_profile_data *DataEnd = __llvm_profile_end_data(); diff --git a/compiler-rt/lib/profile/InstrProfilingBuffer.c b/compiler-rt/lib/profile/InstrProfilingBuffer.c index f3d15511452e..57f8b68919b1 100644 --- a/compiler-rt/lib/profile/InstrProfilingBuffer.c +++ b/compiler-rt/lib/profile/InstrProfilingBuffer.c @@ -65,6 +65,8 @@ uint64_t __llvm_profile_get_data_size(const __llvm_profile_data *Begin, } COMPILER_RT_VISIBILITY size_t __llvm_profile_counter_entry_size(void) { + if (__llvm_profile_get_version() & VARIANT_MASK_BYTE_COVERAGE) + return sizeof(uint8_t); return sizeof(uint64_t); } diff --git a/compiler-rt/lib/profile/InstrProfilingMerge.c b/compiler-rt/lib/profile/InstrProfilingMerge.c index 3a520f1488a7..adf866e52cf7 100644 --- a/compiler-rt/lib/profile/InstrProfilingMerge.c +++ b/compiler-rt/lib/profile/InstrProfilingMerge.c @@ -155,8 +155,14 @@ int __llvm_profile_merge_from_buffer(const char *ProfileData, if (SrcCounters < SrcCountersStart || SrcCounters >= SrcNameStart || (SrcCounters + __llvm_profile_counter_entry_size() * NC) > SrcNameStart) return 1; - for (unsigned I = 0; I < NC; I++) - ((uint64_t *)DstCounters)[I] += ((uint64_t *)SrcCounters)[I]; + for (unsigned I = 0; I < NC; I++) { + if (__llvm_profile_get_version() & VARIANT_MASK_BYTE_COVERAGE) { + // A value of zero signifies the function is covered. + DstCounters[I] &= SrcCounters[I]; + } else { + ((uint64_t *)DstCounters)[I] += ((uint64_t *)SrcCounters)[I]; + } + } /* Now merge value profile data. */ if (!VPMergeHook) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_atomic_clang.h b/compiler-rt/lib/sanitizer_common/sanitizer_atomic_clang.h index fc13ca52dda7..c2b22cf572a6 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_atomic_clang.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_atomic_clang.h @@ -74,13 +74,12 @@ template <typename T> inline bool atomic_compare_exchange_strong(volatile T *a, typename T::Type *cmp, typename T::Type xchg, memory_order mo) { - typedef typename T::Type Type; - Type cmpv = *cmp; - Type prev; - prev = __sync_val_compare_and_swap(&a->val_dont_use, cmpv, xchg); - if (prev == cmpv) return true; - *cmp = prev; - return false; + // Transitioned from __sync_val_compare_and_swap to support targets like + // SPARC V8 that cannot inline atomic cmpxchg. __atomic_compare_exchange + // can then be resolved from libatomic. __ATOMIC_SEQ_CST is used to best + // match the __sync builtin memory order. 
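The single-call replacement works because the __atomic_compare_exchange builtin already implements the failure protocol the deleted code spelled out by hand: on a mismatch it stores the value it actually observed back into *expected and returns false. A small demonstration of that contract on a plain int with the GCC/Clang builtin:

    #include <cstdio>

    int main() {
      int v = 5, expected = 6, desired = 7;
      bool ok = __atomic_compare_exchange(&v, &expected, &desired,
                                          /*weak=*/false, __ATOMIC_SEQ_CST,
                                          __ATOMIC_SEQ_CST);
      // ok == false, v is untouched, and expected now holds the observed 5.
      std::printf("%d %d %d\n", ok, v, expected);  // prints "0 5 5"
    }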
+ return __atomic_compare_exchange(&a->val_dont_use, cmp, &xchg, false, + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); } template<typename T> diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp index 056bd15e0907..733ae5ee87ef 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp @@ -1536,7 +1536,7 @@ TSAN_INTERCEPTOR(int, pthread_once, void *o, void (*f)()) { return 0; } -#if SANITIZER_LINUX && !SANITIZER_ANDROID +#if SANITIZER_GLIBC TSAN_INTERCEPTOR(int, __fxstat, int version, int fd, void *buf) { SCOPED_TSAN_INTERCEPTOR(__fxstat, version, fd, buf); if (fd > 0) @@ -1562,7 +1562,7 @@ TSAN_INTERCEPTOR(int, fstat, int fd, void *buf) { #endif } -#if SANITIZER_LINUX && !SANITIZER_ANDROID +#if SANITIZER_GLIBC TSAN_INTERCEPTOR(int, __fxstat64, int version, int fd, void *buf) { SCOPED_TSAN_INTERCEPTOR(__fxstat64, version, fd, buf); if (fd > 0) @@ -1574,7 +1574,7 @@ TSAN_INTERCEPTOR(int, __fxstat64, int version, int fd, void *buf) { #define TSAN_MAYBE_INTERCEPT___FXSTAT64 #endif -#if SANITIZER_LINUX && !SANITIZER_ANDROID +#if SANITIZER_GLIBC TSAN_INTERCEPTOR(int, fstat64, int fd, void *buf) { SCOPED_TSAN_INTERCEPTOR(__fxstat64, 0, fd, buf); if (fd > 0) diff --git a/libcxx/include/__algorithm/in_in_out_result.h b/libcxx/include/__algorithm/in_in_out_result.h new file mode 100644 index 000000000000..e365eb58eb62 --- /dev/null +++ b/libcxx/include/__algorithm/in_in_out_result.h @@ -0,0 +1,48 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_IN_IN_OUT_RESULT_H +#define _LIBCPP___ALGORITHM_IN_IN_OUT_RESULT_H + +#include <__concepts/convertible_to.h> +#include <__config> +#include <__utility/move.h> + +_LIBCPP_BEGIN_NAMESPACE_STD + +#ifndef _LIBCPP_HAS_NO_CONCEPTS + +namespace ranges { +template <class _I1, class _I2, class _O1> +struct in_in_out_result { + [[no_unique_address]] _I1 in1; + [[no_unique_address]] _I2 in2; + [[no_unique_address]] _O1 out; + + template <class _II1, class _II2, class _OO1> + requires convertible_to<const _I1&, _II1> && convertible_to<const _I2&, _II2> && convertible_to<const _O1&, _OO1> + _LIBCPP_HIDE_FROM_ABI constexpr + operator in_in_out_result<_II1, _II2, _OO1>() const& { + return {in1, in2, out}; + } + + template <class _II1, class _II2, class _OO1> + requires convertible_to<_I1, _II1> && convertible_to<_I2, _II2> && convertible_to<_O1, _OO1> + _LIBCPP_HIDE_FROM_ABI constexpr + operator in_in_out_result<_II1, _II2, _OO1>() && { + return {_VSTD::move(in1), _VSTD::move(in2), _VSTD::move(out)}; + } +}; +} // namespace ranges + +#endif // _LIBCPP_HAS_NO_CONCEPTS + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___ALGORITHM_IN_IN_RESULT_H diff --git a/libcxx/include/__algorithm/in_in_result.h b/libcxx/include/__algorithm/in_in_result.h index fbe53ae4f57e..ed14ecedbbdf 100644 --- a/libcxx/include/__algorithm/in_in_result.h +++ b/libcxx/include/__algorithm/in_in_result.h @@ -16,7 +16,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#ifndef _LIBCPP_HAS_NO_RANGES +#ifndef _LIBCPP_HAS_NO_CONCEPTS namespace ranges { template <class _I1, class _I2> @@ -38,7 +38,7 @@ struct in_in_result { }; } // namespace ranges -#endif // _LIBCPP_HAS_NO_RANGES +#endif // _LIBCPP_HAS_NO_CONCEPTS _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__algorithm/in_out_result.h b/libcxx/include/__algorithm/in_out_result.h index 9d971157200f..8a58d6ada10c 100644 --- a/libcxx/include/__algorithm/in_out_result.h +++ b/libcxx/include/__algorithm/in_out_result.h @@ -20,7 +20,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) namespace ranges { template<class _InputIterator, class _OutputIterator> @@ -45,7 +45,7 @@ struct in_out_result { }; } // namespace ranges -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__compare/compare_partial_order_fallback.h b/libcxx/include/__compare/compare_partial_order_fallback.h new file mode 100644 index 000000000000..895523b38fb3 --- /dev/null +++ b/libcxx/include/__compare/compare_partial_order_fallback.h @@ -0,0 +1,73 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___COMPARE_COMPARE_PARTIAL_ORDER_FALLBACK +#define _LIBCPP___COMPARE_COMPARE_PARTIAL_ORDER_FALLBACK + +#include <__compare/ordering.h> +#include <__compare/partial_order.h> +#include <__config> +#include <__utility/forward.h> +#include <__utility/priority_tag.h> +#include <type_traits> + +#ifndef _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS) + +// [cmp.alg] +namespace __compare_partial_order_fallback { + struct __fn { + template<class _Tp, class _Up> + requires is_same_v<decay_t<_Tp>, decay_t<_Up>> + _LIBCPP_HIDE_FROM_ABI static constexpr auto + __go(_Tp&& __t, _Up&& __u, __priority_tag<1>) + noexcept(noexcept(_VSTD::partial_order(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u)))) + -> decltype( _VSTD::partial_order(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u))) + { return _VSTD::partial_order(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u)); } + + template<class _Tp, class _Up> + requires is_same_v<decay_t<_Tp>, decay_t<_Up>> + _LIBCPP_HIDE_FROM_ABI static constexpr auto + __go(_Tp&& __t, _Up&& __u, __priority_tag<0>) + noexcept(noexcept(_VSTD::forward<_Tp>(__t) == _VSTD::forward<_Up>(__u) ? partial_ordering::equivalent : + _VSTD::forward<_Tp>(__t) < _VSTD::forward<_Up>(__u) ? partial_ordering::less : + _VSTD::forward<_Up>(__u) < _VSTD::forward<_Tp>(__t) ? partial_ordering::greater : + partial_ordering::unordered)) + -> decltype( _VSTD::forward<_Tp>(__t) == _VSTD::forward<_Up>(__u) ? partial_ordering::equivalent : + _VSTD::forward<_Tp>(__t) < _VSTD::forward<_Up>(__u) ? partial_ordering::less : + _VSTD::forward<_Up>(__u) < _VSTD::forward<_Tp>(__t) ? partial_ordering::greater : + partial_ordering::unordered) + { + return _VSTD::forward<_Tp>(__t) == _VSTD::forward<_Up>(__u) ? partial_ordering::equivalent : + _VSTD::forward<_Tp>(__t) < _VSTD::forward<_Up>(__u) ? partial_ordering::less : + _VSTD::forward<_Up>(__u) < _VSTD::forward<_Tp>(__t) ? partial_ordering::greater : + partial_ordering::unordered; + } + + template<class _Tp, class _Up> + _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp&& __t, _Up&& __u) const + noexcept(noexcept(__go(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u), __priority_tag<1>()))) + -> decltype( __go(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u), __priority_tag<1>())) + { return __go(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u), __priority_tag<1>()); } + }; +} // namespace __compare_partial_order_fallback + +inline namespace __cpo { + inline constexpr auto compare_partial_order_fallback = __compare_partial_order_fallback::__fn{}; +} // namespace __cpo + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS) + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___COMPARE_COMPARE_PARTIAL_ORDER_FALLBACK diff --git a/libcxx/include/__compare/compare_strong_order_fallback.h b/libcxx/include/__compare/compare_strong_order_fallback.h new file mode 100644 index 000000000000..5fee7b478068 --- /dev/null +++ b/libcxx/include/__compare/compare_strong_order_fallback.h @@ -0,0 +1,70 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___COMPARE_COMPARE_STRONG_ORDER_FALLBACK +#define _LIBCPP___COMPARE_COMPARE_STRONG_ORDER_FALLBACK + +#include <__compare/ordering.h> +#include <__compare/strong_order.h> +#include <__config> +#include <__utility/forward.h> +#include <__utility/priority_tag.h> +#include <type_traits> + +#ifndef _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS) + +// [cmp.alg] +namespace __compare_strong_order_fallback { + struct __fn { + template<class _Tp, class _Up> + requires is_same_v<decay_t<_Tp>, decay_t<_Up>> + _LIBCPP_HIDE_FROM_ABI static constexpr auto + __go(_Tp&& __t, _Up&& __u, __priority_tag<1>) + noexcept(noexcept(_VSTD::strong_order(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u)))) + -> decltype( _VSTD::strong_order(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u))) + { return _VSTD::strong_order(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u)); } + + template<class _Tp, class _Up> + requires is_same_v<decay_t<_Tp>, decay_t<_Up>> + _LIBCPP_HIDE_FROM_ABI static constexpr auto + __go(_Tp&& __t, _Up&& __u, __priority_tag<0>) + noexcept(noexcept(_VSTD::forward<_Tp>(__t) == _VSTD::forward<_Up>(__u) ? strong_ordering::equal : + _VSTD::forward<_Tp>(__t) < _VSTD::forward<_Up>(__u) ? strong_ordering::less : + strong_ordering::greater)) + -> decltype( _VSTD::forward<_Tp>(__t) == _VSTD::forward<_Up>(__u) ? strong_ordering::equal : + _VSTD::forward<_Tp>(__t) < _VSTD::forward<_Up>(__u) ? strong_ordering::less : + strong_ordering::greater) + { + return _VSTD::forward<_Tp>(__t) == _VSTD::forward<_Up>(__u) ? strong_ordering::equal : + _VSTD::forward<_Tp>(__t) < _VSTD::forward<_Up>(__u) ? strong_ordering::less : + strong_ordering::greater; + } + + template<class _Tp, class _Up> + _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp&& __t, _Up&& __u) const + noexcept(noexcept(__go(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u), __priority_tag<1>()))) + -> decltype( __go(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u), __priority_tag<1>())) + { return __go(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u), __priority_tag<1>()); } + }; +} // namespace __compare_strong_order_fallback + +inline namespace __cpo { + inline constexpr auto compare_strong_order_fallback = __compare_strong_order_fallback::__fn{}; +} // namespace __cpo + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS) + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___COMPARE_COMPARE_STRONG_ORDER_FALLBACK diff --git a/libcxx/include/__compare/compare_weak_order_fallback.h b/libcxx/include/__compare/compare_weak_order_fallback.h new file mode 100644 index 000000000000..0abd4f2dfbee --- /dev/null +++ b/libcxx/include/__compare/compare_weak_order_fallback.h @@ -0,0 +1,70 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___COMPARE_COMPARE_WEAK_ORDER_FALLBACK +#define _LIBCPP___COMPARE_COMPARE_WEAK_ORDER_FALLBACK + +#include <__compare/ordering.h> +#include <__compare/weak_order.h> +#include <__config> +#include <__utility/forward.h> +#include <__utility/priority_tag.h> +#include <type_traits> + +#ifndef _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS) + +// [cmp.alg] +namespace __compare_weak_order_fallback { + struct __fn { + template<class _Tp, class _Up> + requires is_same_v<decay_t<_Tp>, decay_t<_Up>> + _LIBCPP_HIDE_FROM_ABI static constexpr auto + __go(_Tp&& __t, _Up&& __u, __priority_tag<1>) + noexcept(noexcept(_VSTD::weak_order(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u)))) + -> decltype( _VSTD::weak_order(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u))) + { return _VSTD::weak_order(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u)); } + + template<class _Tp, class _Up> + requires is_same_v<decay_t<_Tp>, decay_t<_Up>> + _LIBCPP_HIDE_FROM_ABI static constexpr auto + __go(_Tp&& __t, _Up&& __u, __priority_tag<0>) + noexcept(noexcept(_VSTD::forward<_Tp>(__t) == _VSTD::forward<_Up>(__u) ? weak_ordering::equivalent : + _VSTD::forward<_Tp>(__t) < _VSTD::forward<_Up>(__u) ? weak_ordering::less : + weak_ordering::greater)) + -> decltype( _VSTD::forward<_Tp>(__t) == _VSTD::forward<_Up>(__u) ? weak_ordering::equivalent : + _VSTD::forward<_Tp>(__t) < _VSTD::forward<_Up>(__u) ? weak_ordering::less : + weak_ordering::greater) + { + return _VSTD::forward<_Tp>(__t) == _VSTD::forward<_Up>(__u) ? weak_ordering::equivalent : + _VSTD::forward<_Tp>(__t) < _VSTD::forward<_Up>(__u) ? 
weak_ordering::less : + weak_ordering::greater; + } + + template<class _Tp, class _Up> + _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp&& __t, _Up&& __u) const + noexcept(noexcept(__go(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u), __priority_tag<1>()))) + -> decltype( __go(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u), __priority_tag<1>())) + { return __go(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u), __priority_tag<1>()); } + }; +} // namespace __compare_weak_order_fallback + +inline namespace __cpo { + inline constexpr auto compare_weak_order_fallback = __compare_weak_order_fallback::__fn{}; +} // namespace __cpo + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS) + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___COMPARE_COMPARE_WEAK_ORDER_FALLBACK diff --git a/libcxx/include/__concepts/swappable.h b/libcxx/include/__concepts/swappable.h index 423b3a89fa40..d45249738535 100644 --- a/libcxx/include/__concepts/swappable.h +++ b/libcxx/include/__concepts/swappable.h @@ -28,13 +28,13 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS) // [concept.swappable] -namespace ranges::__swap { - // Deleted to inhibit ADL + +namespace ranges { +namespace __swap { + template<class _Tp> void swap(_Tp&, _Tp&) = delete; - - // [1] template<class _Tp, class _Up> concept __unqualified_swappable_with = (__class_or_enum<remove_cvref_t<_Tp>> || __class_or_enum<remove_cvref_t<_Up>>) && @@ -89,11 +89,12 @@ namespace ranges::__swap { __y = _VSTD::exchange(__x, _VSTD::move(__y)); } }; -} // namespace ranges::__swap +} // namespace __swap -namespace ranges::inline __cpo { +inline namespace __cpo { inline constexpr auto swap = __swap::__fn{}; -} // namespace ranges::__cpo +} // namespace __cpo +} // namespace ranges template<class _Tp> concept swappable = requires(_Tp& __a, _Tp& __b) { ranges::swap(__a, __b); }; diff --git a/libcxx/include/__config b/libcxx/include/__config index b99cdc38dc9f..3c3d4b57c76e 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -821,10 +821,10 @@ typedef __char32_t char32_t; // Inline namespaces are available in Clang/GCC/MSVC regardless of C++ dialect. #define _LIBCPP_BEGIN_NAMESPACE_STD namespace std { inline namespace _LIBCPP_ABI_NAMESPACE { #define _LIBCPP_END_NAMESPACE_STD } } -#define _VSTD std::_LIBCPP_ABI_NAMESPACE +#define _VSTD std _LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_END_NAMESPACE_STD -#if _LIBCPP_STD_VER >= 17 +#if _LIBCPP_STD_VER > 14 #define _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM \ _LIBCPP_BEGIN_NAMESPACE_STD inline namespace __fs { namespace filesystem { #else @@ -875,14 +875,10 @@ typedef unsigned int char32_t; # define _LIBCPP_CONSTEVAL consteval #endif -#if !defined(__cpp_concepts) || __cpp_concepts < 201907L +#if _LIBCPP_STD_VER <= 17 || !defined(__cpp_concepts) || __cpp_concepts < 201907L #define _LIBCPP_HAS_NO_CONCEPTS #endif -#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_HAS_NO_CONCEPTS) -#define _LIBCPP_HAS_NO_RANGES -#endif - #ifdef __GNUC__ # define _LIBCPP_NOALIAS __attribute__((__malloc__)) #else @@ -1258,8 +1254,9 @@ extern "C" _LIBCPP_FUNC_VIS void __sanitizer_annotate_contiguous_container( # if defined(__clang__) && __has_attribute(acquire_capability) // Work around the attribute handling in clang. When both __declspec and // __attribute__ are present, the processing goes awry preventing the definition -// of the types. -# if !defined(_LIBCPP_OBJECT_FORMAT_COFF) +// of the types. 
In MinGW mode, __declspec evaluates to __attribute__, and thus +// combining the two does work. +# if !defined(_MSC_VER) # define _LIBCPP_HAS_THREAD_SAFETY_ANNOTATIONS # endif # endif diff --git a/libcxx/include/__filesystem/directory_iterator.h b/libcxx/include/__filesystem/directory_iterator.h index 7ea66bbc7ff0..1cb1794efdae 100644 --- a/libcxx/include/__filesystem/directory_iterator.h +++ b/libcxx/include/__filesystem/directory_iterator.h @@ -133,7 +133,7 @@ _LIBCPP_AVAILABILITY_FILESYSTEM_POP _LIBCPP_END_NAMESPACE_FILESYSTEM -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) template <> _LIBCPP_AVAILABILITY_FILESYSTEM diff --git a/libcxx/include/__filesystem/recursive_directory_iterator.h b/libcxx/include/__filesystem/recursive_directory_iterator.h index db7e793e8530..c26d0a381159 100644 --- a/libcxx/include/__filesystem/recursive_directory_iterator.h +++ b/libcxx/include/__filesystem/recursive_directory_iterator.h @@ -164,7 +164,7 @@ _LIBCPP_AVAILABILITY_FILESYSTEM_POP _LIBCPP_END_NAMESPACE_FILESYSTEM -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) template <> _LIBCPP_AVAILABILITY_FILESYSTEM diff --git a/libcxx/include/__format/format_arg.h b/libcxx/include/__format/format_arg.h index da829d52fbfe..e76b0dd50d3c 100644 --- a/libcxx/include/__format/format_arg.h +++ b/libcxx/include/__format/format_arg.h @@ -140,13 +140,12 @@ private: // shall be well-formed when treated as an unevaluated operand. template <class _Ctx, class... _Args> - _LIBCPP_HIDE_FROM_ABI - _LIBCPP_AVAILABILITY_FORMAT friend __format_arg_store<_Ctx, _Args...> - _VSTD::make_format_args(const _Args&...); + _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT friend __format_arg_store<_Ctx, _Args...> + make_format_args(const _Args&...); template <class _Visitor, class _Ctx> _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT friend decltype(auto) - _VSTD::visit_format_arg(_Visitor&& __vis, basic_format_arg<_Ctx> __arg); + visit_format_arg(_Visitor&& __vis, basic_format_arg<_Ctx> __arg); union { bool __boolean; diff --git a/libcxx/include/__format/format_context.h b/libcxx/include/__format/format_context.h index f8ec7c8eb001..570bf7e90d9f 100644 --- a/libcxx/include/__format/format_context.h +++ b/libcxx/include/__format/format_context.h @@ -132,9 +132,8 @@ private: template <class __OutIt, class __CharT> friend _LIBCPP_HIDE_FROM_ABI basic_format_context<__OutIt, __CharT> - _VSTD::__format_context_create( - __OutIt, basic_format_args<basic_format_context<__OutIt, __CharT>>, - optional<_VSTD::locale>&&); + __format_context_create(__OutIt, basic_format_args<basic_format_context<__OutIt, __CharT>>, + optional<_VSTD::locale>&&); // Note: the Standard doesn't specify the required constructors. 
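The make_format_args and __format_context_create friend declarations above drop their _VSTD:: qualification; a qualified friend declaration must name a previously declared entity in exactly that scope, while the unqualified form simply befriends the matching template in the enclosing namespace. A reduced sketch of the pattern with hypothetical names:

    #include <iostream>

    template <class T> class Box;
    template <class T> T peek(const Box<T> &b);

    template <class T> class Box {
      T value_;
    public:
      explicit Box(T v) : value_(v) {}
      template <class U> friend U peek(const Box<U> &b);  // unqualified friend
    };

    template <class T> T peek(const Box<T> &b) { return b.value_; }

    int main() { std::cout << peek(Box<int>(42)) << '\n'; }  // prints 42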
_LIBCPP_HIDE_FROM_ABI @@ -146,8 +145,7 @@ private: #else template <class __OutIt, class __CharT> friend _LIBCPP_HIDE_FROM_ABI basic_format_context<__OutIt, __CharT> - _VSTD::__format_context_create( - __OutIt, basic_format_args<basic_format_context<__OutIt, __CharT>>); + __format_context_create(__OutIt, basic_format_args<basic_format_context<__OutIt, __CharT>>); _LIBCPP_HIDE_FROM_ABI explicit basic_format_context(_OutIt __out_it, diff --git a/libcxx/include/__functional/function.h b/libcxx/include/__functional/function.h index b6d383ce8459..6bb7eb7e8a24 100644 --- a/libcxx/include/__functional/function.h +++ b/libcxx/include/__functional/function.h @@ -1664,7 +1664,7 @@ __func<_Fp, _Alloc, _Rp(_A0, _A1, _A2)>::target_type() const #endif // _LIBCPP_NO_RTTI -} // __function +} // namespace __function template<class _Rp> class _LIBCPP_TEMPLATE_VIS function<_Rp()> diff --git a/libcxx/include/__functional/ranges_operations.h b/libcxx/include/__functional/ranges_operations.h index 777c53525102..8b06240e46a7 100644 --- a/libcxx/include/__functional/ranges_operations.h +++ b/libcxx/include/__functional/ranges_operations.h @@ -20,7 +20,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) namespace ranges { struct equal_to { @@ -90,7 +90,7 @@ struct greater_equal { }; } // namespace ranges -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__iterator/advance.h b/libcxx/include/__iterator/advance.h index 03418979ddbd..5b0e97d76be9 100644 --- a/libcxx/include/__iterator/advance.h +++ b/libcxx/include/__iterator/advance.h @@ -64,7 +64,7 @@ void advance(_InputIter& __i, _Distance __orig_n) { _VSTD::__advance(__i, __n, typename iterator_traits<_InputIter>::iterator_category()); } -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) // [range.iter.op.advance] @@ -192,7 +192,7 @@ inline namespace __cpo { } // namespace __cpo } // namespace ranges -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__iterator/common_iterator.h b/libcxx/include/__iterator/common_iterator.h index 605071d70928..68309ee08b30 100644 --- a/libcxx/include/__iterator/common_iterator.h +++ b/libcxx/include/__iterator/common_iterator.h @@ -27,7 +27,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) template<class _Iter> concept __can_use_postfix_proxy = @@ -276,7 +276,7 @@ struct iterator_traits<common_iterator<_Iter, _Sent>> { using reference = iter_reference_t<_Iter>; }; -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__iterator/concepts.h b/libcxx/include/__iterator/concepts.h index d7a666743afb..f6d092c75d48 100644 --- a/libcxx/include/__iterator/concepts.h +++ b/libcxx/include/__iterator/concepts.h @@ -26,7 +26,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) // [iterator.concept.readable] template<class _In> @@ -257,7 +257,7 @@ concept indirectly_movable_storable = // Note: indirectly_swappable is located in iter_swap.h to prevent a dependency cycle // (both iter_swap and indirectly_swappable require indirectly_readable). 
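These libcxx hunks retire the separate _LIBCPP_HAS_NO_RANGES flag: per the __config change, the ranges machinery is now gated on the same condition as concepts support (C++20 plus a conforming __cpp_concepts). The feature-test idiom, reduced to a standalone sketch with illustrative names rather than libc++'s macros:

    #if defined(__cpp_concepts) && __cpp_concepts >= 201907L
    template <class T>
    concept addable = requires(T a, T b) { a + b; };

    template <addable T>
    T sum2(T a, T b) { return a + b; }
    #else
    template <class T>
    T sum2(T a, T b) { return a + b; }  // unconstrained pre-concepts fallback
    #endif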
-#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__iterator/counted_iterator.h b/libcxx/include/__iterator/counted_iterator.h index 82d7adcfb02e..aaab3ac77777 100644 --- a/libcxx/include/__iterator/counted_iterator.h +++ b/libcxx/include/__iterator/counted_iterator.h @@ -30,7 +30,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) template<class> struct __counted_iterator_concept {}; @@ -296,7 +296,7 @@ struct iterator_traits<counted_iterator<_Iter>> : iterator_traits<_Iter> { add_pointer_t<iter_reference_t<_Iter>>, void>; }; -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__iterator/default_sentinel.h b/libcxx/include/__iterator/default_sentinel.h index 7172a748febc..e12a5909ccf7 100644 --- a/libcxx/include/__iterator/default_sentinel.h +++ b/libcxx/include/__iterator/default_sentinel.h @@ -18,12 +18,12 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) struct default_sentinel_t { }; inline constexpr default_sentinel_t default_sentinel{}; -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__iterator/distance.h b/libcxx/include/__iterator/distance.h index 70e8e52398f9..faab03492389 100644 --- a/libcxx/include/__iterator/distance.h +++ b/libcxx/include/__iterator/distance.h @@ -11,7 +11,13 @@ #define _LIBCPP___ITERATOR_DISTANCE_H #include <__config> +#include <__iterator/concepts.h> +#include <__iterator/incrementable_traits.h> #include <__iterator/iterator_traits.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/size.h> +#include <type_traits> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header @@ -46,6 +52,56 @@ distance(_InputIter __first, _InputIter __last) return _VSTD::__distance(__first, __last, typename iterator_traits<_InputIter>::iterator_category()); } +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS) + +// [range.iter.op.distance] + +namespace ranges { +namespace __distance { + +struct __fn { + template<class _Ip, sentinel_for<_Ip> _Sp> + requires (!sized_sentinel_for<_Sp, _Ip>) + _LIBCPP_HIDE_FROM_ABI + constexpr iter_difference_t<_Ip> operator()(_Ip __first, _Sp __last) const { + iter_difference_t<_Ip> __n = 0; + while (__first != __last) { + ++__first; + ++__n; + } + return __n; + } + + template<class _Ip, sized_sentinel_for<decay_t<_Ip>> _Sp> + _LIBCPP_HIDE_FROM_ABI + constexpr iter_difference_t<_Ip> operator()(_Ip&& __first, _Sp __last) const { + if constexpr (sized_sentinel_for<_Sp, __uncvref_t<_Ip>>) { + return __last - __first; + } else { + return __last - decay_t<_Ip>(__first); + } + } + + template<range _Rp> + _LIBCPP_HIDE_FROM_ABI + constexpr range_difference_t<_Rp> operator()(_Rp&& __r) const { + if constexpr (sized_range<_Rp>) { + return static_cast<range_difference_t<_Rp>>(ranges::size(__r)); + } else { + return operator()(ranges::begin(__r), ranges::end(__r)); + } + } +}; + +} // namespace __distance + +inline namespace __cpo { + inline constexpr auto distance = __distance::__fn{}; +} // namespace __cpo +} // namespace ranges + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS) + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP___ITERATOR_DISTANCE_H diff --git 
a/libcxx/include/__iterator/incrementable_traits.h b/libcxx/include/__iterator/incrementable_traits.h index fd5015ddf1b1..3b68acc9bc51 100644 --- a/libcxx/include/__iterator/incrementable_traits.h +++ b/libcxx/include/__iterator/incrementable_traits.h @@ -20,7 +20,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) // [incrementable.traits] template<class> struct incrementable_traits {}; @@ -65,7 +65,7 @@ using iter_difference_t = typename conditional_t<__is_primary_template<iterator_ incrementable_traits<remove_cvref_t<_Ip> >, iterator_traits<remove_cvref_t<_Ip> > >::difference_type; -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__iterator/indirectly_comparable.h b/libcxx/include/__iterator/indirectly_comparable.h index 3129b2dcf65e..3bafc56f926f 100644 --- a/libcxx/include/__iterator/indirectly_comparable.h +++ b/libcxx/include/__iterator/indirectly_comparable.h @@ -17,13 +17,13 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#ifndef _LIBCPP_HAS_NO_RANGES +#ifndef _LIBCPP_HAS_NO_CONCEPTS template <class _I1, class _I2, class _Rp, class _P1 = identity, class _P2 = identity> concept indirectly_comparable = indirect_binary_predicate<_Rp, projected<_I1, _P1>, projected<_I2, _P2>>; -#endif // _LIBCPP_HAS_NO_RANGES +#endif // _LIBCPP_HAS_NO_CONCEPTS _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__iterator/insert_iterator.h b/libcxx/include/__iterator/insert_iterator.h index 33117419881b..2f18f5f12162 100644 --- a/libcxx/include/__iterator/insert_iterator.h +++ b/libcxx/include/__iterator/insert_iterator.h @@ -24,7 +24,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_RANGES) +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS) template <class _Container> using __insert_iterator_iter_t = ranges::iterator_t<_Container>; #else diff --git a/libcxx/include/__iterator/iter_move.h b/libcxx/include/__iterator/iter_move.h index a2951f764b0c..dfcf8e6c8308 100644 --- a/libcxx/include/__iterator/iter_move.h +++ b/libcxx/include/__iterator/iter_move.h @@ -23,15 +23,21 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) + +// [iterator.cust.move] + +namespace ranges { +namespace __iter_move { -namespace ranges::__iter_move { void iter_move(); -template<class _Ip> -concept __unqualified_iter_move = requires(_Ip&& __i) { - iter_move(_VSTD::forward<_Ip>(__i)); -}; +template <class _Tp> +concept __unqualified_iter_move = + __class_or_enum<remove_cvref_t<_Tp>> && + requires (_Tp&& __t) { + iter_move(_VSTD::forward<_Tp>(__t)); + }; // [iterator.cust.move]/1 // The name ranges::iter_move denotes a customization point object. @@ -69,17 +75,18 @@ struct __fn { // [iterator.cust.move]/1.3 // Otherwise, ranges::iter_move(E) is ill-formed. 
}; -} // namespace ranges::__iter_move +} // namespace __iter_move -namespace ranges::inline __cpo { +inline namespace __cpo { inline constexpr auto iter_move = __iter_move::__fn{}; -} +} // namespace __cpo +} // namespace ranges template<__dereferenceable _Tp> -requires requires(_Tp& __t) { { ranges::iter_move(__t) } -> __referenceable; } + requires requires(_Tp& __t) { { ranges::iter_move(__t) } -> __referenceable; } using iter_rvalue_reference_t = decltype(ranges::iter_move(declval<_Tp&>())); -#endif // !_LIBCPP_HAS_NO_RANGES +#endif // !_LIBCPP_HAS_NO_CONCEPTS _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__iterator/iter_swap.h b/libcxx/include/__iterator/iter_swap.h index a6c3bc8c663e..0179546667b7 100644 --- a/libcxx/include/__iterator/iter_swap.h +++ b/libcxx/include/__iterator/iter_swap.h @@ -26,7 +26,9 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) + +// [iter.cust.swap] namespace ranges { namespace __iter_swap { @@ -34,9 +36,11 @@ namespace __iter_swap { void iter_swap(_I1, _I2) = delete; template<class _T1, class _T2> - concept __unqualified_iter_swap = requires(_T1&& __x, _T2&& __y) { - iter_swap(_VSTD::forward<_T1>(__x), _VSTD::forward<_T2>(__y)); - }; + concept __unqualified_iter_swap = + (__class_or_enum<remove_cvref_t<_T1>> || __class_or_enum<remove_cvref_t<_T2>>) && + requires (_T1&& __x, _T2&& __y) { + iter_swap(_VSTD::forward<_T1>(__x), _VSTD::forward<_T2>(__y)); + }; template<class _T1, class _T2> concept __readable_swappable = @@ -79,12 +83,11 @@ namespace __iter_swap { *_VSTD::forward<_T1>(__x) = _VSTD::move(__old); } }; -} // end namespace __iter_swap +} // namespace __iter_swap inline namespace __cpo { inline constexpr auto iter_swap = __iter_swap::__fn{}; } // namespace __cpo - } // namespace ranges template<class _I1, class _I2 = _I1> @@ -97,7 +100,7 @@ concept indirectly_swappable = ranges::iter_swap(__i2, __i1); }; -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__iterator/iterator_traits.h b/libcxx/include/__iterator/iterator_traits.h index 54c3e11e6738..f2dbb7c700ec 100644 --- a/libcxx/include/__iterator/iterator_traits.h +++ b/libcxx/include/__iterator/iterator_traits.h @@ -22,7 +22,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) template <class _Tp> using __with_reference = _Tp&; @@ -41,7 +41,7 @@ concept __dereferenceable = requires(_Tp& __t) { template<__dereferenceable _Tp> using iter_reference_t = decltype(*declval<_Tp&>()); -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) template <class _Iter> struct _LIBCPP_TEMPLATE_VIS iterator_traits; @@ -139,7 +139,7 @@ public: static const bool value = sizeof(__test<_Tp>(nullptr)) == 1; }; -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) // The `cpp17-*-iterator` exposition-only concepts are easily confused with the Cpp17*Iterator tables, // so they've been banished to a namespace that makes it obvious they have a niche use-case. 
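The deleted swap/iter_swap overloads in the __swap and __iter_swap namespaces above are "poison pills": they hide any outer declaration, so the concept is satisfied only when argument-dependent lookup finds a user-provided customization, and the new __class_or_enum requirement keeps ADL from even being attempted for types that cannot carry one. The idiom boiled down to a sketch with hypothetical names:

    namespace cpo_detail {
      void do_op(auto &) = delete;  // poison pill: blocks non-ADL fallback

      template <class T>
      concept has_adl_op = requires(T &t) { do_op(t); };

      struct fn {
        template <has_adl_op T>
        void operator()(T &t) const { do_op(t); }  // resolved via ADL only
      };
    }
    inline constexpr cpo_detail::fn do_op{};

    namespace user {
      struct Widget {};
      void do_op(Widget &) {}  // found by ADL, preferred over the deleted template
    }

    int main() { user::Widget w; do_op(w); }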
@@ -362,7 +362,7 @@ struct iterator_traits : __iterator_traits<_Ip> { using __primary_template = iterator_traits; }; -#else // !defined(_LIBCPP_HAS_NO_RANGES) +#else // !defined(_LIBCPP_HAS_NO_CONCEPTS) template <class _Iter, bool> struct __iterator_traits {}; @@ -399,10 +399,10 @@ struct _LIBCPP_TEMPLATE_VIS iterator_traits using __primary_template = iterator_traits; }; -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) template<class _Tp> -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) requires is_object_v<_Tp> #endif struct _LIBCPP_TEMPLATE_VIS iterator_traits<_Tp*> diff --git a/libcxx/include/__iterator/next.h b/libcxx/include/__iterator/next.h index b9bdd6b27e05..8683e2210e95 100644 --- a/libcxx/include/__iterator/next.h +++ b/libcxx/include/__iterator/next.h @@ -35,7 +35,7 @@ inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 return __x; } -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) // [range.iter.op.next] @@ -79,7 +79,7 @@ inline namespace __cpo { } // namespace __cpo } // namespace ranges -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__iterator/prev.h b/libcxx/include/__iterator/prev.h index 870cbe64eaee..5a58dc0e4b43 100644 --- a/libcxx/include/__iterator/prev.h +++ b/libcxx/include/__iterator/prev.h @@ -34,7 +34,7 @@ inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 return __x; } -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) // [range.iter.op.prev] @@ -71,7 +71,7 @@ inline namespace __cpo { } // namespace __cpo } // namespace ranges -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__iterator/projected.h b/libcxx/include/__iterator/projected.h index c24c86b4e06e..30ea3a124b2c 100644 --- a/libcxx/include/__iterator/projected.h +++ b/libcxx/include/__iterator/projected.h @@ -20,7 +20,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) template<indirectly_readable _It, indirectly_regular_unary_invocable<_It> _Proj> struct projected { @@ -33,7 +33,7 @@ struct incrementable_traits<projected<_It, _Proj>> { using difference_type = iter_difference_t<_It>; }; -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__iterator/readable_traits.h b/libcxx/include/__iterator/readable_traits.h index 13f323e295ba..c0b16bafd784 100644 --- a/libcxx/include/__iterator/readable_traits.h +++ b/libcxx/include/__iterator/readable_traits.h @@ -20,7 +20,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) // [readable.traits] template<class> struct __cond_value_type {}; @@ -79,7 +79,7 @@ using iter_value_t = typename conditional_t<__is_primary_template<iterator_trait indirectly_readable_traits<remove_cvref_t<_Ip> >, iterator_traits<remove_cvref_t<_Ip> > >::value_type; -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__iterator/unreachable_sentinel.h b/libcxx/include/__iterator/unreachable_sentinel.h index cbbccd7bb288..b200236d8b9d 100644 --- a/libcxx/include/__iterator/unreachable_sentinel.h +++ b/libcxx/include/__iterator/unreachable_sentinel.h @@ -19,7 +19,7 @@ 
_LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) struct unreachable_sentinel_t { template<weakly_incrementable _Iter> @@ -31,7 +31,7 @@ struct unreachable_sentinel_t { inline constexpr unreachable_sentinel_t unreachable_sentinel{}; -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__locale b/libcxx/include/__locale index 98445bd2d8f4..51f35eece712 100644 --- a/libcxx/include/__locale +++ b/libcxx/include/__locale @@ -452,7 +452,7 @@ public: static const mask punct = _PUNCT; static const mask xdigit = _HEX; static const mask blank = _BLANK; - static const mask __regex_word = 0x80; + static const mask __regex_word = 0x4000; // 0x8000 and 0x0100 and 0x00ff are used # define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_PRINT #elif defined(__APPLE__) || defined(__FreeBSD__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__) # ifdef __APPLE__ @@ -545,6 +545,12 @@ public: static const mask graph = alnum | punct; _LIBCPP_INLINE_VISIBILITY ctype_base() {} + +// TODO: Remove the ifndef when the assert no longer fails on AIX. +#ifndef _AIX + static_assert((__regex_word & ~(space | print | cntrl | upper | lower | alpha | digit | punct | xdigit | blank)) == __regex_word, + "__regex_word can't overlap other bits"); +#endif }; template <class _CharT> class _LIBCPP_TEMPLATE_VIS ctype; diff --git a/libcxx/include/__memory/concepts.h b/libcxx/include/__memory/concepts.h index 4029b590fe8c..dced563f38b6 100644 --- a/libcxx/include/__memory/concepts.h +++ b/libcxx/include/__memory/concepts.h @@ -25,7 +25,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) namespace ranges { // [special.mem.concepts] @@ -59,7 +59,7 @@ concept __nothrow_forward_range = __nothrow_forward_iterator<iterator_t<_Rp>>; } // namespace ranges -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__memory/ranges_construct_at.h b/libcxx/include/__memory/ranges_construct_at.h index 1a72da739682..b1821c8cc29e 100644 --- a/libcxx/include/__memory/ranges_construct_at.h +++ b/libcxx/include/__memory/ranges_construct_at.h @@ -29,7 +29,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) namespace ranges { // construct_at @@ -117,7 +117,7 @@ inline namespace __cpo { } // namespace ranges -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__memory/ranges_uninitialized_algorithms.h b/libcxx/include/__memory/ranges_uninitialized_algorithms.h index 6a8f9f070ed7..70d901a5ad03 100644 --- a/libcxx/include/__memory/ranges_uninitialized_algorithms.h +++ b/libcxx/include/__memory/ranges_uninitialized_algorithms.h @@ -32,7 +32,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) namespace ranges { // uninitialized_default_construct @@ -309,7 +309,7 @@ inline namespace __cpo { } // namespace __cpo } // namespace ranges -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__ranges/access.h b/libcxx/include/__ranges/access.h index 0b9470fa4017..67c6c57bd81e 100644 --- a/libcxx/include/__ranges/access.h +++ b/libcxx/include/__ranges/access.h @@ -25,7 +25,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if 
!defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) namespace ranges { template <class _Tp> @@ -83,7 +83,7 @@ namespace __begin { void operator()(auto&&) const = delete; }; -} +} // namespace __begin inline namespace __cpo { inline constexpr auto begin = __begin::__fn{}; @@ -150,7 +150,7 @@ namespace __end { void operator()(auto&&) const = delete; }; -} +} // namespace __end inline namespace __cpo { inline constexpr auto end = __end::__fn{}; @@ -178,7 +178,7 @@ namespace __cbegin { -> decltype( ranges::begin(static_cast<const _Tp&&>(__t))) { return ranges::begin(static_cast<const _Tp&&>(__t)); } }; -} +} // namespace __cbegin inline namespace __cpo { inline constexpr auto cbegin = __cbegin::__fn{}; @@ -206,14 +206,14 @@ namespace __cend { -> decltype( ranges::end(static_cast<const _Tp&&>(__t))) { return ranges::end(static_cast<const _Tp&&>(__t)); } }; -} +} // namespace __cend inline namespace __cpo { inline constexpr auto cend = __cend::__fn{}; } // namespace __cpo } // namespace ranges -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__ranges/all.h b/libcxx/include/__ranges/all.h index 90327da81460..b0a58e9bad82 100644 --- a/libcxx/include/__ranges/all.h +++ b/libcxx/include/__ranges/all.h @@ -28,7 +28,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) namespace ranges::views { @@ -64,7 +64,7 @@ namespace __all { return ranges::owning_view{_VSTD::forward<_Tp>(__t)}; } }; -} +} // namespace __all inline namespace __cpo { inline constexpr auto all = __all::__fn{}; @@ -75,7 +75,7 @@ using all_t = decltype(views::all(declval<_Range>())); } // namespace ranges::views -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__ranges/common_view.h b/libcxx/include/__ranges/common_view.h index a58554f66e50..fb8078d65558 100644 --- a/libcxx/include/__ranges/common_view.h +++ b/libcxx/include/__ranges/common_view.h @@ -30,7 +30,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) namespace ranges { @@ -120,15 +120,15 @@ namespace __common { -> decltype( common_view{_VSTD::forward<_Range>(__range)}) { return common_view{_VSTD::forward<_Range>(__range)}; } }; -} +} // namespace __common inline namespace __cpo { inline constexpr auto common = __common::__fn{}; -} +} // namespace __cpo } // namespace views } // namespace ranges -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__ranges/concepts.h b/libcxx/include/__ranges/concepts.h index a9cb15f9f17c..0590863138ed 100644 --- a/libcxx/include/__ranges/concepts.h +++ b/libcxx/include/__ranges/concepts.h @@ -32,7 +32,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) namespace ranges { // [range.range] @@ -133,7 +133,7 @@ namespace ranges { } // namespace ranges -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__ranges/copyable_box.h b/libcxx/include/__ranges/copyable_box.h index 4811690adaec..608db55dbc5f 100644 --- a/libcxx/include/__ranges/copyable_box.h +++ b/libcxx/include/__ranges/copyable_box.h @@ -24,7 +24,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if 
!defined(_LIBCPP_HAS_NO_CONCEPTS) // __copyable_box allows turning a type that is copy-constructible (but maybe not copy-assignable) into // a type that is both copy-constructible and copy-assignable. It does that by introducing an empty state @@ -171,7 +171,7 @@ namespace ranges { }; } // namespace ranges -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__ranges/counted.h b/libcxx/include/__ranges/counted.h index cb9784092420..92bcf06be3ad 100644 --- a/libcxx/include/__ranges/counted.h +++ b/libcxx/include/__ranges/counted.h @@ -29,7 +29,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) namespace ranges::views { @@ -74,7 +74,7 @@ inline namespace __cpo { } // namespace ranges::views -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__ranges/dangling.h b/libcxx/include/__ranges/dangling.h index 45ff83b205e5..b6b733a6eaa9 100644 --- a/libcxx/include/__ranges/dangling.h +++ b/libcxx/include/__ranges/dangling.h @@ -21,7 +21,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) namespace ranges { struct dangling { @@ -35,7 +35,7 @@ using borrowed_iterator_t = _If<borrowed_range<_Rp>, iterator_t<_Rp>, dangling>; // borrowed_subrange_t defined in <__ranges/subrange.h> } // namespace ranges -#endif // !_LIBCPP_HAS_NO_RANGES +#endif // !_LIBCPP_HAS_NO_CONCEPTS _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__ranges/data.h b/libcxx/include/__ranges/data.h index 69dfd479c011..f97ec8033297 100644 --- a/libcxx/include/__ranges/data.h +++ b/libcxx/include/__ranges/data.h @@ -24,7 +24,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) // [range.prim.data] @@ -64,7 +64,7 @@ namespace __data { return _VSTD::to_address(ranges::begin(__t)); } }; -} +} // namespace __data inline namespace __cpo { inline constexpr auto data = __data::__fn{}; @@ -92,14 +92,14 @@ namespace __cdata { -> decltype( ranges::data(static_cast<const _Tp&&>(__t))) { return ranges::data(static_cast<const _Tp&&>(__t)); } }; -} +} // namespace __cdata inline namespace __cpo { inline constexpr auto cdata = __cdata::__fn{}; } // namespace __cpo } // namespace ranges -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__ranges/drop_view.h b/libcxx/include/__ranges/drop_view.h index 6adb09cf677e..1e2bfa40b325 100644 --- a/libcxx/include/__ranges/drop_view.h +++ b/libcxx/include/__ranges/drop_view.h @@ -31,7 +31,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) namespace ranges { template<view _View> @@ -120,7 +120,7 @@ public: inline constexpr bool enable_borrowed_range<drop_view<_Tp>> = enable_borrowed_range<_Tp>; } // namespace ranges -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__ranges/empty.h b/libcxx/include/__ranges/empty.h index 8da0b120f182..b06a81c98e8f 100644 --- a/libcxx/include/__ranges/empty.h +++ b/libcxx/include/__ranges/empty.h @@ -22,7 +22,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) // [range.prim.empty] @@ -68,14 +68,14 @@ namespace __empty { return 
ranges::begin(__t) == ranges::end(__t); } }; -} +} // namespace __empty inline namespace __cpo { inline constexpr auto empty = __empty::__fn{}; } // namespace __cpo } // namespace ranges -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__ranges/empty_view.h b/libcxx/include/__ranges/empty_view.h index f744dcbe92f4..e0116b933797 100644 --- a/libcxx/include/__ranges/empty_view.h +++ b/libcxx/include/__ranges/empty_view.h @@ -20,7 +20,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) namespace ranges { template<class _Tp> @@ -38,7 +38,7 @@ namespace ranges { inline constexpr bool enable_borrowed_range<empty_view<_Tp>> = true; } // namespace ranges -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__ranges/enable_borrowed_range.h b/libcxx/include/__ranges/enable_borrowed_range.h index 20b1d42b26c8..5f5b3f505773 100644 --- a/libcxx/include/__ranges/enable_borrowed_range.h +++ b/libcxx/include/__ranges/enable_borrowed_range.h @@ -22,7 +22,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_RANGES) +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS) namespace ranges { @@ -34,7 +34,7 @@ inline constexpr bool enable_borrowed_range = false; } // namespace ranges -#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_RANGES) +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__ranges/enable_view.h b/libcxx/include/__ranges/enable_view.h index e1daec046fc0..87d53f3fcc8c 100644 --- a/libcxx/include/__ranges/enable_view.h +++ b/libcxx/include/__ranges/enable_view.h @@ -20,7 +20,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) namespace ranges { @@ -38,9 +38,9 @@ template <class _Tp> inline constexpr bool enable_view = derived_from<_Tp, view_base> || requires { ranges::__is_derived_from_view_interface((_Tp*)nullptr, (_Tp*)nullptr); }; -} // end namespace ranges +} // namespace ranges -#endif // !_LIBCPP_HAS_NO_RANGES +#endif // !_LIBCPP_HAS_NO_CONCEPTS _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__ranges/iota_view.h b/libcxx/include/__ranges/iota_view.h index da712b8e6f4f..17f6021f7dd4 100644 --- a/libcxx/include/__ranges/iota_view.h +++ b/libcxx/include/__ranges/iota_view.h @@ -39,7 +39,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) namespace ranges { template<class _Int> @@ -397,11 +397,11 @@ namespace __iota { inline namespace __cpo { inline constexpr auto iota = __iota::__fn{}; -} +} // namespace __cpo } // namespace views } // namespace ranges -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__ranges/join_view.h b/libcxx/include/__ranges/join_view.h index 9aa69da76cf0..7efbcfdf8788 100644 --- a/libcxx/include/__ranges/join_view.h +++ b/libcxx/include/__ranges/join_view.h @@ -30,7 +30,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) namespace ranges { template<class> @@ -343,7 +343,7 @@ namespace ranges { #undef _CONSTEXPR_TERNARY -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git 
a/libcxx/include/__ranges/non_propagating_cache.h b/libcxx/include/__ranges/non_propagating_cache.h index 456e08d8c971..d0f447ce7cc5 100644 --- a/libcxx/include/__ranges/non_propagating_cache.h +++ b/libcxx/include/__ranges/non_propagating_cache.h @@ -24,7 +24,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) namespace ranges { // __non_propagating_cache is a helper type that allows storing an optional value in it, @@ -107,7 +107,7 @@ namespace ranges { struct __empty_cache { }; } // namespace ranges -#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_RANGES) +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__ranges/owning_view.h b/libcxx/include/__ranges/owning_view.h index 29182d2d8e46..9c038cbd7580 100644 --- a/libcxx/include/__ranges/owning_view.h +++ b/libcxx/include/__ranges/owning_view.h @@ -28,7 +28,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) namespace ranges { template<range _Rp> @@ -74,7 +74,7 @@ public: } // namespace ranges -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__ranges/range_adaptor.h b/libcxx/include/__ranges/range_adaptor.h index 74aea3187e89..d037585d2679 100644 --- a/libcxx/include/__ranges/range_adaptor.h +++ b/libcxx/include/__ranges/range_adaptor.h @@ -25,7 +25,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) // CRTP base that one can derive from in order to be considered a range adaptor closure // by the library. When deriving from this class, a pipe operator will be provided to @@ -66,7 +66,7 @@ struct __range_adaptor_closure { { return __range_adaptor_closure_t(_VSTD::__compose(_VSTD::forward<_OtherClosure>(__c2), _VSTD::forward<_Closure>(__c1))); } }; -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__ranges/ref_view.h b/libcxx/include/__ranges/ref_view.h index 283fa2599bff..4d12759e614f 100644 --- a/libcxx/include/__ranges/ref_view.h +++ b/libcxx/include/__ranges/ref_view.h @@ -31,7 +31,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) namespace ranges { template<range _Range> @@ -79,7 +79,7 @@ public: inline constexpr bool enable_borrowed_range<ref_view<_Tp>> = true; } // namespace ranges -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__ranges/reverse_view.h b/libcxx/include/__ranges/reverse_view.h index 618be80457b6..04ac7f29aa7c 100644 --- a/libcxx/include/__ranges/reverse_view.h +++ b/libcxx/include/__ranges/reverse_view.h @@ -33,7 +33,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) namespace ranges { template<view _View> @@ -175,15 +175,15 @@ namespace ranges { -> decltype( reverse_view{_VSTD::forward<_Range>(__range)}) { return reverse_view{_VSTD::forward<_Range>(__range)}; } }; - } + } // namespace __reverse inline namespace __cpo { inline constexpr auto reverse = __reverse::__fn{}; - } + } // namespace __cpo } // namespace views } // namespace ranges -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git 
a/libcxx/include/__ranges/single_view.h b/libcxx/include/__ranges/single_view.h index 931ce78da7b9..c6f0746ea795 100644 --- a/libcxx/include/__ranges/single_view.h +++ b/libcxx/include/__ranges/single_view.h @@ -24,7 +24,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) namespace ranges { template<copy_constructible _Tp> @@ -74,7 +74,7 @@ namespace ranges { single_view(_Tp) -> single_view<_Tp>; } // namespace ranges -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__ranges/size.h b/libcxx/include/__ranges/size.h index f3de5a8b8410..e1aaf7eba898 100644 --- a/libcxx/include/__ranges/size.h +++ b/libcxx/include/__ranges/size.h @@ -24,12 +24,12 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) namespace ranges { template<class> inline constexpr bool disable_sized_range = false; -} +} // namespace ranges // [range.prim.size] @@ -97,7 +97,7 @@ namespace __size { return _VSTD::__to_unsigned_like(ranges::end(__t) - ranges::begin(__t)); } }; -} +} // namespace __size inline namespace __cpo { inline constexpr auto size = __size::__fn{}; @@ -121,14 +121,14 @@ namespace __ssize { return static_cast<_Signed>(ranges::size(__t)); } }; -} +} // namespace __ssize inline namespace __cpo { inline constexpr auto ssize = __ssize::__fn{}; } // namespace __cpo } // namespace ranges -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__ranges/subrange.h b/libcxx/include/__ranges/subrange.h index 14716d1fb0ff..c6977cec3672 100644 --- a/libcxx/include/__ranges/subrange.h +++ b/libcxx/include/__ranges/subrange.h @@ -36,7 +36,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) namespace ranges { template<class _From, class _To> @@ -227,7 +227,7 @@ namespace ranges { -> subrange<iterator_t<_Range>, sentinel_t<_Range>, subrange_kind::sized>; template<size_t _Index, class _Iter, class _Sent, subrange_kind _Kind> - requires (_Index < 2) + requires ((_Index == 0 && copyable<_Iter>) || _Index == 1) _LIBCPP_HIDE_FROM_ABI constexpr auto get(const subrange<_Iter, _Sent, _Kind>& __subrange) { if constexpr (_Index == 0) @@ -282,7 +282,7 @@ struct tuple_element<1, const ranges::subrange<_Ip, _Sp, _Kp>> { using type = _Sp; }; -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__ranges/take_view.h b/libcxx/include/__ranges/take_view.h index 73a57fe96c08..4d45219020da 100644 --- a/libcxx/include/__ranges/take_view.h +++ b/libcxx/include/__ranges/take_view.h @@ -34,7 +34,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) namespace ranges { template<view _View> @@ -176,7 +176,7 @@ public: inline constexpr bool enable_borrowed_range<take_view<_Tp>> = enable_borrowed_range<_Tp>; } // namespace ranges -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__ranges/transform_view.h b/libcxx/include/__ranges/transform_view.h index 1506e8b2a7fe..d39dd4097290 100644 --- a/libcxx/include/__ranges/transform_view.h +++ b/libcxx/include/__ranges/transform_view.h @@ -20,7 +20,6 @@ #include <__functional/bind_back.h> #include 
<__functional/invoke.h> #include <__iterator/concepts.h> -#include <__iterator/iter_swap.h> #include <__iterator/iterator_traits.h> #include <__memory/addressof.h> #include <__ranges/access.h> @@ -42,7 +41,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) namespace ranges { @@ -425,16 +424,16 @@ namespace __transform { noexcept(is_nothrow_constructible_v<decay_t<_Fn>, _Fn>) { return __range_adaptor_closure_t(_VSTD::__bind_back(*this, _VSTD::forward<_Fn>(__f))); } }; -} +} // namespace __transform inline namespace __cpo { inline constexpr auto transform = __transform::__fn{}; -} +} // namespace __cpo } // namespace views } // namespace ranges -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__ranges/view_interface.h b/libcxx/include/__ranges/view_interface.h index c5215cbcb8e3..0a37323df817 100644 --- a/libcxx/include/__ranges/view_interface.h +++ b/libcxx/include/__ranges/view_interface.h @@ -27,7 +27,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) namespace ranges { @@ -186,9 +186,9 @@ public: } }; -} +} // namespace ranges -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/algorithm b/libcxx/include/algorithm index 03b4faaee284..e5f1030be201 100644 --- a/libcxx/include/algorithm +++ b/libcxx/include/algorithm @@ -20,7 +20,10 @@ namespace std namespace ranges { template <class I1, class I2> - struct in_in_result; // since C++20 + struct in_in_result; // since C++20 + + template <class I1, class I2, class O> + struct in_in_out_result; // since C++20 } template <class InputIterator, class Predicate> @@ -696,6 +699,7 @@ template<class InputIterator, class OutputIterator> #include <__algorithm/generate.h> #include <__algorithm/generate_n.h> #include <__algorithm/half_positive.h> +#include <__algorithm/in_in_out_result.h> #include <__algorithm/in_in_result.h> #include <__algorithm/in_out_result.h> #include <__algorithm/includes.h> diff --git a/libcxx/include/atomic b/libcxx/include/atomic index 02844642fa08..bd49367a3246 100644 --- a/libcxx/include/atomic +++ b/libcxx/include/atomic @@ -2692,10 +2692,13 @@ typedef conditional<_LIBCPP_CONTENTION_LOCK_FREE, __cxx_contention_t, char>::typ typedef conditional<_LIBCPP_CONTENTION_LOCK_FREE, __cxx_contention_t, unsigned char>::type __libcpp_unsigned_lock_free; #else // No signed/unsigned lock-free types +#define _LIBCPP_NO_LOCK_FREE_TYPES #endif +#if !defined(_LIBCPP_NO_LOCK_FREE_TYPES) typedef atomic<__libcpp_signed_lock_free> atomic_signed_lock_free; typedef atomic<__libcpp_unsigned_lock_free> atomic_unsigned_lock_free; +#endif #define ATOMIC_FLAG_INIT {false} #define ATOMIC_VAR_INIT(__v) {__v} diff --git a/libcxx/include/compare b/libcxx/include/compare index d686b5a369f2..287e61690bbb 100644 --- a/libcxx/include/compare +++ b/libcxx/include/compare @@ -51,9 +51,14 @@ namespace std { struct compare_three_way; // C++20 // [cmp.alg], comparison algorithms - template<class T> constexpr strong_ordering strong_order(const T& a, const T& b); - template<class T> constexpr weak_ordering weak_order(const T& a, const T& b); - template<class T> constexpr partial_ordering partial_order(const T& a, const T& b); + inline namespace unspecified { + inline constexpr unspecified strong_order = unspecified; + inline constexpr unspecified weak_order = unspecified; + 
inline constexpr unspecified partial_order = unspecified; + inline constexpr unspecified compare_strong_order_fallback = unspecified; + inline constexpr unspecified compare_weak_order_fallback = unspecified; + inline constexpr unspecified compare_partial_order_fallback = unspecified; + } // [cmp.partialord], Class partial_ordering class partial_ordering { @@ -136,8 +141,11 @@ namespace std { */ #include <__compare/common_comparison_category.h> +#include <__compare/compare_partial_order_fallback.h> +#include <__compare/compare_strong_order_fallback.h> #include <__compare/compare_three_way.h> #include <__compare/compare_three_way_result.h> +#include <__compare/compare_weak_order_fallback.h> #include <__compare/is_eq.h> #include <__compare/ordering.h> #include <__compare/partial_order.h> diff --git a/libcxx/include/cstddef b/libcxx/include/cstddef index ed5aea6f7a19..e77629e1ea0c 100644 --- a/libcxx/include/cstddef +++ b/libcxx/include/cstddef @@ -156,7 +156,8 @@ template <class _Integer> template <class _Integer, class = _EnableByteOverload<_Integer> > _LIBCPP_NODISCARD_EXT constexpr _Integer to_integer(byte __b) noexcept { return static_cast<_Integer>(__b); } -} + +} // namespace std #endif diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index 90fae9bb8362..84abf01bf5d7 100644 --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -246,6 +246,7 @@ module std [system] { module generate { private header "__algorithm/generate.h" } module generate_n { private header "__algorithm/generate_n.h" } module half_positive { private header "__algorithm/half_positive.h" } + module in_in_out_result { private header "__algorithm/in_in_out_result.h" } module in_in_result { private header "__algorithm/in_in_result.h" } module in_out_result { private header "__algorithm/in_out_result.h" } module includes { private header "__algorithm/includes.h" } @@ -385,16 +386,19 @@ module std [system] { export * module __compare { - module common_comparison_category { private header "__compare/common_comparison_category.h" } - module compare_three_way { private header "__compare/compare_three_way.h" } - module compare_three_way_result { private header "__compare/compare_three_way_result.h" } - module is_eq { private header "__compare/is_eq.h" } - module ordering { private header "__compare/ordering.h" } - module partial_order { private header "__compare/partial_order.h" } - module strong_order { private header "__compare/strong_order.h" } - module synth_three_way { private header "__compare/synth_three_way.h" } - module three_way_comparable { private header "__compare/three_way_comparable.h" } - module weak_order { private header "__compare/weak_order.h" } + module common_comparison_category { private header "__compare/common_comparison_category.h" } + module compare_partial_order_fallback { private header "__compare/compare_partial_order_fallback.h" } + module compare_strong_order_fallback { private header "__compare/compare_strong_order_fallback.h" } + module compare_three_way { private header "__compare/compare_three_way.h" } + module compare_three_way_result { private header "__compare/compare_three_way_result.h" } + module compare_weak_order_fallback { private header "__compare/compare_weak_order_fallback.h" } + module is_eq { private header "__compare/is_eq.h" } + module ordering { private header "__compare/ordering.h" } + module partial_order { private header "__compare/partial_order.h" } + module strong_order { private header "__compare/strong_order.h" } + 
module synth_three_way { private header "__compare/synth_three_way.h" } + module three_way_comparable { private header "__compare/three_way_comparable.h" } + module weak_order { private header "__compare/weak_order.h" } } } module complex { diff --git a/libcxx/include/ranges b/libcxx/include/ranges index eb4492376c5c..f7c543d7316c 100644 --- a/libcxx/include/ranges +++ b/libcxx/include/ranges @@ -239,11 +239,11 @@ namespace std::ranges { _LIBCPP_BEGIN_NAMESPACE_STD -#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_RANGES) +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS) namespace views = ranges::views; -#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_RANGES) +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/span b/libcxx/include/span index d33ad09a388f..bd11330e69d9 100644 --- a/libcxx/include/span +++ b/libcxx/include/span @@ -170,7 +170,7 @@ struct __is_std_span : false_type {}; template <class _Tp, size_t _Sz> struct __is_std_span<span<_Tp, _Sz>> : true_type {}; -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) template <class _Range, class _ElementType> concept __span_compatible_range = ranges::contiguous_range<_Range> && @@ -210,7 +210,7 @@ public: constexpr span (const span&) noexcept = default; constexpr span& operator=(const span&) noexcept = default; -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) template <class _It, enable_if_t<contiguous_iterator<_It> && is_convertible_v<remove_reference_t<iter_reference_t<_It>>(*)[], element_type (*)[]>, @@ -248,7 +248,7 @@ public: _LIBCPP_INLINE_VISIBILITY constexpr span(const array<_OtherElementType, _Extent>& __arr) noexcept : __data{__arr.data()} {} -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) template <__span_compatible_range<element_type> _Range> _LIBCPP_INLINE_VISIBILITY constexpr explicit span(_Range&& __r) : __data{ranges::data(__r)} { @@ -401,7 +401,7 @@ public: constexpr span (const span&) noexcept = default; constexpr span& operator=(const span&) noexcept = default; -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) template <class _It, enable_if_t<contiguous_iterator<_It> && is_convertible_v<remove_reference_t<iter_reference_t<_It> > (*)[], element_type (*)[]>, @@ -434,7 +434,7 @@ public: _LIBCPP_INLINE_VISIBILITY constexpr span(const array<_OtherElementType, _Sz>& __arr) noexcept : __data{__arr.data()}, __size{_Sz} {} -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) template <__span_compatible_range<element_type> _Range> _LIBCPP_INLINE_VISIBILITY constexpr span(_Range&& __r) : __data(ranges::data(__r)), __size{ranges::size(__r)} {} @@ -543,13 +543,13 @@ private: size_type __size; }; -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) template <class _Tp, size_t _Extent> inline constexpr bool ranges::enable_borrowed_range<span<_Tp, _Extent> > = true; template <class _ElementType, size_t _Extent> inline constexpr bool ranges::enable_view<span<_ElementType, _Extent>> = true; -#endif // !defined(_LIBCPP_HAS_NO_RANGES) +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) // as_bytes & as_writable_bytes template <class _Tp, size_t _Extent> @@ -564,7 +564,7 @@ auto as_writable_bytes(span<_Tp, _Extent> __s) noexcept -> enable_if_t<!is_const_v<_Tp>, decltype(__s.__as_writable_bytes())> { return __s.__as_writable_bytes(); } -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if 
!defined(_LIBCPP_HAS_NO_CONCEPTS) // Deduction guides template<contiguous_iterator _It, class _EndOrSize> span(_It, _EndOrSize) -> span<remove_reference_t<iter_reference_t<_It>>>; @@ -579,7 +579,7 @@ template<class _Tp, size_t _Sz> template<class _Tp, size_t _Sz> span(const array<_Tp, _Sz>&) -> span<const _Tp, _Sz>; -#if !defined(_LIBCPP_HAS_NO_RANGES) +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) template<ranges::contiguous_range _Range> span(_Range&&) -> span<remove_reference_t<ranges::range_reference_t<_Range>>>; #endif diff --git a/libcxx/include/string_view b/libcxx/include/string_view index 3861ad67ca5d..992e88ea3c00 100644 --- a/libcxx/include/string_view +++ b/libcxx/include/string_view @@ -286,7 +286,7 @@ public: #endif } -#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_RANGES) +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS) template <contiguous_iterator _It, sized_sentinel_for<_It> _End> requires (is_same_v<iter_value_t<_It>, _CharT> && !is_convertible_v<_End, size_type>) constexpr _LIBCPP_HIDE_FROM_ABI basic_string_view(_It __begin, _End __end) @@ -296,7 +296,7 @@ public: } #endif -#if _LIBCPP_STD_VER > 20 && !defined(_LIBCPP_HAS_NO_RANGES) +#if _LIBCPP_STD_VER > 20 && !defined(_LIBCPP_HAS_NO_CONCEPTS) template <class _Range> requires ( !is_same_v<remove_cvref_t<_Range>, basic_string_view> && @@ -707,23 +707,23 @@ private: size_type __size; }; -#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_RANGES) +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS) template <class _CharT, class _Traits> inline constexpr bool ranges::enable_view<basic_string_view<_CharT, _Traits>> = true; template <class _CharT, class _Traits> inline constexpr bool ranges::enable_borrowed_range<basic_string_view<_CharT, _Traits> > = true; -#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_RANGES) +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS) // [string.view.deduct] -#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_RANGES) +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS) template <contiguous_iterator _It, sized_sentinel_for<_It> _End> basic_string_view(_It, _End) -> basic_string_view<iter_value_t<_It>>; #endif -#if _LIBCPP_STD_VER > 20 && !defined(_LIBCPP_HAS_NO_RANGES) +#if _LIBCPP_STD_VER > 20 && !defined(_LIBCPP_HAS_NO_CONCEPTS) template <ranges::contiguous_range _Range> basic_string_view(_Range) -> basic_string_view<ranges::range_value_t<_Range>>; #endif diff --git a/libcxx/include/variant b/libcxx/include/variant index 51bcd6ef4e98..21fd53729d55 100644 --- a/libcxx/include/variant +++ b/libcxx/include/variant @@ -1252,7 +1252,7 @@ template <class _Tp, class... _Types> using __best_match_t = typename invoke_result_t<_MakeOverloads<_Types...>, _Tp, _Tp>::type; -} // __variant_detail +} // namespace __variant_detail template <class... _Types> class _LIBCPP_TEMPLATE_VIS variant diff --git a/libcxx/src/filesystem/operations.cpp b/libcxx/src/filesystem/operations.cpp index 62bcfbff097f..7aeeffaae8f3 100644 --- a/libcxx/src/filesystem/operations.cpp +++ b/libcxx/src/filesystem/operations.cpp @@ -24,9 +24,10 @@ # define NOMINMAX # include <windows.h> #else -# include <unistd.h> +# include <dirent.h> # include <sys/stat.h> # include <sys/statvfs.h> +# include <unistd.h> #endif #include <time.h> #include <fcntl.h> /* values for fchmodat */ @@ -1338,6 +1339,19 @@ bool __remove(const path& p, error_code* ec) { return true; } +// We currently have two implementations of `__remove_all`. 
The first one is general and +// used on platforms where we don't have access to the `openat()` family of POSIX functions. +// That implementation uses `directory_iterator`, however it is vulnerable to some race +// conditions, see https://reviews.llvm.org/D118134 for details. +// +// The second implementation is used on platforms where `openat()` & friends are available, +// and it threads file descriptors through recursive calls to avoid such race conditions. +#if defined(_LIBCPP_WIN32API) +# define REMOVE_ALL_USE_DIRECTORY_ITERATOR +#endif + +#if defined(REMOVE_ALL_USE_DIRECTORY_ITERATOR) + namespace { uintmax_t remove_all_impl(path const& p, error_code& ec) { @@ -1377,6 +1391,97 @@ uintmax_t __remove_all(const path& p, error_code* ec) { return count; } +#else // !REMOVE_ALL_USE_DIRECTORY_ITERATOR + +namespace { + +template <class Cleanup> +struct scope_exit { + explicit scope_exit(Cleanup const& cleanup) + : cleanup_(cleanup) + { } + + ~scope_exit() { cleanup_(); } + +private: + Cleanup cleanup_; +}; + +uintmax_t remove_all_impl(int parent_directory, const path& p, error_code& ec) { + // First, try to open the path as a directory. + const int options = O_CLOEXEC | O_RDONLY | O_DIRECTORY | O_NOFOLLOW; + int fd = ::openat(parent_directory, p.c_str(), options); + if (fd != -1) { + // If that worked, iterate over the contents of the directory and + // remove everything in it, recursively. + scope_exit close_fd([=] { ::close(fd); }); + DIR* stream = ::fdopendir(fd); + if (stream == nullptr) { + ec = detail::capture_errno(); + return 0; + } + scope_exit close_stream([=] { ::closedir(stream); }); + + uintmax_t count = 0; + while (true) { + auto [str, type] = detail::posix_readdir(stream, ec); + static_assert(std::is_same_v<decltype(str), std::string_view>); + if (str == "." || str == "..") { + continue; + } else if (ec || str.empty()) { + break; // we're done iterating through the directory + } else { + count += remove_all_impl(fd, str, ec); + } + } + + // Then, remove the now-empty directory itself. + if (::unlinkat(parent_directory, p.c_str(), AT_REMOVEDIR) == -1) { + ec = detail::capture_errno(); + return count; + } + + return count + 1; // the contents of the directory + the directory itself + } + + ec = detail::capture_errno(); + + // If we failed to open `p` because it didn't exist, it's not an + // error -- it might have moved or have been deleted already. + if (ec == errc::no_such_file_or_directory) { + ec.clear(); + return 0; + } + + // If opening `p` failed because it wasn't a directory, remove it as + // a normal file instead. Note that `openat()` can return either ENOTDIR + // or ELOOP depending on the exact reason of the failure. + if (ec == errc::not_a_directory || ec == errc::too_many_symbolic_link_levels) { + ec.clear(); + if (::unlinkat(parent_directory, p.c_str(), /* flags = */0) == -1) { + ec = detail::capture_errno(); + return 0; + } + return 1; + } + + // Otherwise, it's a real error -- we don't remove anything. 
+ return 0; +} + +} // end namespace + +uintmax_t __remove_all(const path& p, error_code* ec) { + ErrorHandler<uintmax_t> err("remove_all", ec, &p); + error_code mec; + uintmax_t count = remove_all_impl(AT_FDCWD, p, mec); + if (mec) + return err.report(mec); + return count; +} + +#endif // REMOVE_ALL_USE_DIRECTORY_ITERATOR + void __rename(const path& from, const path& to, error_code* ec) { ErrorHandler<void> err("rename", ec, &from, &to); if (detail::rename(from.c_str(), to.c_str()) == -1) diff --git a/libunwind/include/__libunwind_config.h b/libunwind/include/__libunwind_config.h index 221980a24aeb..67527d9da404 100644 --- a/libunwind/include/__libunwind_config.h +++ b/libunwind/include/__libunwind_config.h @@ -55,7 +55,7 @@ # define _LIBUNWIND_CONTEXT_SIZE 167 # define _LIBUNWIND_CURSOR_SIZE 179 # define _LIBUNWIND_HIGHEST_DWARF_REGISTER _LIBUNWIND_HIGHEST_DWARF_REGISTER_PPC64 -# elif defined(__ppc__) +# elif defined(__powerpc__) # define _LIBUNWIND_TARGET_PPC 1 # define _LIBUNWIND_CONTEXT_SIZE 117 # define _LIBUNWIND_CURSOR_SIZE 124 diff --git a/libunwind/src/UnwindRegistersRestore.S b/libunwind/src/UnwindRegistersRestore.S index 694738b5155a..f3d9dd31683e 100644 --- a/libunwind/src/UnwindRegistersRestore.S +++ b/libunwind/src/UnwindRegistersRestore.S @@ -394,7 +394,7 @@ Lnovec: PPC64_LR(3) bctr -#elif defined(__ppc__) +#elif defined(__powerpc__) DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_ppc6jumptoEv) // diff --git a/libunwind/src/UnwindRegistersSave.S b/libunwind/src/UnwindRegistersSave.S index e77012e5c613..7af5c9d154fb 100644 --- a/libunwind/src/UnwindRegistersSave.S +++ b/libunwind/src/UnwindRegistersSave.S @@ -559,7 +559,7 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) blr -#elif defined(__ppc__) +#elif defined(__powerpc__) // // extern int unw_getcontext(unw_context_t* thread_state) diff --git a/libunwind/src/assembly.h b/libunwind/src/assembly.h index b17f2ade590b..978f6bd619bd 100644 --- a/libunwind/src/assembly.h +++ b/libunwind/src/assembly.h @@ -234,7 +234,7 @@ #endif #endif /* __arm__ */ -#if defined(__ppc__) || defined(__powerpc64__) +#if defined(__powerpc__) #define PPC_LEFT_SHIFT(index) << (index) #endif diff --git a/libunwind/src/config.h b/libunwind/src/config.h index f469d3c232e6..560edda04eaa 100644 --- a/libunwind/src/config.h +++ b/libunwind/src/config.h @@ -105,7 +105,7 @@ #define _LIBUNWIND_BUILD_SJLJ_APIS #endif -#if defined(__i386__) || defined(__x86_64__) || defined(__ppc__) || defined(__ppc64__) || defined(__powerpc64__) +#if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) #define _LIBUNWIND_SUPPORT_FRAME_APIS #endif diff --git a/libunwind/src/libunwind.cpp b/libunwind/src/libunwind.cpp index 48750ce670fb..ef48cbe860c7 100644 --- a/libunwind/src/libunwind.cpp +++ b/libunwind/src/libunwind.cpp @@ -51,7 +51,7 @@ _LIBUNWIND_HIDDEN int __unw_init_local(unw_cursor_t *cursor, # define REGISTER_KIND Registers_x86_64 #elif defined(__powerpc64__) # define REGISTER_KIND Registers_ppc64 -#elif defined(__ppc__) +#elif defined(__powerpc__) # define REGISTER_KIND Registers_ppc #elif defined(__aarch64__) # define REGISTER_KIND Registers_arm64 diff --git a/lld/Common/ErrorHandler.cpp b/lld/Common/ErrorHandler.cpp index 15b3bd058ee9..e557e533dedc 100644 --- a/lld/Common/ErrorHandler.cpp +++ b/lld/Common/ErrorHandler.cpp @@ -107,6 +107,13 @@ void lld::diagnosticHandler(const DiagnosticInfo &di) { SmallString<128> s; raw_svector_ostream os(s); DiagnosticPrinterRawOStream dp(os); + + // For an inline asm diagnostic, prepend the module name to get 
something like + // "$module <inline asm>:1:5: ". + if (auto *dism = dyn_cast<DiagnosticInfoSrcMgr>(&di)) + if (dism->isInlineAsmDiag()) + os << dism->getModuleName() << ' '; + di.print(dp); switch (di.getSeverity()) { case DS_Error: diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp index 784d578312d7..5789bc935b63 100644 --- a/lld/ELF/Arch/AArch64.cpp +++ b/lld/ELF/Arch/AArch64.cpp @@ -591,6 +591,55 @@ AArch64Relaxer::AArch64Relaxer(ArrayRef<Relocation> relocs) { safeToRelaxAdrpLdr = i == size; } +bool AArch64Relaxer::tryRelaxAdrpAdd(const Relocation &adrpRel, + const Relocation &addRel, uint64_t secAddr, + uint8_t *buf) const { + // When the address of sym is within the range of ADR then + // we may relax + // ADRP xn, sym + // ADD xn, xn, :lo12: sym + // to + // NOP + // ADR xn, sym + if (!config->relax || adrpRel.type != R_AARCH64_ADR_PREL_PG_HI21 || + addRel.type != R_AARCH64_ADD_ABS_LO12_NC) + return false; + // Check if the relocations apply to consecutive instructions. + if (adrpRel.offset + 4 != addRel.offset) + return false; + if (adrpRel.sym != addRel.sym) + return false; + if (adrpRel.addend != 0 || addRel.addend != 0) + return false; + + uint32_t adrpInstr = read32le(buf + adrpRel.offset); + uint32_t addInstr = read32le(buf + addRel.offset); + // Check if the first instruction is ADRP and the second instruction is ADD. + if ((adrpInstr & 0x9f000000) != 0x90000000 || + (addInstr & 0xffc00000) != 0x91000000) + return false; + uint32_t adrpDestReg = adrpInstr & 0x1f; + uint32_t addDestReg = addInstr & 0x1f; + uint32_t addSrcReg = (addInstr >> 5) & 0x1f; + if (adrpDestReg != addDestReg || adrpDestReg != addSrcReg) + return false; + + Symbol &sym = *adrpRel.sym; + // Check if the address difference is within 1MiB range. + int64_t val = sym.getVA() - (secAddr + addRel.offset); + if (val < -1024 * 1024 || val >= 1024 * 1024) + return false; + + Relocation adrRel = {R_ABS, R_AARCH64_ADR_PREL_LO21, addRel.offset, + /*addend=*/0, &sym}; + // nop + write32le(buf + adrpRel.offset, 0xd503201f); + // adr x_<dest_reg> + write32le(buf + adrRel.offset, 0x10000000 | adrpDestReg); + target->relocate(buf + adrRel.offset, adrRel, val); + return true; +} + bool AArch64Relaxer::tryRelaxAdrpLdr(const Relocation &adrpRel, const Relocation &ldrRel, uint64_t secAddr, uint8_t *buf) const { @@ -657,6 +706,7 @@ bool AArch64Relaxer::tryRelaxAdrpLdr(const Relocation &adrpRel, getAArch64Page(secAddr + adrpSymRel.offset), 64)); target->relocate(buf + addRel.offset, addRel, SignExtend64(sym.getVA(), 64)); + tryRelaxAdrpAdd(adrpSymRel, addRel, secAddr, buf); return true; } diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index 47bbed125cb1..72b7be8165e0 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -352,12 +352,7 @@ static inline ArrayRef<VersionDefinition> namedVersionDefs() { return llvm::makeArrayRef(config->versionDefinitions).slice(2); } -static inline void errorOrWarn(const Twine &msg) { - if (!config->noinhibitExec) - error(msg); - else - warn(msg); -} +void errorOrWarn(const Twine &msg); static inline void internalLinkerError(StringRef loc, const Twine &msg) { errorOrWarn(loc + "internal linker error: " + msg + "\n" + diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index de26afddd28b..406571e4e9fd 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -77,6 +77,13 @@ std::unique_ptr<LinkerDriver> elf::driver; static void setConfigs(opt::InputArgList &args); static void readConfigs(opt::InputArgList &args); +void elf::errorOrWarn(const Twine &msg) { + if 
(config->noinhibitExec) + warn(msg); + else + error(msg); +} + bool elf::link(ArrayRef<const char *> args, llvm::raw_ostream &stdoutOS, llvm::raw_ostream &stderrOS, bool exitEarly, bool disableOutput) { @@ -189,8 +196,8 @@ std::vector<std::pair<MemoryBufferRef, uint64_t>> static getArchiveMembers( toString(std::move(err))); // Take ownership of memory buffers created for members of thin archives. - for (std::unique_ptr<MemoryBuffer> &mb : file->takeThinBuffers()) - make<std::unique_ptr<MemoryBuffer>>(std::move(mb)); + std::vector<std::unique_ptr<MemoryBuffer>> mbs = file->takeThinBuffers(); + std::move(mbs.begin(), mbs.end(), std::back_inserter(memoryBuffers)); return v; } @@ -546,22 +553,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) { // values such as a default image base address. target = getTarget(); - switch (config->ekind) { - case ELF32LEKind: - link<ELF32LE>(args); - break; - case ELF32BEKind: - link<ELF32BE>(args); - break; - case ELF64LEKind: - link<ELF64LE>(args); - break; - case ELF64BEKind: - link<ELF64BE>(args); - break; - default: - llvm_unreachable("unknown Config->EKind"); - } + link(args); } if (config->timeTraceEnabled) { @@ -1712,7 +1704,7 @@ static void handleUndefinedGlob(StringRef arg) { // Calling sym->extract() in the loop is not safe because it may add new // symbols to the symbol table, invalidating the current iterator. - std::vector<Symbol *> syms; + SmallVector<Symbol *, 0> syms; for (Symbol *sym : symtab->symbols()) if (!sym->isPlaceholder() && pat->match(sym->getName())) syms.push_back(sym); @@ -1821,7 +1813,6 @@ static void replaceCommonSymbols() { auto *bss = make<BssSection>("COMMON", s->size, s->alignment); bss->file = s->file; - bss->markDead(); inputSections.push_back(bss); s->replace(Defined{s->file, s->getName(), s->binding, s->stOther, s->type, /*value=*/0, s->size, bss}); @@ -2172,14 +2163,14 @@ static void checkAndReportMissingFeature(StringRef config, uint32_t features, // // This is also the case with AARCH64's BTI and PAC which use the similar // GNU_PROPERTY_AARCH64_FEATURE_1_AND mechanism. -template <class ELFT> static uint32_t getAndFeatures() { +static uint32_t getAndFeatures() { if (config->emachine != EM_386 && config->emachine != EM_X86_64 && config->emachine != EM_AARCH64) return 0; uint32_t ret = -1; - for (InputFile *f : objectFiles) { - uint32_t features = cast<ObjFile<ELFT>>(f)->andFeatures; + for (ELFFileBase *f : objectFiles) { + uint32_t features = f->andFeatures; checkAndReportMissingFeature( config->zBtiReport, features, GNU_PROPERTY_AARCH64_FEATURE_1_BTI, @@ -2225,7 +2216,7 @@ template <class ELFT> static uint32_t getAndFeatures() { // Do actual linking. Note that when this function is called, // all linker scripts have already been parsed. -template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { +void LinkerDriver::link(opt::InputArgList &args) { llvm::TimeTraceScope timeScope("Link", StringRef("LinkerDriver::Link")); // If a --hash-style option was not given, set to a default value, // which varies depending on the target. @@ -2393,7 +2384,7 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { // // With this the symbol table should be complete. After this, no new names // except a few linker-synthesized ones will be added to the symbol table. - compileBitcodeFiles<ELFT>(skipLinkedOutput); + invokeELFT(compileBitcodeFiles, skipLinkedOutput); // Symbol resolution finished. Report backward reference problems. 
reportBackrefs(); @@ -2434,7 +2425,7 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { llvm::TimeTraceScope timeScope("Strip sections"); llvm::erase_if(inputSections, [](InputSectionBase *s) { if (s->type == SHT_LLVM_SYMPART) { - readSymbolPartitionSection<ELFT>(s); + invokeELFT(readSymbolPartitionSection, s); return true; } @@ -2465,7 +2456,7 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { // Read .note.gnu.property sections from input object files which // contain a hint to tweak linker's and loader's behaviors. - config->andFeatures = getAndFeatures<ELFT>(); + config->andFeatures = getAndFeatures(); // The Target instance handles target-specific stuff, such as applying // relocations or writing a PLT section. It also contains target-dependent @@ -2501,10 +2492,10 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { inputSections.push_back(createCommentSection()); // Split SHF_MERGE and .eh_frame sections into pieces in preparation for garbage collection. - splitSections<ELFT>(); + invokeELFT(splitSections); // Garbage collection and removal of shared symbols from unused shared objects. - markLive<ELFT>(); + invokeELFT(markLive); demoteSharedSymbols(); // Make copies of any input sections that need to be copied into each @@ -2513,7 +2504,7 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { // Create synthesized sections such as .got and .plt. This is called before // processSectionCommands() so that they can be placed by SECTIONS commands. - createSyntheticSections<ELFT>(); + invokeELFT(createSyntheticSections); // Some input sections that are used for exception handling need to be moved // into synthetic sections. Do that now so that they aren't assigned to @@ -2552,8 +2543,8 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { // Two input sections with different output sections should not be folded. // ICF runs after processSectionCommands() so that we know the output sections. if (config->icf != ICFLevel::None) { - findKeepUniqueSections<ELFT>(args); - doIcf<ELFT>(); + invokeELFT(findKeepUniqueSections, args); + invokeELFT(doIcf); } // Read the callgraph now that we know what was gced or icfed @@ -2561,9 +2552,9 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { if (auto *arg = args.getLastArg(OPT_call_graph_ordering_file)) if (Optional<MemoryBufferRef> buffer = readFile(arg->getValue())) readCallGraph(*buffer); - readCallGraphsFromObjectFiles<ELFT>(); + invokeELFT(readCallGraphsFromObjectFiles); } // Write the result to the file. - writeResult<ELFT>(); + invokeELFT(writeResult); } diff --git a/lld/ELF/Driver.h b/lld/ELF/Driver.h index b8cbb3b19268..510ca626c6d1 100644 --- a/lld/ELF/Driver.h +++ b/lld/ELF/Driver.h @@ -33,7 +33,7 @@ public: private: void createFiles(llvm::opt::InputArgList &args); void inferMachineType(); - template <class ELFT> void link(llvm::opt::InputArgList &args); + void link(llvm::opt::InputArgList &args); template <class ELFT> void compileBitcodeFiles(bool skipLinkedOutput); // True if we are in --whole-archive and --no-whole-archive. diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp index 3b991e8d3470..ed19bde41323 100644 --- a/lld/ELF/ICF.cpp +++ b/lld/ELF/ICF.cpp @@ -122,7 +122,7 @@ private: void forEachClass(llvm::function_ref<void(size_t, size_t)> fn); - std::vector<InputSection *> sections; + SmallVector<InputSection *, 0> sections; // We repeat the main loop while `Repeat` is true. 
std::atomic<bool> repeat; diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index 4da371c619f4..b5510b3b2736 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -13,6 +13,7 @@ #include "SymbolTable.h" #include "Symbols.h" #include "SyntheticSections.h" +#include "Target.h" #include "lld/Common/CommonLinkerContext.h" #include "lld/Common/DWARF.h" #include "llvm/ADT/STLExtras.h" @@ -217,24 +218,7 @@ template <class ELFT> static void doParseFile(InputFile *file) { } // Add symbols in File to the symbol table. -void elf::parseFile(InputFile *file) { - switch (config->ekind) { - case ELF32LEKind: - doParseFile<ELF32LE>(file); - return; - case ELF32BEKind: - doParseFile<ELF32BE>(file); - return; - case ELF64LEKind: - doParseFile<ELF64LE>(file); - return; - case ELF64BEKind: - doParseFile<ELF64BE>(file); - return; - default: - llvm_unreachable("unknown ELFT"); - } -} +void elf::parseFile(InputFile *file) { invokeELFT(doParseFile, file); } // Concatenates arguments to construct a string representing an error location. static std::string createFileLineMsg(StringRef path, unsigned line) { @@ -401,14 +385,15 @@ uint32_t ObjFile<ELFT>::getSectionIndex(const Elf_Sym &sym) const { } template <class ELFT> void ObjFile<ELFT>::parse(bool ignoreComdats) { + object::ELFFile<ELFT> obj = this->getObj(); // Read a section table. justSymbols is usually false. if (this->justSymbols) initializeJustSymbols(); else - initializeSections(ignoreComdats); + initializeSections(ignoreComdats, obj); // Read a symbol table. - initializeSymbols(); + initializeSymbols(obj); } // Sections with SHT_GROUP and comdat bits define comdat section groups. @@ -490,12 +475,12 @@ template <class ELFT> void ObjFile<ELFT>::initializeJustSymbols() { static void addDependentLibrary(StringRef specifier, const InputFile *f) { if (!config->dependentLibraries) return; - if (fs::exists(specifier)) - driver->addFile(specifier, /*withLOption=*/false); - else if (Optional<std::string> s = findFromSearchPaths(specifier)) + if (Optional<std::string> s = searchLibraryBaseName(specifier)) driver->addFile(*s, /*withLOption=*/true); - else if (Optional<std::string> s = searchLibraryBaseName(specifier)) + else if (Optional<std::string> s = findFromSearchPaths(specifier)) driver->addFile(*s, /*withLOption=*/true); + else if (fs::exists(specifier)) + driver->addFile(specifier, /*withLOption=*/false); else error(toString(f) + ": unable to find library from dependent library specifier: " + @@ -541,9 +526,8 @@ static void handleSectionGroup(ArrayRef<InputSectionBase *> sections, } template <class ELFT> -void ObjFile<ELFT>::initializeSections(bool ignoreComdats) { - const ELFFile<ELFT> &obj = this->getObj(); - +void ObjFile<ELFT>::initializeSections(bool ignoreComdats, + const llvm::object::ELFFile<ELFT> &obj) { ArrayRef<Elf_Shdr> objSections = getELFShdrs<ELFT>(); StringRef shstrtab = CHECK(obj.getSectionStringTable(objSections), this); uint64_t size = objSections.size(); @@ -602,7 +586,8 @@ void ObjFile<ELFT>::initializeSections(bool ignoreComdats) { .second; if (keepGroup) { if (config->relocatable) - this->sections[i] = createInputSection(i, sec, shstrtab); + this->sections[i] = createInputSection( + i, sec, check(obj.getSectionName(sec, shstrtab))); selectedGroups.push_back(entries); continue; } @@ -626,7 +611,8 @@ void ObjFile<ELFT>::initializeSections(bool ignoreComdats) { case SHT_NULL: break; default: - this->sections[i] = createInputSection(i, sec, shstrtab); + this->sections[i] = + createInputSection(i, sec, 
check(obj.getSectionName(sec, shstrtab))); } } @@ -891,10 +877,8 @@ InputSectionBase *ObjFile<ELFT>::getRelocTarget(uint32_t idx, template <class ELFT> InputSectionBase *ObjFile<ELFT>::createInputSection(uint32_t idx, const Elf_Shdr &sec, - StringRef shstrtab) { - StringRef name = CHECK(getObj().getSectionName(sec, shstrtab), this); - - if (config->emachine == EM_ARM && sec.sh_type == SHT_ARM_ATTRIBUTES) { + StringRef name) { + if (sec.sh_type == SHT_ARM_ATTRIBUTES && config->emachine == EM_ARM) { ARMAttributeParser attributes; ArrayRef<uint8_t> contents = check(this->getObj().getSectionContents(sec)); if (Error e = attributes.parse(contents, config->ekind == ELF32LEKind @@ -918,7 +902,7 @@ InputSectionBase *ObjFile<ELFT>::createInputSection(uint32_t idx, } } - if (config->emachine == EM_RISCV && sec.sh_type == SHT_RISCV_ATTRIBUTES) { + if (sec.sh_type == SHT_RISCV_ATTRIBUTES && config->emachine == EM_RISCV) { RISCVAttributeParser attributes; ArrayRef<uint8_t> contents = check(this->getObj().getSectionContents(sec)); if (Error e = attributes.parse(contents, support::little)) { @@ -1040,7 +1024,8 @@ InputSectionBase *ObjFile<ELFT>::createInputSection(uint32_t idx, // Initialize this->Symbols. this->Symbols is a parallel array as // its corresponding ELF symbol table. -template <class ELFT> void ObjFile<ELFT>::initializeSymbols() { +template <class ELFT> +void ObjFile<ELFT>::initializeSymbols(const object::ELFFile<ELFT> &obj) { ArrayRef<InputSectionBase *> sections(this->sections); SymbolTable &symtab = *elf::symtab; @@ -1053,7 +1038,11 @@ template <class ELFT> void ObjFile<ELFT>::initializeSymbols() { for (size_t i = 0, end = firstGlobal; i != end; ++i) { const Elf_Sym &eSym = eSyms[i]; - uint32_t secIdx = getSectionIndex(eSym); + uint32_t secIdx = eSym.st_shndx; + if (LLVM_UNLIKELY(secIdx == SHN_XINDEX)) + secIdx = check(getExtendedSymbolTableIndex<ELFT>(eSym, i, shndxTable)); + else if (secIdx >= SHN_LORESERVE) + secIdx = 0; if (LLVM_UNLIKELY(secIdx >= sections.size())) fatal(toString(this) + ": invalid section index: " + Twine(secIdx)); if (LLVM_UNLIKELY(eSym.getBinding() != STB_LOCAL)) @@ -1093,7 +1082,11 @@ template <class ELFT> void ObjFile<ELFT>::initializeSymbols() { Twine(firstGlobal) + ")"); continue; } - uint32_t secIdx = getSectionIndex(eSym); + uint32_t secIdx = eSym.st_shndx; + if (LLVM_UNLIKELY(secIdx == SHN_XINDEX)) + secIdx = check(getExtendedSymbolTableIndex<ELFT>(eSym, i, shndxTable)); + else if (secIdx >= SHN_LORESERVE) + secIdx = 0; if (LLVM_UNLIKELY(secIdx >= sections.size())) fatal(toString(this) + ": invalid section index: " + Twine(secIdx)); InputSectionBase *sec = sections[secIdx]; diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h index 6febea3f437d..2bac30f54081 100644 --- a/lld/ELF/InputFiles.h +++ b/lld/ELF/InputFiles.h @@ -55,11 +55,8 @@ void parseFile(InputFile *file); // The root class of input files. class InputFile { -private: - // Cache for getNameForScript(). - mutable SmallString<0> nameForScriptCache; - protected: + SmallVector<Symbol *, 0> symbols; SmallVector<InputSectionBase *, 0> sections; public: @@ -103,15 +100,6 @@ public: // definition. bool shouldExtractForCommon(StringRef name); - // If not empty, this stores the name of the archive containing this file. - // We use this string for creating error messages. - SmallString<0> archiveName; - - // Cache for toString(). Only toString() should use this member. - mutable SmallString<0> toStringCache; - - SmallVector<Symbol *, 0> symbols; - // .got2 in the current file. 
This is used by PPC32 -fPIC/-fPIE to compute // offsets in PLT call stubs. InputSection *ppc32Got2 = nullptr; @@ -162,6 +150,17 @@ public: protected: InputFile(Kind k, MemoryBufferRef m); + +public: + // If not empty, this stores the name of the archive containing this file. + // We use this string for creating error messages. + SmallString<0> archiveName; + // Cache for toString(). Only toString() should use this member. + mutable SmallString<0> toStringCache; + +private: + // Cache for getNameForScript(). + mutable SmallString<0> nameForScriptCache; }; class ELFFileBase : public InputFile { @@ -212,6 +211,7 @@ protected: uint32_t firstGlobal = 0; public: + uint32_t andFeatures = 0; bool hasCommonSyms = false; }; @@ -268,8 +268,6 @@ public: // R_MIPS_GPREL16 / R_MIPS_GPREL32 relocations. uint32_t mipsGp0 = 0; - uint32_t andFeatures = 0; - // True if the file defines functions compiled with // -fsplit-stack. Usually false. bool splitStack = false; @@ -282,14 +280,15 @@ public: DWARFCache *getDwarf(); private: - void initializeSections(bool ignoreComdats); - void initializeSymbols(); + void initializeSections(bool ignoreComdats, + const llvm::object::ELFFile<ELFT> &obj); + void initializeSymbols(const llvm::object::ELFFile<ELFT> &obj); void initializeJustSymbols(); InputSectionBase *getRelocTarget(uint32_t idx, const Elf_Shdr &sec, uint32_t info); InputSectionBase *createInputSection(uint32_t idx, const Elf_Shdr &sec, - StringRef shstrtab); + StringRef name); bool shouldMerge(const Elf_Shdr &sec, StringRef name); diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 943cf18e6cf0..4b047f75ad69 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -27,7 +27,6 @@ #include <algorithm> #include <mutex> #include <set> -#include <unordered_set> #include <vector> using namespace llvm; @@ -84,22 +83,7 @@ InputSectionBase::InputSectionBase(InputFile *file, uint64_t flags, if (!zlib::isAvailable()) error(toString(file) + ": contains a compressed section, " + "but zlib is not available"); - switch (config->ekind) { - case ELF32LEKind: - parseCompressedHeader<ELF32LE>(); - break; - case ELF32BEKind: - parseCompressedHeader<ELF32BE>(); - break; - case ELF64LEKind: - parseCompressedHeader<ELF64LE>(); - break; - case ELF64BEKind: - parseCompressedHeader<ELF64BE>(); - break; - default: - llvm_unreachable("unknown ELFT"); - } + invokeELFT(parseCompressedHeader); } } @@ -366,7 +350,7 @@ template <class ELFT> void InputSection::copyShtGroup(uint8_t *buf) { // different in the output. We also need to handle combined or discarded // members. ArrayRef<InputSectionBase *> sections = file->getSections(); - std::unordered_set<uint32_t> seen; + DenseSet<uint32_t> seen; for (uint32_t idx : from.slice(1)) { OutputSection *osec = sections[idx]->getOutputSection(); if (osec && seen.insert(osec->sectionIndex).second) @@ -1041,6 +1025,14 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) { } target.relocate(bufLoc, rel, targetVA); break; + case R_AARCH64_PAGE_PC: + if (i + 1 < size && aarch64relaxer.tryRelaxAdrpAdd( + rel, relocations[i + 1], secAddr, buf)) { + ++i; + continue; + } + target.relocate(bufLoc, rel, targetVA); + break; case R_PPC64_RELAX_GOT_PC: { // The R_PPC64_PCREL_OPT relocation must appear immediately after // R_PPC64_GOT_PCREL34 in the relocations table at the same offset. 
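The R_AARCH64_PAGE_PC case added above pairs an ADRP relocation with the ADD that follows it so that AArch64Relaxer::tryRelaxAdrpAdd (declared in the Target.h hunk later in this patch) can fold the pair into a shorter sequence. A minimal sketch of the transformation, for illustration only (encodings and relocation bookkeeping omitted; adrInRange is a hypothetical helper, not part of the patch):

    #include <cstdint>
    // Before relaxation: two instructions materialize the address of sym.
    //   adrp x0, sym             // page address of sym
    //   add  x0, x0, :lo12:sym   // low 12 bits of sym
    // After relaxation, valid only when sym is reachable by a single adr:
    //   nop
    //   adr  x0, sym             // PC-relative, +/-1 MiB range
    static bool adrInRange(uint64_t pc, uint64_t sym) {
      int64_t delta = static_cast<int64_t>(sym - pc);
      return delta >= -(1 << 20) && delta < (1 << 20); // adr: 21-bit signed immediate
    }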
@@ -1127,7 +1119,8 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) { // For each function-defining prologue, find any calls to __morestack, // and replace them with calls to __morestack_non_split. static void switchMorestackCallsToMorestackNonSplit( - DenseSet<Defined *> &prologues, std::vector<Relocation *> &morestackCalls) { + DenseSet<Defined *> &prologues, + SmallVector<Relocation *, 0> &morestackCalls) { // If the target adjusted a function's prologue, all calls to // __morestack inside that function should be switched to @@ -1177,7 +1170,7 @@ template <class ELFT> void InputSectionBase::adjustSplitStackFunctionPrologues(uint8_t *buf, uint8_t *end) { DenseSet<Defined *> prologues; - std::vector<Relocation *> morestackCalls; + SmallVector<Relocation *, 0> morestackCalls; for (Relocation &rel : relocations) { // Ignore calls into the split-stack api. @@ -1359,16 +1352,12 @@ void EhInputSection::split(ArrayRef<RelTy> rels) { } static size_t findNull(StringRef s, size_t entSize) { - // Optimize the common case. - if (entSize == 1) - return s.find(0); - for (unsigned i = 0, n = s.size(); i != n; i += entSize) { const char *b = s.begin() + i; if (std::all_of(b, b + entSize, [](char c) { return c == 0; })) return i; } - return StringRef::npos; + llvm_unreachable(""); } SyntheticSection *MergeInputSection::getParent() const { @@ -1377,20 +1366,24 @@ SyntheticSection *MergeInputSection::getParent() const { // Split SHF_STRINGS section. Such section is a sequence of // null-terminated strings. -void MergeInputSection::splitStrings(ArrayRef<uint8_t> data, size_t entSize) { - size_t off = 0; +void MergeInputSection::splitStrings(StringRef s, size_t entSize) { const bool live = !(flags & SHF_ALLOC) || !config->gcSections; - StringRef s = toStringRef(data); - - while (!s.empty()) { - size_t end = findNull(s, entSize); - if (end == StringRef::npos) - fatal(toString(this) + ": string is not null terminated"); - size_t size = end + entSize; - - pieces.emplace_back(off, xxHash64(s.substr(0, size)), live); - s = s.substr(size); - off += size; + const char *p = s.data(), *end = s.data() + s.size(); + if (!std::all_of(end - entSize, end, [](char c) { return c == 0; })) + fatal(toString(this) + ": string is not null terminated"); + if (entSize == 1) { + // Optimize the common case. 
+ do { + size_t size = strlen(p) + 1; + pieces.emplace_back(p - s.begin(), xxHash64(StringRef(p, size)), live); + p += size; + } while (p != end); + } else { + do { + size_t size = findNull(StringRef(p, end - p), entSize) + entSize; + pieces.emplace_back(p - s.begin(), xxHash64(StringRef(p, size)), live); + p += size; + } while (p != end); } } @@ -1402,7 +1395,7 @@ void MergeInputSection::splitNonStrings(ArrayRef<uint8_t> data, assert((size % entSize) == 0); const bool live = !(flags & SHF_ALLOC) || !config->gcSections; - pieces.assign(size / entSize, SectionPiece(0, 0, false)); + pieces.resize_for_overwrite(size / entSize); for (size_t i = 0, j = 0; i != size; i += entSize, j++) pieces[j] = {i, (uint32_t)xxHash64(data.slice(i, entSize)), live}; } @@ -1429,7 +1422,7 @@ void MergeInputSection::splitIntoPieces() { assert(pieces.empty()); if (flags & SHF_STRINGS) - splitStrings(data(), entsize); + splitStrings(toStringRef(data()), entsize); else splitNonStrings(data(), entsize); } diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h index d7dea9d2587a..f060a6337cf7 100644 --- a/lld/ELF/InputSection.h +++ b/lld/ELF/InputSection.h @@ -65,7 +65,7 @@ public: // The 1-indexed partition that this section is assigned to by the garbage // collector, or 0 if this section is dead. Normally there is only one // partition, so this will either be 0 or 1. - uint8_t partition; + uint8_t partition = 1; elf::Partition &getPartition() const; // These corresponds to the fields in Elf_Shdr. @@ -96,8 +96,8 @@ protected: uint32_t entsize, uint32_t alignment, uint32_t type, uint32_t info, uint32_t link) : name(name), sectionKind(sectionKind), bss(false), keepUnique(false), - partition(0), alignment(alignment), flags(flags), entsize(entsize), - type(type), link(link), info(info) {} + alignment(alignment), flags(flags), entsize(entsize), type(type), + link(link), info(info) {} }; // This corresponds to a section of an input file. @@ -119,6 +119,12 @@ public: // its static type. InputFile *file; + // Input sections are part of an output section. Special sections + // like .eh_frame and merge sections are first combined into a + // synthetic section that is then added to an output section. In all + // cases this points one level up. + SectionBase *parent = nullptr; + // Section index of the relocation section if exists. uint32_t relSecIdx = 0; @@ -159,12 +165,6 @@ public: return rawData; } - // Input sections are part of an output section. Special sections - // like .eh_frame and merge sections are first combined into a - // synthetic section that is then added to an output section. In all - // cases this points one level up. - SectionBase *parent = nullptr; - // The next member in the section group if this section is in a group. This is // used by --gc-sections. InputSectionBase *nextInSectionGroup = nullptr; @@ -242,6 +242,7 @@ protected: // have to be as compact as possible, which is why we don't store the size (can // be found by looking at the next one). 
struct SectionPiece { + SectionPiece() = default; SectionPiece(size_t off, uint32_t hash, bool live) : inputOff(off), live(live), hash(hash >> 1) {} @@ -292,7 +293,7 @@ public: SyntheticSection *getParent() const; private: - void splitStrings(ArrayRef<uint8_t> a, size_t size); + void splitStrings(StringRef s, size_t size); void splitNonStrings(ArrayRef<uint8_t> a, size_t size); }; diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp index 5b7ac6a5e925..88fcd8baf9c9 100644 --- a/lld/ELF/LTO.cpp +++ b/lld/ELF/LTO.cpp @@ -292,7 +292,7 @@ static void thinLTOCreateEmptyIndexFiles() { ModuleSummaryIndex m(/*HaveGVs*/ false); m.setSkipModuleByDistributedBackend(); - WriteIndexToFile(m, *os); + writeIndexToFile(m, *os); if (config->thinLTOEmitImportsFiles) openFile(path + ".imports"); } diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index bfb583453735..56d94744fae5 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -310,7 +310,7 @@ void LinkerScript::processInsertCommands() { for (const InsertCommand &cmd : insertCommands) { for (StringRef name : cmd.names) { // If base is empty, it may have been discarded by - // adjustSectionsBeforeSorting(). We do not handle such output sections. + // adjustOutputSections(). We do not handle such output sections. auto from = llvm::find_if(sectionCommands, [&](SectionCommand *subCmd) { return isa<OutputSection>(subCmd) && cast<OutputSection>(subCmd)->name == name; @@ -1114,7 +1114,7 @@ static void maybePropagatePhdrs(OutputSection &sec, } } -void LinkerScript::adjustSectionsBeforeSorting() { +void LinkerScript::adjustOutputSections() { // If the output section contains only symbol assignments, create a // corresponding output section. The issue is what to do with linker script // like ".foo : { symbol = 42; }". One option would be to convert it to @@ -1148,14 +1148,16 @@ void LinkerScript::adjustSectionsBeforeSorting() { sec->alignment = std::max<uint32_t>(sec->alignment, sec->alignExpr().getValue()); - // The input section might have been removed (if it was an empty synthetic - // section), but we at least know the flags. - if (sec->hasInputSections) + bool isEmpty = (getFirstInputSection(sec) == nullptr); + bool discardable = isEmpty && isDiscardable(*sec); + // If sec has at least one input section and not discarded, remember its + // flags to be inherited by subsequent output sections. (sec may contain + // just one empty synthetic section.) + if (sec->hasInputSections && !discardable) flags = sec->flags; // We do not want to keep any special flags for output section // in case it is empty. - bool isEmpty = (getFirstInputSection(sec) == nullptr); if (isEmpty) sec->flags = flags & ((sec->nonAlloc ? 
0 : (uint64_t)SHF_ALLOC) | SHF_WRITE | SHF_EXECINSTR); @@ -1172,7 +1174,7 @@ void LinkerScript::adjustSectionsBeforeSorting() { if (sec->sectionIndex != UINT32_MAX) maybePropagatePhdrs(*sec, defPhdrs); - if (isEmpty && isDiscardable(*sec)) { + if (discardable) { sec->markDead(); cmd = nullptr; } diff --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h index d2a6f5e9acb1..24c2c632f93b 100644 --- a/lld/ELF/LinkerScript.h +++ b/lld/ELF/LinkerScript.h @@ -319,7 +319,7 @@ public: void addOrphanSections(); void diagnoseOrphanHandling() const; - void adjustSectionsBeforeSorting(); + void adjustOutputSections(); void adjustSectionsAfterSorting(); SmallVector<PhdrEntry *, 0> createPhdrs(); diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp index 597c0684b8b2..017507c1abd0 100644 --- a/lld/ELF/MarkLive.cpp +++ b/lld/ELF/MarkLive.cpp @@ -371,9 +371,6 @@ template <class ELFT> void elf::markLive() { llvm::TimeTraceScope timeScope("markLive"); // If --gc-sections is not given, retain all input sections. if (!config->gcSections) { - for (InputSectionBase *sec : inputSections) - sec->markLive(); - // If a DSO defines a symbol referenced in a regular object, it is needed. for (Symbol *sym : symtab->symbols()) if (auto *s = dyn_cast<SharedSymbol>(sym)) @@ -382,6 +379,9 @@ template <class ELFT> void elf::markLive() { return; } + for (InputSectionBase *sec : inputSections) + sec->markDead(); + // Follow the graph to mark all live sections. for (unsigned curPart = 1; curPart <= partitions.size(); ++curPart) MarkLive<ELFT>(curPart).run(); diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp index c73d6e439238..241b3ea3b418 100644 --- a/lld/ELF/OutputSections.cpp +++ b/lld/ELF/OutputSections.cpp @@ -12,6 +12,7 @@ #include "SymbolTable.h" #include "SyntheticSections.h" #include "Target.h" +#include "lld/Common/Arrays.h" #include "lld/Common/Memory.h" #include "lld/Common/Strings.h" #include "llvm/BinaryFormat/Dwarf.h" @@ -21,8 +22,6 @@ #include "llvm/Support/Parallel.h" #include "llvm/Support/SHA1.h" #include "llvm/Support/TimeProfiler.h" -#include <regex> -#include <unordered_set> #if LLVM_ENABLE_ZLIB #include <zlib.h> #endif @@ -342,12 +341,8 @@ template <class ELFT> void OutputSection::maybeCompress() { // Split input into 1-MiB shards. constexpr size_t shardSize = 1 << 20; - const size_t numShards = (size + shardSize - 1) / shardSize; - auto shardsIn = std::make_unique<ArrayRef<uint8_t>[]>(numShards); - for (size_t i = 0, start = 0, end; start != size; ++i, start = end) { - end = std::min(start + shardSize, (size_t)size); - shardsIn[i] = makeArrayRef<uint8_t>(buf.get() + start, end - start); - } + auto shardsIn = split(makeArrayRef<uint8_t>(buf.get(), size), shardSize); + const size_t numShards = shardsIn.size(); // Compress shards and compute Adler-32 checksums. Use Z_SYNC_FLUSH for all // shards but the last to flush the output to a byte boundary to be @@ -471,7 +466,7 @@ static void finalizeShtGroup(OutputSection *os, // Some group members may be combined or discarded, so we need to compute the // new size. The content will be rewritten in InputSection::copyShtGroup. - std::unordered_set<uint32_t> seen; + DenseSet<uint32_t> seen; ArrayRef<InputSectionBase *> sections = section->file->getSections(); for (const uint32_t &idx : section->getDataAs<uint32_t>().slice(1)) if (OutputSection *osec = sections[read32(&idx)]->getOutputSection()) @@ -521,18 +516,15 @@ void OutputSection::finalize() { // crtbegin files. // // Gcc uses any of crtbegin[<empty>|S|T].o.
-// Clang uses Gcc's plus clang_rt.crtbegin[<empty>|S|T][-<arch>|<empty>].o. - -static bool isCrtbegin(StringRef s) { - static std::regex re(R"((clang_rt\.)?crtbegin[ST]?(-.*)?\.o)"); - s = sys::path::filename(s); - return std::regex_match(s.begin(), s.end(), re); -} +// Clang uses Gcc's plus clang_rt.crtbegin[-<arch>|<empty>].o. -static bool isCrtend(StringRef s) { - static std::regex re(R"((clang_rt\.)?crtend[ST]?(-.*)?\.o)"); +static bool isCrt(StringRef s, StringRef beginEnd) { s = sys::path::filename(s); - return std::regex_match(s.begin(), s.end(), re); + if (!s.consume_back(".o")) + return false; + if (s.consume_front("clang_rt.")) + return s.consume_front(beginEnd); + return s.consume_front(beginEnd) && s.size() <= 1; } // .ctors and .dtors are sorted by this order: @@ -554,12 +546,12 @@ static bool isCrtend(StringRef s) { // are too many real-world use cases of .ctors, so we had no choice but to // support that with this rather ad-hoc semantics. static bool compCtors(const InputSection *a, const InputSection *b) { - bool beginA = isCrtbegin(a->file->getName()); - bool beginB = isCrtbegin(b->file->getName()); + bool beginA = isCrt(a->file->getName(), "crtbegin"); + bool beginB = isCrt(b->file->getName(), "crtbegin"); if (beginA != beginB) return beginA; - bool endA = isCrtend(a->file->getName()); - bool endB = isCrtend(b->file->getName()); + bool endA = isCrt(a->file->getName(), "crtend"); + bool endB = isCrt(b->file->getName(), "crtend"); if (endA != endB) return endB; return getPriority(a->name) > getPriority(b->name); diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index f125e3f0a51a..986c1308cbaf 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -83,8 +83,10 @@ static ArrayRef<uint8_t> getVersion() { // by "readelf --string-dump .comment <file>". // The returned object is a mergeable string section. MergeInputSection *elf::createCommentSection() { - return make<MergeInputSection>(SHF_MERGE | SHF_STRINGS, SHT_PROGBITS, 1, - getVersion(), ".comment"); + auto *sec = make<MergeInputSection>(SHF_MERGE | SHF_STRINGS, SHT_PROGBITS, 1, + getVersion(), ".comment"); + sec->splitIntoPieces(); + return sec; } // .MIPS.abiflags section. @@ -100,7 +102,7 @@ template <class ELFT> void MipsAbiFlagsSection<ELFT>::writeTo(uint8_t *buf) { } template <class ELFT> -MipsAbiFlagsSection<ELFT> *MipsAbiFlagsSection<ELFT>::create() { +std::unique_ptr<MipsAbiFlagsSection<ELFT>> MipsAbiFlagsSection<ELFT>::create() { Elf_Mips_ABIFlags flags = {}; bool create = false; @@ -142,7 +144,7 @@ MipsAbiFlagsSection<ELFT> *MipsAbiFlagsSection<ELFT>::create() { }; if (create) - return make<MipsAbiFlagsSection<ELFT>>(flags); + return std::make_unique<MipsAbiFlagsSection<ELFT>>(flags); return nullptr; } @@ -165,7 +167,7 @@ template <class ELFT> void MipsOptionsSection<ELFT>::writeTo(uint8_t *buf) { } template <class ELFT> -MipsOptionsSection<ELFT> *MipsOptionsSection<ELFT>::create() { +std::unique_ptr<MipsOptionsSection<ELFT>> MipsOptionsSection<ELFT>::create() { // N64 ABI only. if (!ELFT::Is64Bits) return nullptr; @@ -204,7 +206,7 @@ MipsOptionsSection<ELFT> *MipsOptionsSection<ELFT>::create() { } }; - return make<MipsOptionsSection<ELFT>>(reginfo); + return std::make_unique<MipsOptionsSection<ELFT>>(reginfo); } // MIPS .reginfo section.
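Since the regex-based matchers above were replaced with plain StringRef operations, here is a standalone copy of the new isCrt with a few worked examples; a sketch for experimentation only (the function body is from the hunk above, the example table is not part of the patch):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/Path.h"

    // Same logic as the isCrt added above: strip the ".o" suffix, then accept
    // either "clang_rt." + crtbegin/crtend (any trailing arch suffix), or
    // crtbegin/crtend followed by at most one character (the old S/T variants).
    static bool isCrt(llvm::StringRef s, llvm::StringRef beginEnd) {
      s = llvm::sys::path::filename(s);
      if (!s.consume_back(".o"))
        return false;
      if (s.consume_front("clang_rt."))
        return s.consume_front(beginEnd);
      return s.consume_front(beginEnd) && s.size() <= 1;
    }
    // isCrt("crtbegin.o", "crtbegin")             -> true
    // isCrt("crtbeginS.o", "crtbegin")            -> true
    // isCrt("clang_rt.crtend-x86_64.o", "crtend") -> true
    // isCrt("mycrtbegin.o", "crtbegin")           -> false (prefix must match exactly)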
@@ -222,7 +224,7 @@ template <class ELFT> void MipsReginfoSection<ELFT>::writeTo(uint8_t *buf) { } template <class ELFT> -MipsReginfoSection<ELFT> *MipsReginfoSection<ELFT>::create() { +std::unique_ptr<MipsReginfoSection<ELFT>> MipsReginfoSection<ELFT>::create() { // Section should be alive for O32 and N32 ABIs only. if (ELFT::Is64Bits) return nullptr; @@ -249,7 +251,7 @@ MipsReginfoSection<ELFT> *MipsReginfoSection<ELFT>::create() { sec->getFile<ELFT>()->mipsGp0 = r->ri_gp_value; }; - return make<MipsReginfoSection<ELFT>>(reginfo); + return std::make_unique<MipsReginfoSection<ELFT>>(reginfo); } InputSection *elf::createInterpSection() { @@ -1228,7 +1230,8 @@ StringTableSection::StringTableSection(StringRef name, bool dynamic) : SyntheticSection(dynamic ? (uint64_t)SHF_ALLOC : 0, SHT_STRTAB, 1, name), dynamic(dynamic) { // ELF string tables start with a NUL byte. - addString(""); + strings.push_back(""); + size = 1; } // Adds a string to the string table. If `hashIt` is true we hash and check for @@ -1241,6 +1244,8 @@ unsigned StringTableSection::addString(StringRef s, bool hashIt) { if (!r.second) return r.first->second; } + if (s.empty()) + return 0; unsigned ret = this->size; this->size = this->size + s.size() + 1; strings.push_back(s); @@ -1589,9 +1594,11 @@ uint32_t DynamicReloc::getSymIndex(SymbolTableBaseSection *symTab) const { RelocationBaseSection::RelocationBaseSection(StringRef name, uint32_t type, int32_t dynamicTag, - int32_t sizeDynamicTag) + int32_t sizeDynamicTag, + bool combreloc) : SyntheticSection(SHF_ALLOC, type, config->wordsize, name), - dynamicTag(dynamicTag), sizeDynamicTag(sizeDynamicTag) {} + dynamicTag(dynamicTag), sizeDynamicTag(sizeDynamicTag), + combreloc(combreloc) {} void RelocationBaseSection::addSymbolReloc(RelType dynType, InputSectionBase &isec, @@ -1640,10 +1647,13 @@ void RelocationBaseSection::addReloc(DynamicReloc::Kind kind, RelType dynType, addReloc({dynType, &inputSec, offsetInSec, kind, sym, addend, expr}); } -void RelocationBaseSection::addReloc(const DynamicReloc &reloc) { - if (reloc.type == target->relativeRel) - ++numRelativeRelocs; - relocs.push_back(reloc); +void RelocationBaseSection::partitionRels() { + if (!combreloc) + return; + const RelType relativeRel = target->relativeRel; + numRelativeRelocs = + llvm::partition(relocs, [=](auto &r) { return r.type == relativeRel; }) - + relocs.begin(); } void RelocationBaseSection::finalizeContents() { @@ -1667,20 +1677,6 @@ void RelocationBaseSection::finalizeContents() { } } -RelrBaseSection::RelrBaseSection() - : SyntheticSection(SHF_ALLOC, - config->useAndroidRelrTags ? SHT_ANDROID_RELR : SHT_RELR, - config->wordsize, ".relr.dyn") {} - -template <class ELFT> -static void encodeDynamicReloc(typename ELFT::Rela *p, - const DynamicReloc &rel) { - p->r_offset = rel.r_offset; - p->setSymbolAndType(rel.r_sym, rel.type, config->isMips64EL); - if (config->isRela) - p->r_addend = rel.addend; -} - void DynamicReloc::computeRaw(SymbolTableBaseSection *symtab) { r_offset = getOffset(); r_sym = getSymIndex(symtab); @@ -1688,27 +1684,15 @@ void DynamicReloc::computeRaw(SymbolTableBaseSection *symtab) { kind = AddendOnly; // Catch errors } -template <class ELFT> -RelocationSection<ELFT>::RelocationSection(StringRef name, bool sort) - : RelocationBaseSection(name, config->isRela ? SHT_RELA : SHT_REL, - config->isRela ? DT_RELA : DT_REL, - config->isRela ? DT_RELASZ : DT_RELSZ), - sort(sort) { - this->entsize = config->isRela ? 
sizeof(Elf_Rela) : sizeof(Elf_Rel); -} - -template <class ELFT> void RelocationSection<ELFT>::writeTo(uint8_t *buf) { +void RelocationBaseSection::computeRels() { SymbolTableBaseSection *symTab = getPartition().dynSymTab.get(); - parallelForEach(relocs, [symTab](DynamicReloc &rel) { rel.computeRaw(symTab); }); // Sort by (!IsRelative,SymIndex,r_offset). DT_REL[A]COUNT requires us to // place R_*_RELATIVE first. SymIndex is to improve locality, while r_offset // is to make results easier to read. - if (sort) { - const RelType relativeRel = target->relativeRel; - auto nonRelative = - llvm::partition(relocs, [=](auto &r) { return r.type == relativeRel; }); + if (combreloc) { + auto nonRelative = relocs.begin() + numRelativeRelocs; parallelSort(relocs.begin(), nonRelative, [&](auto &a, auto &b) { return a.r_offset < b.r_offset; }); // Non-relative relocations are few, so don't bother with parallelSort. @@ -1716,20 +1700,41 @@ template <class ELFT> void RelocationSection<ELFT>::writeTo(uint8_t *buf) { return std::tie(a.r_sym, a.r_offset) < std::tie(b.r_sym, b.r_offset); }); } +} +template <class ELFT> +RelocationSection<ELFT>::RelocationSection(StringRef name, bool combreloc) + : RelocationBaseSection(name, config->isRela ? SHT_RELA : SHT_REL, + config->isRela ? DT_RELA : DT_REL, + config->isRela ? DT_RELASZ : DT_RELSZ, combreloc) { + this->entsize = config->isRela ? sizeof(Elf_Rela) : sizeof(Elf_Rel); +} + +template <class ELFT> void RelocationSection<ELFT>::writeTo(uint8_t *buf) { + computeRels(); for (const DynamicReloc &rel : relocs) { - encodeDynamicReloc<ELFT>(reinterpret_cast<Elf_Rela *>(buf), rel); + auto *p = reinterpret_cast<Elf_Rela *>(buf); + p->r_offset = rel.r_offset; + p->setSymbolAndType(rel.r_sym, rel.type, config->isMips64EL); + if (config->isRela) + p->r_addend = rel.addend; buf += config->isRela ? sizeof(Elf_Rela) : sizeof(Elf_Rel); } } +RelrBaseSection::RelrBaseSection() + : SyntheticSection(SHF_ALLOC, + config->useAndroidRelrTags ? SHT_ANDROID_RELR : SHT_RELR, + config->wordsize, ".relr.dyn") {} + template <class ELFT> AndroidPackedRelocationSection<ELFT>::AndroidPackedRelocationSection( StringRef name) : RelocationBaseSection( name, config->isRela ? SHT_ANDROID_RELA : SHT_ANDROID_REL, config->isRela ? DT_ANDROID_RELA : DT_ANDROID_REL, - config->isRela ? DT_ANDROID_RELASZ : DT_ANDROID_RELSZ) { + config->isRela ? DT_ANDROID_RELASZ : DT_ANDROID_RELSZ, + /*combreloc=*/false) { this->entsize = 1; } @@ -2153,7 +2158,7 @@ void SymbolTableBaseSection::addSymbol(Symbol *b) { // Adding a local symbol to a .dynsym is a bug. 
assert(this->type != SHT_DYNSYM || !b->isLocal()); - bool hashIt = b->isLocal(); + bool hashIt = b->isLocal() && config->optimize >= 2; symbols.push_back({b, strTabSec.addString(b->getName(), hashIt)}); } @@ -2765,13 +2770,13 @@ readAddressAreas(DWARFContext &dwarf, InputSection *sec) { } template <class ELFT> -static std::vector<GdbIndexSection::NameAttrEntry> +static SmallVector<GdbIndexSection::NameAttrEntry, 0> readPubNamesAndTypes(const LLDDwarfObj<ELFT> &obj, const SmallVectorImpl<GdbIndexSection::CuEntry> &cus) { const LLDDWARFSection &pubNames = obj.getGnuPubnamesSection(); const LLDDWARFSection &pubTypes = obj.getGnuPubtypesSection(); - std::vector<GdbIndexSection::NameAttrEntry> ret; + SmallVector<GdbIndexSection::NameAttrEntry, 0> ret; for (const LLDDWARFSection *pub : {&pubNames, &pubTypes}) { DWARFDataExtractor data(obj, *pub, config->isLE, config->wordsize); DWARFDebugPubTable table; @@ -2798,9 +2803,9 @@ readPubNamesAndTypes(const LLDDwarfObj<ELFT> &obj, // Create a list of symbols from a given list of symbol names and types // by uniquifying them by name. -static std::vector<GdbIndexSection::GdbSymbol> -createSymbols(ArrayRef<std::vector<GdbIndexSection::NameAttrEntry>> nameAttrs, - const std::vector<GdbIndexSection::GdbChunk> &chunks) { +static SmallVector<GdbIndexSection::GdbSymbol, 0> createSymbols( + ArrayRef<SmallVector<GdbIndexSection::NameAttrEntry, 0>> nameAttrs, + const SmallVector<GdbIndexSection::GdbChunk, 0> &chunks) { using GdbSymbol = GdbIndexSection::GdbSymbol; using NameAttrEntry = GdbIndexSection::NameAttrEntry; @@ -2827,7 +2832,7 @@ createSymbols(ArrayRef<std::vector<GdbIndexSection::NameAttrEntry>> nameAttrs, size_t shift = 32 - countTrailingZeros(numShards); // Instantiate GdbSymbols while uniquifying them by name. - auto symbols = std::make_unique<std::vector<GdbSymbol>[]>(numShards); + auto symbols = std::make_unique<SmallVector<GdbSymbol, 0>[]>(numShards); parallelForEachN(0, concurrency, [&](size_t threadId) { uint32_t i = 0; @@ -2857,9 +2862,9 @@ createSymbols(ArrayRef<std::vector<GdbIndexSection::NameAttrEntry>> nameAttrs, // The return type is a flattened vector, so we'll copy each vector's // contents to Ret. - std::vector<GdbSymbol> ret; + SmallVector<GdbSymbol, 0> ret; ret.reserve(numSymbols); - for (std::vector<GdbSymbol> &vec : + for (SmallVector<GdbSymbol, 0> &vec : makeMutableArrayRef(symbols.get(), numShards)) for (GdbSymbol &sym : vec) ret.push_back(std::move(sym)); @@ -2906,8 +2911,8 @@ template <class ELFT> GdbIndexSection *GdbIndexSection::create() { return !s->isLive(); }); - std::vector<GdbChunk> chunks(files.size()); - std::vector<std::vector<NameAttrEntry>> nameAttrs(files.size()); + SmallVector<GdbChunk, 0> chunks(files.size()); + SmallVector<SmallVector<NameAttrEntry, 0>, 0> nameAttrs(files.size()); parallelForEachN(0, files.size(), [&](size_t i) { // To keep memory usage low, we don't want to keep cached DWARFContext, so @@ -3328,11 +3333,15 @@ template <class ELFT> void elf::splitSections() { llvm::TimeTraceScope timeScope("Split sections"); // splitIntoPieces needs to be called on each MergeInputSection // before calling finalizeContents().
- parallelForEach(inputSections, [](InputSectionBase *sec) { - if (auto *s = dyn_cast<MergeInputSection>(sec)) - s->splitIntoPieces(); - else if (auto *eh = dyn_cast<EhInputSection>(sec)) - eh->split<ELFT>(); + parallelForEach(objectFiles, [](ELFFileBase *file) { + for (InputSectionBase *sec : file->getSections()) { + if (!sec) + continue; + if (auto *s = dyn_cast<MergeInputSection>(sec)) + s->splitIntoPieces(); + else if (auto *eh = dyn_cast<EhInputSection>(sec)) + eh->split<ELFT>(); + } }); } @@ -3476,7 +3485,7 @@ void ARMExidxSyntheticSection::finalizeContents() { sentinel = executableSections.back(); // Optionally merge adjacent duplicate entries. if (config->mergeArmExidx) { - std::vector<InputSection *> selectedSections; + SmallVector<InputSection *, 0> selectedSections; selectedSections.reserve(executableSections.size()); selectedSections.push_back(executableSections[0]); size_t prev = 0; @@ -3813,7 +3822,10 @@ void InStruct::reset() { gotPlt.reset(); igotPlt.reset(); ppc64LongBranchTarget.reset(); + mipsAbiFlags.reset(); mipsGot.reset(); + mipsOptions.reset(); + mipsReginfo.reset(); mipsRldMap.reset(); partEnd.reset(); partIndex.reset(); diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h index 9f4073048ce5..1b63a5d29d10 100644 --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -40,9 +40,7 @@ public: SyntheticSection(uint64_t flags, uint32_t type, uint32_t alignment, StringRef name) : InputSection(nullptr, flags, type, alignment, {}, name, - InputSectionBase::Synthetic) { - markLive(); - } + InputSectionBase::Synthetic) {} virtual ~SyntheticSection() = default; virtual void writeTo(uint8_t *buf) = 0; @@ -515,11 +513,11 @@ private: class RelocationBaseSection : public SyntheticSection { public: RelocationBaseSection(StringRef name, uint32_t type, int32_t dynamicTag, - int32_t sizeDynamicTag); + int32_t sizeDynamicTag, bool combreloc); /// Add a dynamic relocation without writing an addend to the output section. /// This overload can be used if the addends are written directly instead of /// using relocations on the input section (e.g. MipsGotSection::writeTo()). - void addReloc(const DynamicReloc &reloc); + void addReloc(const DynamicReloc &reloc) { relocs.push_back(reloc); } /// Add a dynamic relocation against \p sym with an optional addend. void addSymbolReloc(RelType dynType, InputSectionBase &isec, uint64_t offsetInSec, Symbol &sym, int64_t addend = 0, @@ -541,6 +539,7 @@ public: bool isNeeded() const override { return !relocs.empty(); } size_t getSize() const override { return relocs.size() * this->entsize; } size_t getRelativeRelocCount() const { return numRelativeRelocs; } + void partitionRels(); void finalizeContents() override; static bool classof(const SectionBase *d) { return SyntheticSection::classof(d) && @@ -551,7 +550,9 @@ public: SmallVector<DynamicReloc, 0> relocs; protected: - size_t numRelativeRelocs = 0; + void computeRels(); + size_t numRelativeRelocs = 0; // used by -z combreloc + bool combreloc; }; template <class ELFT> @@ -560,11 +561,8 @@ class RelocationSection final : public RelocationBaseSection { using Elf_Rela = typename ELFT::Rela; public: - RelocationSection(StringRef name, bool sort); + RelocationSection(StringRef name, bool combreloc); void writeTo(uint8_t *buf) override; - -private: - bool sort; }; template <class ELFT> @@ -820,10 +818,10 @@ private: // Each chunk contains information gathered from debug sections of a // single object file. 
- std::vector<GdbChunk> chunks; + SmallVector<GdbChunk, 0> chunks; // A symbol table for this .gdb_index section. - std::vector<GdbSymbol> symbols; + SmallVector<GdbSymbol, 0> symbols; size_t size; }; @@ -980,7 +978,7 @@ class MipsAbiFlagsSection final : public SyntheticSection { using Elf_Mips_ABIFlags = llvm::object::Elf_Mips_ABIFlags<ELFT>; public: - static MipsAbiFlagsSection *create(); + static std::unique_ptr<MipsAbiFlagsSection> create(); MipsAbiFlagsSection(Elf_Mips_ABIFlags flags); size_t getSize() const override { return sizeof(Elf_Mips_ABIFlags); } @@ -996,7 +994,7 @@ template <class ELFT> class MipsOptionsSection final : public SyntheticSection { using Elf_Mips_RegInfo = llvm::object::Elf_Mips_RegInfo<ELFT>; public: - static MipsOptionsSection *create(); + static std::unique_ptr<MipsOptionsSection<ELFT>> create(); MipsOptionsSection(Elf_Mips_RegInfo reginfo); void writeTo(uint8_t *buf) override; @@ -1014,7 +1012,7 @@ template <class ELFT> class MipsReginfoSection final : public SyntheticSection { using Elf_Mips_RegInfo = llvm::object::Elf_Mips_RegInfo<ELFT>; public: - static MipsReginfoSection *create(); + static std::unique_ptr<MipsReginfoSection> create(); MipsReginfoSection(Elf_Mips_RegInfo reginfo); size_t getSize() const override { return sizeof(Elf_Mips_RegInfo); } @@ -1088,7 +1086,7 @@ public: // Links to the ARMExidxSections so we can transfer the relocations once the // layout is known. - std::vector<InputSection *> exidxSections; + SmallVector<InputSection *, 0> exidxSections; private: size_t size = 0; @@ -1097,7 +1095,7 @@ private: // InputObjects, we store pointers to the executable sections that need // .ARM.exidx sections. We can then use the dependentSections of these to // either find the .ARM.exidx section or know that we need to generate one. - std::vector<InputSection *> executableSections; + SmallVector<InputSection *, 0> executableSections; // The executable InputSection with the highest address to use for the // sentinel. 
We store separately from ExecutableSections as merging of @@ -1244,7 +1242,10 @@ struct InStruct { std::unique_ptr<GotPltSection> gotPlt; std::unique_ptr<IgotPltSection> igotPlt; std::unique_ptr<PPC64LongBranchTargetSection> ppc64LongBranchTarget; + std::unique_ptr<SyntheticSection> mipsAbiFlags; std::unique_ptr<MipsGotSection> mipsGot; + std::unique_ptr<SyntheticSection> mipsOptions; + std::unique_ptr<SyntheticSection> mipsReginfo; std::unique_ptr<MipsRldMapSection> mipsRldMap; std::unique_ptr<SyntheticSection> partEnd; std::unique_ptr<SyntheticSection> partIndex; diff --git a/lld/ELF/Target.cpp b/lld/ELF/Target.cpp index f0e7ebfc64df..7bc5121eabe4 100644 --- a/lld/ELF/Target.cpp +++ b/lld/ELF/Target.cpp @@ -91,7 +91,7 @@ TargetInfo *elf::getTarget() { llvm_unreachable("unknown target machine"); } -template <class ELFT> static ErrorPlace getErrPlace(const uint8_t *loc) { +ErrorPlace elf::getErrorPlace(const uint8_t *loc) { assert(loc != nullptr); for (InputSectionBase *d : inputSections) { auto *isec = cast<InputSection>(d); @@ -118,21 +118,6 @@ template <class ELFT> static ErrorPlace getErrPlace(const uint8_t *loc) { return {}; } -ErrorPlace elf::getErrorPlace(const uint8_t *loc) { - switch (config->ekind) { - case ELF32LEKind: - return getErrPlace<ELF32LE>(loc); - case ELF32BEKind: - return getErrPlace<ELF32BE>(loc); - case ELF64LEKind: - return getErrPlace<ELF64LE>(loc); - case ELF64BEKind: - return getErrPlace<ELF64BE>(loc); - default: - llvm_unreachable("unknown ELF type"); - } -} - TargetInfo::~TargetInfo() {} int64_t TargetInfo::getImplicitAddend(const uint8_t *buf, RelType type) const { diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h index f7b947ec3aa2..e002114f8439 100644 --- a/lld/ELF/Target.h +++ b/lld/ELF/Target.h @@ -227,6 +227,8 @@ class AArch64Relaxer { public: explicit AArch64Relaxer(ArrayRef<Relocation> relocs); + bool tryRelaxAdrpAdd(const Relocation &adrpRel, const Relocation &addRel, + uint64_t secAddr, uint8_t *buf) const; bool tryRelaxAdrpLdr(const Relocation &adrpRel, const Relocation &ldrRel, uint64_t secAddr, uint8_t *buf) const; }; @@ -298,4 +300,25 @@ inline void write64(void *p, uint64_t v) { } // namespace elf } // namespace lld +#ifdef __clang__ +#pragma clang diagnostic ignored "-Wgnu-zero-variadic-macro-arguments" +#endif +#define invokeELFT(f, ...) \ + switch (config->ekind) { \ + case ELF32LEKind: \ + f<ELF32LE>(__VA_ARGS__); \ + break; \ + case ELF32BEKind: \ + f<ELF32BE>(__VA_ARGS__); \ + break; \ + case ELF64LEKind: \ + f<ELF64LE>(__VA_ARGS__); \ + break; \ + case ELF64BEKind: \ + f<ELF64BE>(__VA_ARGS__); \ + break; \ + default: \ + llvm_unreachable("unknown config->ekind"); \ + } + #endif diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 69fcad390d61..9383ac46c8e7 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -315,8 +315,7 @@ template <class ELFT> void elf::createSyntheticSections() { // If there is a SECTIONS command and a .data.rel.ro section name use name // .data.rel.ro.bss so that we match in the .data.rel.ro output section. // This makes sure our relro is contiguous. - bool hasDataRelRo = - script->hasSectionsCommand && findSection(".data.rel.ro", 0); + bool hasDataRelRo = script->hasSectionsCommand && findSection(".data.rel.ro"); in.bssRelRo = std::make_unique<BssSection>( hasDataRelRo ? 
".data.rel.ro.bss" : ".bss.rel.ro", 0, 1); add(*in.bssRelRo); @@ -327,12 +326,12 @@ template <class ELFT> void elf::createSyntheticSections() { in.mipsRldMap = std::make_unique<MipsRldMapSection>(); add(*in.mipsRldMap); } - if (auto *sec = MipsAbiFlagsSection<ELFT>::create()) - add(*sec); - if (auto *sec = MipsOptionsSection<ELFT>::create()) - add(*sec); - if (auto *sec = MipsReginfoSection<ELFT>::create()) - add(*sec); + if ((in.mipsAbiFlags = MipsAbiFlagsSection<ELFT>::create())) + add(*in.mipsAbiFlags); + if ((in.mipsOptions = MipsOptionsSection<ELFT>::create())) + add(*in.mipsOptions); + if ((in.mipsReginfo = MipsReginfoSection<ELFT>::create())) + add(*in.mipsReginfo); } StringRef relaDynName = config->isRela ? ".rela.dyn" : ".rel.dyn"; @@ -1430,22 +1429,19 @@ template <class ELFT> void Writer<ELFT>::sortInputSections() { template <class ELFT> void Writer<ELFT>::sortSections() { llvm::TimeTraceScope timeScope("Sort sections"); - script->adjustSectionsBeforeSorting(); // Don't sort if using -r. It is not necessary and we want to preserve the // relative order for SHF_LINK_ORDER sections. - if (config->relocatable) + if (config->relocatable) { + script->adjustOutputSections(); return; + } sortInputSections(); - for (SectionCommand *cmd : script->sectionCommands) { - auto *os = dyn_cast<OutputSection>(cmd); - if (!os) - continue; - os->sortRank = getSectionRank(os); - } - + for (SectionCommand *cmd : script->sectionCommands) + if (auto *osec = dyn_cast_or_null<OutputSection>(cmd)) + osec->sortRank = getSectionRank(osec); if (!script->hasSectionsCommand) { // We know that all the OutputSections are contiguous in this case. auto isSection = [](SectionCommand *cmd) { @@ -1455,14 +1451,15 @@ template <class ELFT> void Writer<ELFT>::sortSections() { llvm::find_if(script->sectionCommands, isSection), llvm::find_if(llvm::reverse(script->sectionCommands), isSection).base(), compareSections); - - // Process INSERT commands. From this point onwards the order of - // script->sectionCommands is fixed. - script->processInsertCommands(); - return; } + // Process INSERT commands and update output section attributes. From this + // point onwards the order of script->sectionCommands is fixed. script->processInsertCommands(); + script->adjustOutputSections(); + + if (!script->hasSectionsCommand) + return; // Orphan sections are sections present in the input files which are // not explicitly placed into the output file by the linker script. @@ -1571,8 +1568,8 @@ template <class ELFT> void Writer<ELFT>::resolveShfLinkOrder() { // Link order may be distributed across several InputSectionDescriptions. // Sorting is performed separately. - std::vector<InputSection **> scriptSections; - std::vector<InputSection *> sections; + SmallVector<InputSection **, 0> scriptSections; + SmallVector<InputSection *, 0> sections; for (SectionCommand *cmd : sec->commands) { auto *isd = dyn_cast<InputSectionDescription>(cmd); if (!isd) @@ -2086,11 +2083,16 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() { // Dynamic section must be the last one in this list and dynamic // symbol table section (dynSymTab) must be the first one. for (Partition &part : partitions) { + if (part.relaDyn) { + // Compute DT_RELACOUNT to be used by part.dynamic. 
+ part.relaDyn->partitionRels(); + finalizeSynthetic(part.relaDyn.get()); + } + finalizeSynthetic(part.dynSymTab.get()); finalizeSynthetic(part.gnuHashTab.get()); finalizeSynthetic(part.hashTab.get()); finalizeSynthetic(part.verDef.get()); - finalizeSynthetic(part.relaDyn.get()); finalizeSynthetic(part.relrDyn.get()); finalizeSynthetic(part.ehFrameHdr.get()); finalizeSynthetic(part.verSym.get()); diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index e4c9f4dd6024..f0cddab94f55 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -1484,6 +1484,12 @@ bool macho::link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS, inputFiles.insert(make<OpaqueFile>(*buffer, segName, sectName)); } + for (const Arg *arg : args.filtered(OPT_add_empty_section)) { + StringRef segName = arg->getValue(0); + StringRef sectName = arg->getValue(1); + inputFiles.insert(make<OpaqueFile>(MemoryBufferRef(), segName, sectName)); + } + gatherInputSections(); if (config->callGraphProfileSort) extractCallGraphProfile(); diff --git a/lld/MachO/Driver.h b/lld/MachO/Driver.h index c2933344e611..dbfc05a0497c 100644 --- a/lld/MachO/Driver.h +++ b/lld/MachO/Driver.h @@ -81,9 +81,8 @@ public: notFounds.insert(path.str()); } - // Writes the dependencies to specified path. - // The content is sorted by its Op Code, then within each section, - // alphabetical order. + // Writes the dependencies to specified path. The content is first sorted by + // OpCode and then by the filename (in alphabetical order). void write(llvm::StringRef version, const llvm::SetVector<InputFile *> &inputs, llvm::StringRef output); diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h index 6a4a4fdb43b6..0b661c828c7c 100644 --- a/lld/MachO/InputFiles.h +++ b/lld/MachO/InputFiles.h @@ -177,6 +177,7 @@ public: void parseLoadCommands(MemoryBufferRef mb); void parseReexports(const llvm::MachO::InterfaceFile &interface); + bool isReferenced() const { return numReferencedSymbols > 0; } static bool classof(const InputFile *f) { return f->kind() == DylibKind; } @@ -187,21 +188,17 @@ public: uint32_t compatibilityVersion = 0; uint32_t currentVersion = 0; int64_t ordinal = 0; // Ordinal numbering starts from 1, so 0 is a sentinel + unsigned numReferencedSymbols = 0; RefState refState; bool reexport = false; bool forceNeeded = false; bool forceWeakImport = false; bool deadStrippable = false; bool explicitlyLinked = false; - - unsigned numReferencedSymbols = 0; - - bool isReferenced() const { return numReferencedSymbols > 0; } - // An executable can be used as a bundle loader that will load the output // file being linked, and that contains symbols referenced, but not // implemented in the bundle. When used like this, it is very similar - // to a Dylib, so we re-used the same class to represent it. + // to a dylib, so we've used the same class to represent it. bool isBundleLoader; private: diff --git a/lld/MachO/MapFile.cpp b/lld/MachO/MapFile.cpp index 93abea2ed08b..8f9381ff0d79 100644 --- a/lld/MachO/MapFile.cpp +++ b/lld/MachO/MapFile.cpp @@ -40,18 +40,32 @@ using namespace llvm::sys; using namespace lld; using namespace lld::macho; -// Returns a list of all symbols that we want to print out. -static std::vector<Defined *> getSymbols() { - std::vector<Defined *> v; +using Symbols = std::vector<Defined *>; +// Returns a pair where the left element is a container of all live Symbols and +// the right element is a container of all dead symbols. 
+static std::pair<Symbols, Symbols> getSymbols() { + Symbols liveSymbols, deadSymbols; for (InputFile *file : inputFiles) if (isa<ObjFile>(file)) for (Symbol *sym : file->symbols) if (auto *d = dyn_cast_or_null<Defined>(sym)) - if (d->isLive() && d->isec && d->getFile() == file) { - assert(!shouldOmitFromOutput(d->isec)); - v.push_back(d); + if (d->isec && d->getFile() == file) { + if (d->isLive()) { + assert(!shouldOmitFromOutput(d->isec)); + liveSymbols.push_back(d); + } else { + deadSymbols.push_back(d); + } } - return v; + parallelSort(liveSymbols.begin(), liveSymbols.end(), + [](Defined *a, Defined *b) { + return a->getVA() != b->getVA() ? a->getVA() < b->getVA() + : a->getName() < b->getName(); + }); + parallelSort( + deadSymbols.begin(), deadSymbols.end(), + [](Defined *a, Defined *b) { return a->getName() < b->getName(); }); + return {std::move(liveSymbols), std::move(deadSymbols)}; } // Construct a map from symbols to their stringified representations. @@ -104,14 +118,6 @@ void macho::writeMapFile() { } } - // Collect symbol info that we want to print out. - std::vector<Defined *> syms = getSymbols(); - parallelSort(syms.begin(), syms.end(), [](Defined *a, Defined *b) { - return a->getVA() != b->getVA() ? a->getVA() < b->getVA() - : a->getName() < b->getName(); - }); - DenseMap<Symbol *, std::string> symStr = getSymbolStrings(syms); - // Dump table of sections os << "# Sections:\n"; os << "# Address\tSize \tSegment\tSection\n"; @@ -125,12 +131,29 @@ void macho::writeMapFile() { } // Dump table of symbols + Symbols liveSymbols, deadSymbols; + std::tie(liveSymbols, deadSymbols) = getSymbols(); + + DenseMap<Symbol *, std::string> liveSymbolStrings = + getSymbolStrings(liveSymbols); os << "# Symbols:\n"; os << "# Address\t File Name\n"; - for (Symbol *sym : syms) { + for (Symbol *sym : liveSymbols) { + assert(sym->isLive()); os << format("0x%08llX\t[%3u] %s\n", sym->getVA(), - readerToFileOrdinal[sym->getFile()], symStr[sym].c_str()); + readerToFileOrdinal[sym->getFile()], + liveSymbolStrings[sym].c_str()); } - // TODO: when we implement -dead_strip, we should dump dead stripped symbols + if (config->deadStrip) { + DenseMap<Symbol *, std::string> deadSymbolStrings = + getSymbolStrings(deadSymbols); + os << "# Dead Stripped Symbols:\n"; + os << "# Address\t File Name\n"; + for (Symbol *sym : deadSymbols) { + assert(!sym->isLive()); + os << format("<<dead>>\t[%3u] %s\n", readerToFileOrdinal[sym->getFile()], + deadSymbolStrings[sym].c_str()); + } + } } diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td index 3d1d97641d71..ab79aa7d8670 100644 --- a/lld/MachO/Options.td +++ b/lld/MachO/Options.td @@ -252,6 +252,10 @@ def segcreate : MultiArg<["-"], "segcreate", 3>, Alias<sectcreate>, HelpText<"Alias for -sectcreate">, Group<grp_content>; +def add_empty_section : MultiArg<["-"], "add_empty_section", 2>, + MetaVarName<"<segment> <section>">, + HelpText<"Create an empty <section> in <segment>">, + Group<grp_content>; def filelist : Separate<["-"], "filelist">, MetaVarName<"<file>">, HelpText<"Read names of files to link from <file>">, diff --git a/lld/MachO/OutputSection.h b/lld/MachO/OutputSection.h index eb554854cc89..51f39dd3498d 100644 --- a/lld/MachO/OutputSection.h +++ b/lld/MachO/OutputSection.h @@ -58,13 +58,23 @@ public: // Unneeded sections are omitted entirely (header and body). virtual bool isNeeded() const { return true; } - virtual void finalize() { - // TODO investigate refactoring synthetic section finalization logic into - // overrides of this function. 
- } + // The implementations of this method can assume that it is only called right + // before addresses get assigned to this particular OutputSection. In + // particular, this means that it gets called only after addresses have been + // assigned to output sections that occur earlier in the output binary. + // Naturally, this means different sections' finalize() methods cannot execute + // concurrently with each other. As such, avoid using this method for + // operations that do not require this strict sequential guarantee. + // + // Operations that need to occur late in the linking process, but which do not + // need the sequential guarantee, should be named `finalizeContents()`. See + // e.g. LinkEditSection::finalizeContents() and + // CStringSection::finalizeContents(). + virtual void finalize() {} virtual void writeTo(uint8_t *buf) const = 0; + // Handle section$start$ and section$end$ symbols. void assignAddressesToStartEndSymbols(); StringRef name; diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h index 49b68c77672e..12e422b5c5d8 100644 --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -62,6 +62,8 @@ public: align = target->wordSize; } + // Implementations of this method can assume that the regular (non-__LINKEDIT) + // sections already have their addresses assigned. virtual void finalizeContents() {} // Sections in __LINKEDIT are special: their offsets are recorded in the diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp index 49af2f6ad9a8..8b1e357499aa 100644 --- a/lld/MachO/UnwindInfoSection.cpp +++ b/lld/MachO/UnwindInfoSection.cpp @@ -392,7 +392,7 @@ UnwindInfoSectionImpl<Ptr>::findLsdaReloc(ConcatInputSection *isec) const { } // Scan the __LD,__compact_unwind entries and compute the space needs of -// __TEXT,__unwind_info and __TEXT,__eh_frame +// __TEXT,__unwind_info and __TEXT,__eh_frame. template <class Ptr> void UnwindInfoSectionImpl<Ptr>::finalize() { if (symbols.empty()) return; diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp index 2c0794e08ae3..851cb3db3859 100644 --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -1108,8 +1108,10 @@ template <class LP> void Writer::run() { treatSpecialUndefineds(); if (config->entry && !isa<Undefined>(config->entry)) prepareBranchTarget(config->entry); + // Canonicalization of all pointers to InputSections should be handled by - // these two methods. + // these two scan* methods. I.e. from this point onward, for all live + // InputSections, we should have `isec->canonical() == isec`. scanSymbols(); scanRelocations(); @@ -1119,6 +1121,8 @@ template <class LP> void Writer::run() { if (in.stubHelper->isNeeded()) in.stubHelper->setup(); + // At this point, we should know exactly which output sections are needed, + // courtesy of scanSymbols() and scanRelocations(). createOutputSections<LP>(); // After this point, we create no new segments; HOWEVER, we might @@ -1146,11 +1150,10 @@ void macho::resetWriter() { LCDylib::resetInstanceCount(); } void macho::createSyntheticSections() { in.header = make<MachHeaderSection>(); - if (config->dedupLiterals) { + if (config->dedupLiterals) in.cStringSection = make<DeduplicatedCStringSection>(); - } else { + else in.cStringSection = make<CStringSection>(); - } in.wordLiteralSection = config->dedupLiterals ? 
make<WordLiteralSection>() : nullptr; in.rebase = make<RebaseSection>(); diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index f7e099b9cf6e..82683801fad9 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -1,19 +1,21 @@ -======================== -lld 14.0.0 Release Notes -======================== +=========================== +lld |release| Release Notes +=========================== .. contents:: :local: -.. warning:: - These are in-progress notes for the upcoming LLVM 14.0.0 release. - Release notes for previous releases can be found on - `the Download Page <https://releases.llvm.org/download.html>`_. +.. only:: PreRelease + + .. warning:: + These are in-progress notes for the upcoming LLVM |release| release. + Release notes for previous releases can be found on + `the Download Page <https://releases.llvm.org/download.html>`_. Introduction ============ -This document contains the release notes for the lld linker, release 14.0.0. +This document contains the release notes for the lld linker, release |release|. Here we describe the status of lld, including major improvements from the previous release. All lld releases may be downloaded from the `LLVM releases web site <https://llvm.org/releases/>`_. @@ -33,6 +35,9 @@ ELF Improvements (`D110014 <https://reviews.llvm.org/D110014>`_) * If ``-Map`` is specified, ``--cref`` will be printed to the specified file. (`D114663 <https://reviews.llvm.org/D114663>`_) +* No longer deduplicate local symbol names at the default optimization level of ``-O1``. + This results in a larger ``.strtab`` (usually less than 1%) but a faster link + time. Use optimization level ``-O2`` to restore the deduplication. Architecture specific changes: diff --git a/lld/docs/conf.py b/lld/docs/conf.py index 8d0fec72caf8..95befddf80ea 100644 --- a/lld/docs/conf.py +++ b/lld/docs/conf.py @@ -43,15 +43,6 @@ master_doc = 'index' project = u'lld' copyright = u'2011-%d, LLVM Project' % date.today().year -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. -# -# The short version. -version = '14' -# The full version, including alpha/beta/rc tags. -release = '14' - # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. #language = None diff --git a/lldb/bindings/interface/SBThread.i b/lldb/bindings/interface/SBThread.i index ba7f5b3fdf76..1e46bd6e21f6 100644 --- a/lldb/bindings/interface/SBThread.i +++ b/lldb/bindings/interface/SBThread.i @@ -409,7 +409,7 @@ public: Returns an SBValue object representing the siginfo for the current signal. ") GetSiginfo; lldb::SBValue - GetSiginfo(SBError &error); + GetSiginfo(); STRING_EXTENSION(SBThread) diff --git a/lldb/docs/design/reproducers.rst b/lldb/docs/design/reproducers.rst deleted file mode 100644 index cac8721196d3..000000000000 --- a/lldb/docs/design/reproducers.rst +++ /dev/null @@ -1,205 +0,0 @@ -Reproducers -=========== - -As unbelievable as it may sound, the debugger has bugs. These bugs might -manifest themselves as errors, missing results or even a crash. Quite often -these bugs don't reproduce in simple, isolated scenarios. The debugger deals -with a lot of moving parts and subtle differences can easily add up. - -Reproducers in LLDB improve the experience for both the users encountering bugs -and the developers working on resolving them.
The general idea consists of -*capturing* all the information necessary to later *replay* a debug session -while debugging the debugger. - -.. contents:: - :local: - -Usage ----- - -Reproducers are a generic concept in LLDB and are not inherently coupled with -the command line driver. The functionality can be used for anything that uses -the SB API and the driver is just one example. However, because it's probably -the most common way users interact with lldb, that's the workflow described in -this section. - -Capture -``````` - -Until reproducer capture is enabled by default, you need to launch LLDB in -capture mode. For the command line driver, this means passing ``--capture``. -You cannot enable reproducer capture from within LLDB, as this would be too -late to capture initialization of the debugger. - -.. code-block:: bash - - $ lldb --capture - -In capture mode, LLDB will keep track of all the information it needs to replay -the current debug session. Most data is captured lazily to limit the impact on -performance. To create the reproducer, use the ``reproducer generate`` -sub-command. It's always possible to check the status of the reproducers with -the ``reproducer status`` sub-command. Note that generating the reproducer -terminates the debug session. - -.. code-block:: none - - (lldb) reproducer status - Reproducer is in capture mode. - (lldb) reproducer generate - Reproducer written to '/path/to/reproducer' - Please have a look at the directory to assess if you're willing to share the contained information. - - -The resulting reproducer is a directory. It was a conscious decision to not -compress and archive it automatically. The reproducer can contain potentially -sensitive information like object and symbol files, their paths on disk, debug -information, memory excerpts of the inferior process, etc. - -Replay -`````` - -It is strongly recommended to replay the reproducer locally to ensure it -actually reproduces the expected behavior. If the reproducer doesn't behave -correctly locally, it means there's a bug in the reproducer implementation that -should be addressed. - -To replay a reproducer, simply pass its path to LLDB through the ``--replay`` -flag. It is unnecessary to pass any other command line flags. The flags that -were passed to LLDB during capture are already part of the reproducer. - -.. code-block:: bash - - $ lldb --replay /path/to/reproducer - - -During replay LLDB will behave similarly to batch mode. The session should be -identical to the recorded debug session. The only expected differences are that -the binary being debugged doesn't actually run during replay. That means that -you won't see any of its side effects, like things being printed to the -terminal. Another expected difference is the behavior of the ``reproducer -generate`` command, which becomes a NOOP during replay. - -Augmenting a Bug Report with a Reproducer -````````````````````````````````````````` - -A reproducer can significantly improve a bug report, but in itself it is not -sufficient. Always describe the expected and unexpected behavior. Just like the -debugger can have bugs, the reproducer can have bugs too. - - -Design ------- - - -Replay -`````` - -Reproducers support two replay modes. The main and most common mode is active -replay. It's called active, because it's LLDB that is driving replay by calling -the captured SB API functions one after another. The second mode is passive -replay.
In this mode, LLDB sits idle until an SB API function is called, for -example from Python, and then replays just this individual call. - -Active Replay -^^^^^^^^^^^^^ - -No matter how a reproducer was captured, it can always be replayed with the -command line driver. When a reproducer is passed with the ``--replay`` flag, the -driver short-circuits and passes off control to the reproducer infrastructure, -effectively bypassing its normal operation. This works because the driver is -implemented using the SB API and is therefore nothing more than a sequence of -SB API calls. - -Replay is driven by the ``Registry::Replay``. As long as there's data in the -buffer holding the API data, the next SB API function call is deserialized. -Once the function is known, the registry can retrieve its signature, and use -that to deserialize its arguments. The function can then be invoked, most -commonly through the synthesized default replayer, or potentially using a -custom defined replay function. This process continues until no more data is -available or a replay error is encountered. - -During replay only a function's side effects matter. The result returned by the -replayed function is ignored because it cannot be observed beyond the driver. -This is sound, because anything that is passed into a subsequent API call will -have been serialized as an input argument. This also works for SB API objects -because the reproducers know about every object that has crossed the API -boundary, which is true by definition for object return values. - - -Passive Replay -^^^^^^^^^^^^^^ - -Passive replay exists to support running the API test suite against a -reproducer. The API test suite is written in Python and tests the debugger by -calling into its API from Python. To make this work, the API must transparently -replay itself when called. This is what makes passive replay different from -driver replay, where it is lldb itself that's driving replay. For passive -replay, the driving factor is external. - -In order to replay API calls, the reproducers need a way to intercept them. -Every API call is already instrumented with an ``LLDB_RECORD_*`` macro that -captures its input arguments. Furthermore, it also contains the necessary logic -to detect which calls cross the API boundary and should be intercepted. We were -able to reuse all of this to implement passive replay. - -While passive replay is enabled, nothing happens until an SB API is called. -Inside that API function, the macro detects whether this call should be -replayed (i.e. crossed the API boundary). If the answer is yes, the next -function is deserialized from the SB API data and compared to the current -function. If the signature matches, we deserialize its input arguments and -reinvoke the current function with the deserialized arguments. We don't need to -do anything special to prevent us from recursively calling the replayed version -again, as the API boundary crossing logic knows that we're still behind the API -boundary when we re-invoke the current function. - -Another big difference with driver replay is the return value. While this -didn't matter for driver replay, it's key for passive replay, because that's -what gets checked by the test suite. Luckily, the ``LLDB_RECORD_*`` macros -contained sufficient type information to derive the result type. - -Testing ------- - -Reproducers are tested in the following ways: - - - Unit tests to cover the reproducer infrastructure.
Testing ------- - -Reproducers are tested in the following ways: - - - Unit tests to cover the reproducer infrastructure. There are tests for the - provider, the loader and the reproducer instrumentation. - - Feature specific end-to-end test cases in the ``test/Shell/Reproducer`` - directory. These tests serve as integration and regression tests for the - reproducers infrastructure, as well as doing some sanity checking for basic - debugger functionality. - - The API and shell tests can be run against a replayed reproducer. The - ``check-lldb-reproducers`` target will run the API and shell test suite - twice: first running the tests normally while capturing a reproducer and then - a second time using the replayed session as the test input (an example - invocation is sketched after the Known Issues list below). For the shell - tests this uses a little shim (``lldb-repro``) that uses the arguments and - current working directory to transparently generate or replay a reproducer. - For the API tests an extra argument with the reproducer path is passed to - ``dotest.py`` which initializes the debugger in the appropriate mode. - Certain tests do not fit this paradigm (for example tests that check the - output of the binary being debugged) and are skipped by marking them as - unsupported by adding ``UNSUPPORTED: lldb-repro`` to the top of the shell - test or adding the ``skipIfReproducer`` decorator for the API tests. - -Known Issues ------------ - -The reproducers are still a work in progress. Here's a non-exhaustive list of -outstanding work, limitations and known issues. - - - The VFS cannot deal with more than one current working directory. Changing - the current working directory during the debug session will break relative - paths. - - Not all SB APIs are properly instrumented. We need custom serialization - for APIs that take buffers and lengths. - - We leak memory during replay because the reproducer doesn't capture the end - of an object's lifetime. We need to add instrumentation to the destructor - of SB API objects. - - The reproducer includes every file opened by LLDB. This is overkill. For - example, we do not need to capture source files for code listings. There's - currently no way to say that some file shouldn't be included in the - reproducer. - - We do not yet automatically generate a reproducer on a crash. The reason is - that generating the reproducer is too expensive to do in a signal handler. - We should re-invoke lldb after a crash and do the heavy lifting.
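As a concrete invocation of the testing workflow above (a sketch assuming a standard CMake/Ninja build tree; only the ``check-lldb-reproducers`` target name comes from this document):

.. code-block:: bash

   $ ninja check-lldb-reproducers

This runs the API and shell suites once in capture mode and a second time against the replayed sessions, as described in the Testing section.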
diff --git a/lldb/include/lldb/API/SBPlatform.h b/lldb/include/lldb/API/SBPlatform.h index 4f5d04a24e95..dcc8a14ff0c1 100644 --- a/lldb/include/lldb/API/SBPlatform.h +++ b/lldb/include/lldb/API/SBPlatform.h @@ -172,7 +172,6 @@ public: protected: friend class SBDebugger; friend class SBTarget; - friend class SBThread; lldb::PlatformSP GetSP() const; diff --git a/lldb/include/lldb/API/SBTarget.h b/lldb/include/lldb/API/SBTarget.h index 9e75b5e503a8..abd9ebf07407 100644 --- a/lldb/include/lldb/API/SBTarget.h +++ b/lldb/include/lldb/API/SBTarget.h @@ -875,7 +875,6 @@ protected: friend class SBSection; friend class SBSourceManager; friend class SBSymbol; - friend class SBThread; friend class SBValue; friend class SBVariablesOptions; diff --git a/lldb/include/lldb/API/SBThread.h b/lldb/include/lldb/API/SBThread.h index 76f794c25d9e..924aae2f109a 100644 --- a/lldb/include/lldb/API/SBThread.h +++ b/lldb/include/lldb/API/SBThread.h @@ -208,7 +208,7 @@ public: bool SafeToCallFunctions(); - SBValue GetSiginfo(SBError &error); + SBValue GetSiginfo(); private: friend class SBBreakpoint; diff --git a/lldb/include/lldb/API/SBType.h b/lldb/include/lldb/API/SBType.h index 5885432d0624..529b4d0eeffc 100644 --- a/lldb/include/lldb/API/SBType.h +++ b/lldb/include/lldb/API/SBType.h @@ -225,7 +225,6 @@ protected: friend class SBFunction; friend class SBModule; friend class SBTarget; - friend class SBThread; friend class SBTypeEnumMember; friend class SBTypeEnumMemberList; friend class SBTypeNameSpecifier; diff --git a/lldb/include/lldb/Target/Target.h b/lldb/include/lldb/Target/Target.h index 2c8b36d1e3d9..42a641f6d52a 100644 --- a/lldb/include/lldb/Target/Target.h +++ b/lldb/include/lldb/Target/Target.h @@ -1019,10 +1019,11 @@ public: lldb::addr_t *load_addr_ptr = nullptr); size_t ReadCStringFromMemory(const Address &addr, std::string &out_str, - Status &error); + Status &error, bool force_live_memory = false); size_t ReadCStringFromMemory(const Address &addr, char *dst, - size_t dst_max_len, Status &result_error); + size_t dst_max_len, Status &result_error, + bool force_live_memory = false); /// Read a NULL terminated string from memory /// diff --git a/lldb/include/lldb/Target/Thread.h b/lldb/include/lldb/Target/Thread.h index f1d4e6c7ef01..2fd7d8859f52 100644 --- a/lldb/include/lldb/Target/Thread.h +++ b/lldb/include/lldb/Target/Thread.h @@ -1185,10 +1185,7 @@ public: lldb::ThreadSP GetCurrentExceptionBacktrace(); - virtual llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>> - GetSiginfo(size_t max_size) const { - return llvm::make_error<UnimplementedError>(); - } + lldb::ValueObjectSP GetSiginfoValue(); protected: friend class ThreadPlan; @@ -1239,6 +1236,11 @@ protected: void FrameSelectedCallback(lldb_private::StackFrame *frame); + virtual llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>> + GetSiginfo(size_t max_size) const { + return llvm::make_error<UnimplementedError>(); + } + // Classes that inherit from Process can see and modify these lldb::ProcessWP m_process_wp; ///< The process that owns this thread. 
lldb::StopInfoSP m_stop_info_sp; ///< The private stop reason for this thread diff --git a/lldb/source/API/SBThread.cpp b/lldb/source/API/SBThread.cpp index dcc2a6ed3d18..a08cb741814b 100644 --- a/lldb/source/API/SBThread.cpp +++ b/lldb/source/API/SBThread.cpp @@ -1318,49 +1318,11 @@ lldb_private::Thread *SBThread::get() { return m_opaque_sp->GetThreadSP().get(); } -SBValue SBThread::GetSiginfo(SBError &error) { - LLDB_INSTRUMENT_VA(this, error); +SBValue SBThread::GetSiginfo() { + LLDB_INSTRUMENT_VA(this); - SBValue value; - SBProcess process = GetProcess(); - if (!process.IsValid()) { - error.SetErrorString("no process"); - return value; - } - SBTarget target = process.GetTarget(); - if (!target.IsValid()) { - error.SetErrorString("unable to get target"); - return value; - } - SBPlatform platform = target.GetPlatform(); - if (!platform.IsValid()) { - error.SetErrorString("unable to get platform"); - return value; - } - CompilerType type = platform.GetSP()->GetSiginfoType( - target.GetSP()->GetArchitecture().GetTriple()); - if (!type.IsValid()) { - error.SetErrorString("no siginfo_t for the platform"); - return value; - } - llvm::Optional<uint64_t> type_size = type.GetByteSize(nullptr); - assert(type_size); ThreadSP thread_sp = m_opaque_sp->GetThreadSP(); - if (!thread_sp) { - error.SetErrorString("unable to get thread"); - return value; - } - llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>> data = - thread_sp->GetSiginfo(type_size.getValue()); - if (!data) { - error.SetErrorString(llvm::toString(data.takeError()).c_str()); - return value; - } - SBData sb_data; - sb_data.SetData(error, data.get()->getBufferStart(), - data.get()->getBufferSize(), process.GetByteOrder(), 0); - if (!sb_data.IsValid()) - return value; - - return target.CreateValueFromData("siginfo", sb_data, type); + if (!thread_sp) + return SBValue(); + return thread_sp->GetSiginfoValue(); } diff --git a/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp b/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp index 8c54219f0a14..edc5f005c7ac 100644 --- a/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp +++ b/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp @@ -1097,19 +1097,19 @@ DisassemblerLLVMC::DisassemblerLLVMC(const ArchSpec &arch, thumb_arch_name.erase(0, 3); thumb_arch_name.insert(0, "thumb"); } else { - thumb_arch_name = "thumbv8.7a"; + thumb_arch_name = "thumbv9.3a"; } thumb_arch.GetTriple().setArchName(llvm::StringRef(thumb_arch_name)); } // If no sub architecture specified then use the most recent arm architecture - // so the disassembler will return all instruction. Without it we will see a - // lot of unknow opcode in case the code uses instructions which are not - // available in the oldest arm version (used when no sub architecture is - // specified) + // so the disassembler will return all instructions. Without it we will see a + // lot of unknown opcodes if the code uses instructions which are not + // available in the oldest arm version (which is used when no sub architecture + // is specified). if (triple.getArch() == llvm::Triple::arm && triple.getSubArch() == llvm::Triple::NoSubArch) - triple.setArchName("armv8.7a"); + triple.setArchName("armv9.3a"); std::string features_str; const char *triple_str = triple.getTriple().c_str(); @@ -1179,9 +1179,9 @@ DisassemblerLLVMC::DisassemblerLLVMC(const ArchSpec &arch, } // If any AArch64 variant, enable latest ISA with any optional - // extensions like SVE. + // extensions like MTE. 
if (triple.isAArch64()) { - features_str += "+v8.7a,+sve2,+mte"; + features_str += "+v9.3a,+mte"; if (triple.getVendor() == llvm::Triple::Apple) cpu = "apple-latest"; diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangFunctionCaller.h b/lldb/source/Plugins/ExpressionParser/Clang/ClangFunctionCaller.h index 8060b8c0aedc..151935b0ce68 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangFunctionCaller.h +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangFunctionCaller.h @@ -21,7 +21,6 @@ namespace lldb_private { class ASTStructExtractor; -class ClangExpressionParser; /// \class ClangFunctionCaller ClangFunctionCaller.h /// "lldb/Expression/ClangFunctionCaller.h" Encapsulates a function that can diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.h b/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.h index b628f6debf66..30cdd2f3e990 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.h +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.h @@ -28,6 +28,8 @@ namespace lldb_private { +class ClangExpressionParser; + /// \class ClangUserExpression ClangUserExpression.h /// "lldb/Expression/ClangUserExpression.h" Encapsulates a single expression /// for use with Clang diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteClientBase.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteClientBase.cpp index 803e5842cd7d..8364ffeef46f 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteClientBase.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteClientBase.cpp @@ -40,7 +40,7 @@ StateType GDBRemoteClientBase::SendContinuePacketAndWaitForResponse( ContinueDelegate &delegate, const UnixSignals &signals, llvm::StringRef payload, std::chrono::seconds interrupt_timeout, StringExtractorGDBRemote &response) { - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); + Log *log = GetLog(GDBRLog::Process); response.Clear(); { @@ -184,8 +184,7 @@ GDBRemoteClientBase::SendPacketAndWaitForResponse( std::chrono::seconds interrupt_timeout) { Lock lock(*this, interrupt_timeout); if (!lock) { - if (Log *log = - ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)) + if (Log *log = GetLog(GDBRLog::Process)) LLDB_LOGF(log, "GDBRemoteClientBase::%s failed to get mutex, not sending " "packet '%.*s'", @@ -203,8 +202,7 @@ GDBRemoteClientBase::SendPacketAndReceiveResponseWithOutputSupport( llvm::function_ref<void(llvm::StringRef)> output_callback) { Lock lock(*this, interrupt_timeout); if (!lock) { - if (Log *log = - ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)) + if (Log *log = GetLog(GDBRLog::Process)) LLDB_LOGF(log, "GDBRemoteClientBase::%s failed to get mutex, not sending " "packet '%.*s'", @@ -237,7 +235,7 @@ GDBRemoteClientBase::SendPacketAndWaitForResponseNoLock( if (response.ValidateResponse()) return packet_result; // Response says it wasn't valid - Log *log = ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PACKETS); + Log *log = GetLog(GDBRLog::Packets); LLDB_LOGF( log, "error: packet with payload \"%.*s\" got invalid response \"%s\": %s", @@ -311,7 +309,7 @@ void GDBRemoteClientBase::ContinueLock::unlock() { GDBRemoteClientBase::ContinueLock::LockResult GDBRemoteClientBase::ContinueLock::lock() { - Log *log = ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS); + Log *log = GetLog(GDBRLog::Process); LLDB_LOGF(log, "GDBRemoteClientBase::ContinueLock::%s() resuming with %s", __FUNCTION__, m_comm.m_continue_packet.c_str()); @@ 
-349,7 +347,7 @@ GDBRemoteClientBase::Lock::Lock(GDBRemoteClientBase &comm, } void GDBRemoteClientBase::Lock::SyncWithContinueThread() { - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); + Log *log = GetLog(GDBRLog::Process); std::unique_lock<std::mutex> lock(m_comm.m_mutex); if (m_comm.m_is_running && m_interrupt_timeout == std::chrono::seconds(0)) return; // We were asked to avoid interrupting the sender. Lock is not diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp index 25ae08838bf8..38d9e400978d 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp @@ -93,7 +93,7 @@ char GDBRemoteCommunication::CalculcateChecksum(llvm::StringRef payload) { } size_t GDBRemoteCommunication::SendAck() { - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PACKETS)); + Log *log = GetLog(GDBRLog::Packets); ConnectionStatus status = eConnectionStatusSuccess; char ch = '+'; const size_t bytes_written = WriteAll(&ch, 1, status, nullptr); @@ -103,7 +103,7 @@ size_t GDBRemoteCommunication::SendAck() { } size_t GDBRemoteCommunication::SendNack() { - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PACKETS)); + Log *log = GetLog(GDBRLog::Packets); ConnectionStatus status = eConnectionStatusSuccess; char ch = '-'; const size_t bytes_written = WriteAll(&ch, 1, status, nullptr); @@ -128,7 +128,7 @@ GDBRemoteCommunication::PacketResult GDBRemoteCommunication::SendRawPacketNoLock(llvm::StringRef packet, bool skip_ack) { if (IsConnected()) { - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PACKETS)); + Log *log = GetLog(GDBRLog::Packets); ConnectionStatus status = eConnectionStatusSuccess; const char *packet_data = packet.data(); const size_t packet_length = packet.size(); @@ -222,7 +222,7 @@ GDBRemoteCommunication::ReadPacket(StringExtractorGDBRemote &response, bool sync_on_timeout) { using ResponseType = StringExtractorGDBRemote::ResponseType; - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PACKETS)); + Log *log = GetLog(GDBRLog::Packets); for (;;) { PacketResult result = WaitForPacketNoLock(response, timeout, sync_on_timeout); @@ -241,7 +241,7 @@ GDBRemoteCommunication::WaitForPacketNoLock(StringExtractorGDBRemote &packet, uint8_t buffer[8192]; Status error; - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PACKETS)); + Log *log = GetLog(GDBRLog::Packets); // Check for a packet from our cache first without trying any reading... 
if (CheckForPacket(nullptr, 0, packet) != PacketType::Invalid) @@ -382,7 +382,7 @@ GDBRemoteCommunication::WaitForPacketNoLock(StringExtractorGDBRemote &packet, } bool GDBRemoteCommunication::DecompressPacket() { - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PACKETS)); + Log *log = GetLog(GDBRLog::Packets); if (!CompressionIsEnabled()) return true; @@ -616,7 +616,7 @@ GDBRemoteCommunication::CheckForPacket(const uint8_t *src, size_t src_len, // Put the packet data into the buffer in a thread safe fashion std::lock_guard<std::recursive_mutex> guard(m_bytes_mutex); - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PACKETS)); + Log *log = GetLog(GDBRLog::Packets); if (src && src_len > 0) { if (log && log->GetVerbose()) { @@ -881,7 +881,7 @@ GDBRemoteCommunication::ListenThread(lldb::thread_arg_t arg) { Status GDBRemoteCommunication::StartDebugserverProcess( const char *url, Platform *platform, ProcessLaunchInfo &launch_info, uint16_t *port, const Args *inferior_args, int pass_comm_fd) { - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); + Log *log = GetLog(GDBRLog::Process); LLDB_LOGF(log, "GDBRemoteCommunication::%s(url=%s, port=%" PRIu16 ")", __FUNCTION__, url ? url : "<empty>", port ? *port : uint16_t(0)); diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp index f6526d03863b..91b9151328a8 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp @@ -399,8 +399,7 @@ void GDBRemoteCommunicationClient::GetRemoteQSupported() { packet_response.GetHexMaxU64(/*little_endian=*/false, UINT64_MAX); if (m_max_packet_size == 0) { m_max_packet_size = UINT64_MAX; // Must have been a garbled response - Log *log( - ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); + Log *log(GetLog(GDBRLog::Process)); LLDB_LOGF(log, "Garbled PacketSize spec in qSupported response"); } } @@ -485,8 +484,7 @@ GDBRemoteCommunicationClient::SendThreadSpecificPacketAndWaitForResponse( StringExtractorGDBRemote &response) { Lock lock(*this); if (!lock) { - if (Log *log = ProcessGDBRemoteLog::GetLogIfAnyCategoryIsSet( - GDBR_LOG_PROCESS | GDBR_LOG_PACKETS)) + if (Log *log = GetLog(GDBRLog::Process | GDBRLog::Packets)) LLDB_LOGF(log, "GDBRemoteCommunicationClient::%s: Didn't get sequence mutex " "for %s packet.", @@ -622,7 +620,7 @@ DataBufferSP GDBRemoteCommunicationClient::ReadMemoryTags(lldb::addr_t addr, packet.Printf("qMemTags:%" PRIx64 ",%zx:%" PRIx32, addr, len, type); StringExtractorGDBRemote response; - Log *log = ProcessGDBRemoteLog::GetLogIfAnyCategoryIsSet(GDBR_LOG_MEMORY); + Log *log = GetLog(GDBRLog::Memory); if (SendPacketAndWaitForResponse(packet.GetString(), response) != PacketResult::Success || @@ -1200,7 +1198,7 @@ static void ParseOSType(llvm::StringRef value, std::string &os_name, } bool GDBRemoteCommunicationClient::GetHostInfo(bool force) { - Log *log(ProcessGDBRemoteLog::GetLogIfAnyCategoryIsSet(GDBR_LOG_PROCESS)); + Log *log = GetLog(GDBRLog::Process); if (force || m_qHostInfo_is_valid == eLazyBoolCalculate) { // host info computation can require DNS traffic and shelling out to external processes. 
@@ -2110,8 +2108,7 @@ bool GDBRemoteCommunicationClient::GetProcessInfo( } bool GDBRemoteCommunicationClient::GetCurrentProcessInfo(bool allow_lazy) { - Log *log(ProcessGDBRemoteLog::GetLogIfAnyCategoryIsSet(GDBR_LOG_PROCESS | - GDBR_LOG_PACKETS)); + Log *log(GetLog(GDBRLog::Process | GDBRLog::Packets)); if (allow_lazy) { if (m_qProcessInfo_is_valid == eLazyBoolYes) @@ -2887,8 +2884,7 @@ GDBRemoteCommunicationClient::GetCurrentProcessAndThreadIDs( ids.emplace_back(1, 1); } } else { - Log *log(ProcessGDBRemoteLog::GetLogIfAnyCategoryIsSet(GDBR_LOG_PROCESS | - GDBR_LOG_PACKETS)); + Log *log(GetLog(GDBRLog::Process | GDBRLog::Packets)); LLDB_LOG(log, "error: failed to get packet sequence mutex, not sending " "packet 'qfThreadInfo'"); sequence_mutex_unavailable = true; @@ -3557,7 +3553,7 @@ bool GDBRemoteCommunicationClient::SyncThreadState(lldb::tid_t tid) { llvm::Expected<TraceSupportedResponse> GDBRemoteCommunicationClient::SendTraceSupported(std::chrono::seconds timeout) { - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); + Log *log = GetLog(GDBRLog::Process); StreamGDBRemote escaped_packet; escaped_packet.PutCString("jLLDBTraceSupported"); @@ -3583,7 +3579,7 @@ GDBRemoteCommunicationClient::SendTraceSupported(std::chrono::seconds timeout) { llvm::Error GDBRemoteCommunicationClient::SendTraceStop(const TraceStopRequest &request, std::chrono::seconds timeout) { - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); + Log *log = GetLog(GDBRLog::Process); StreamGDBRemote escaped_packet; escaped_packet.PutCString("jLLDBTraceStop:"); @@ -3618,7 +3614,7 @@ GDBRemoteCommunicationClient::SendTraceStop(const TraceStopRequest &request, llvm::Error GDBRemoteCommunicationClient::SendTraceStart(const llvm::json::Value ¶ms, std::chrono::seconds timeout) { - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); + Log *log = GetLog(GDBRLog::Process); StreamGDBRemote escaped_packet; escaped_packet.PutCString("jLLDBTraceStart:"); @@ -3653,7 +3649,7 @@ GDBRemoteCommunicationClient::SendTraceStart(const llvm::json::Value ¶ms, llvm::Expected<std::string> GDBRemoteCommunicationClient::SendTraceGetState(llvm::StringRef type, std::chrono::seconds timeout) { - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); + Log *log = GetLog(GDBRLog::Process); StreamGDBRemote escaped_packet; escaped_packet.PutCString("jLLDBTraceGetState:"); @@ -3687,7 +3683,7 @@ GDBRemoteCommunicationClient::SendTraceGetState(llvm::StringRef type, llvm::Expected<std::vector<uint8_t>> GDBRemoteCommunicationClient::SendTraceGetBinaryData( const TraceGetBinaryDataRequest &request, std::chrono::seconds timeout) { - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); + Log *log = GetLog(GDBRLog::Process); StreamGDBRemote escaped_packet; escaped_packet.PutCString("jLLDBTraceGetBinaryData:"); @@ -4132,8 +4128,7 @@ void GDBRemoteCommunicationClient::ServeSymbolLookups( // our symbol lookup failed so we must abort return; - } else if (Log *log = ProcessGDBRemoteLog::GetLogIfAnyCategoryIsSet( - GDBR_LOG_PROCESS | GDBR_LOG_PACKETS)) { + } else if (Log *log = GetLog(GDBRLog::Process | GDBRLog::Packets)) { LLDB_LOGF(log, "GDBRemoteCommunicationClient::%s: Didn't get sequence mutex.", __FUNCTION__); @@ -4147,7 +4142,7 @@ GDBRemoteCommunicationClient::GetSupportedStructuredDataPlugins() { // Query the server for the array of supported asynchronous JSON packets. 
m_supported_async_json_packets_is_valid = true; - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); + Log *log = GetLog(GDBRLog::Process); // Poll it now. StringExtractorGDBRemote response; diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.cpp index 49d88b72b01b..7d21b0ff01da 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.cpp @@ -138,7 +138,7 @@ GDBRemoteCommunicationServer::Handle_QErrorStringEnable( GDBRemoteCommunication::PacketResult GDBRemoteCommunicationServer::SendIllFormedResponse( const StringExtractorGDBRemote &failed_packet, const char *message) { - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PACKETS)); + Log *log = GetLog(GDBRLog::Packets); LLDB_LOGF(log, "GDBRemoteCommunicationServer::%s: ILLFORMED: '%s' (%s)", __FUNCTION__, failed_packet.GetStringRef().data(), message ? message : ""); diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterContext.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterContext.cpp index 9410c9bd83ec..1b66e8c16281 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterContext.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterContext.cpp @@ -238,8 +238,7 @@ bool GDBRemoteRegisterContext::ReadRegisterBytes(const RegisterInfo *reg_info) { if (GetRegisterIsValid(reg)) return true; } else { - Log *log(ProcessGDBRemoteLog::GetLogIfAnyCategoryIsSet(GDBR_LOG_THREAD | - GDBR_LOG_PACKETS)); + Log *log(GetLog(GDBRLog::Thread | GDBRLog::Packets)); LLDB_LOGF( log, "error: GDBRemoteRegisterContext::ReadRegisterBytes tried " @@ -454,8 +453,7 @@ bool GDBRemoteRegisterContext::WriteRegisterBytes(const RegisterInfo *reg_info, return success; } } else { - Log *log(ProcessGDBRemoteLog::GetLogIfAnyCategoryIsSet(GDBR_LOG_THREAD | - GDBR_LOG_PACKETS)); + Log *log(GetLog(GDBRLog::Thread | GDBRLog::Packets)); if (log) { if (log->GetVerbose()) { StreamString strm; @@ -560,8 +558,7 @@ bool GDBRemoteRegisterContext::ReadAllRegisterValues( return true; } else { - Log *log(ProcessGDBRemoteLog::GetLogIfAnyCategoryIsSet(GDBR_LOG_THREAD | - GDBR_LOG_PACKETS)); + Log *log(GetLog(GDBRLog::Thread | GDBRLog::Packets)); if (log) { if (log->GetVerbose()) { StreamString strm; @@ -736,8 +733,7 @@ bool GDBRemoteRegisterContext::WriteAllRegisterValues( return num_restored > 0; } } else { - Log *log(ProcessGDBRemoteLog::GetLogIfAnyCategoryIsSet(GDBR_LOG_THREAD | - GDBR_LOG_PACKETS)); + Log *log(GetLog(GDBRLog::Thread | GDBRLog::Packets)); if (log) { if (log->GetVerbose()) { StreamString strm; diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp index d8ad0b4e4e4b..82357cd117d7 100644 --- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp @@ -269,7 +269,7 @@ ProcessGDBRemote::ProcessGDBRemote(lldb::TargetSP target_sp, m_gdb_comm.SetPacketRecorder(provider.GetNewPacketRecorder()); } - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_ASYNC)); + Log *log = GetLog(GDBRLog::Async); const uint32_t async_event_mask = eBroadcastBitAsyncContinue | eBroadcastBitAsyncThreadShouldExit; @@ -705,7 +705,7 @@ Status ProcessGDBRemote::WillLaunchOrAttach() { // Process Control Status 
ProcessGDBRemote::DoLaunch(lldb_private::Module *exe_module, ProcessLaunchInfo &launch_info) { - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); + Log *log = GetLog(GDBRLog::Process); Status error; LLDB_LOGF(log, "ProcessGDBRemote::%s() entered", __FUNCTION__); @@ -897,7 +897,7 @@ Status ProcessGDBRemote::DoLaunch(lldb_private::Module *exe_module, Status ProcessGDBRemote::ConnectToDebugserver(llvm::StringRef connect_url) { Status error; // Only connect if we have a valid connect URL - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); + Log *log = GetLog(GDBRLog::Process); if (!connect_url.empty()) { LLDB_LOGF(log, "ProcessGDBRemote::%s Connecting to %s", __FUNCTION__, @@ -958,7 +958,7 @@ Status ProcessGDBRemote::ConnectToDebugserver(llvm::StringRef connect_url) { } void ProcessGDBRemote::DidLaunchOrAttach(ArchSpec &process_arch) { - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); + Log *log = GetLog(GDBRLog::Process); BuildDynamicRegisterInfo(false); // See if the GDB server supports qHostInfo or qProcessInfo packets. Prefer @@ -1094,7 +1094,7 @@ void ProcessGDBRemote::DidLaunch() { Status ProcessGDBRemote::DoAttachToProcessWithID( lldb::pid_t attach_pid, const ProcessAttachInfo &attach_info) { - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); + Log *log = GetLog(GDBRLog::Process); Status error; LLDB_LOGF(log, "ProcessGDBRemote::%s()", __FUNCTION__); @@ -1203,7 +1203,7 @@ Status ProcessGDBRemote::WillResume() { Status ProcessGDBRemote::DoResume() { Status error; - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); + Log *log = GetLog(GDBRLog::Process); LLDB_LOGF(log, "ProcessGDBRemote::Resume()"); ListenerSP listener_sp( @@ -1513,7 +1513,7 @@ bool ProcessGDBRemote::UpdateThreadIDList() { bool ProcessGDBRemote::DoUpdateThreadList(ThreadList &old_thread_list, ThreadList &new_thread_list) { // locker will keep a mutex locked until it goes out of scope - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_THREAD)); + Log *log = GetLog(GDBRLog::Thread); LLDB_LOGV(log, "pid = {0}", GetID()); size_t num_thread_ids = m_thread_ids.size(); @@ -1793,8 +1793,7 @@ ThreadSP ProcessGDBRemote::SetThreadStopInfo( } } if (watch_id == LLDB_INVALID_WATCH_ID) { - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet( - GDBR_LOG_WATCHPOINTS)); + Log *log(GetLog(GDBRLog::Watchpoints)); LLDB_LOGF(log, "failed to find watchpoint"); } thread_sp->SetStopInfo(StopInfo::CreateStopReasonWithWatchpointID( @@ -2236,8 +2235,7 @@ StateType ProcessGDBRemote::SetThreadStopInfo(StringExtractor &stop_packet) { } else if (key.compare("library") == 0) { auto error = LoadModules(); if (error) { - Log *log( - ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); + Log *log(GetLog(GDBRLog::Process)); LLDB_LOG_ERROR(log, std::move(error), "Failed to load modules: {0}"); } } else if (key.compare("fork") == 0 || key.compare("vfork") == 0) { @@ -2245,8 +2243,7 @@ StateType ProcessGDBRemote::SetThreadStopInfo(StringExtractor &stop_packet) { StringExtractorGDBRemote thread_id{value}; auto pid_tid = thread_id.GetPidTid(LLDB_INVALID_PROCESS_ID); if (!pid_tid) { - Log *log( - ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); + Log *log(GetLog(GDBRLog::Process)); LLDB_LOG(log, "Invalid PID/TID to fork: {0}", value); pid_tid = {{LLDB_INVALID_PROCESS_ID, LLDB_INVALID_THREAD_ID}}; } @@ -2263,7 +2260,7 @@ StateType ProcessGDBRemote::SetThreadStopInfo(StringExtractor 
&stop_packet) { } if (stop_pid != LLDB_INVALID_PROCESS_ID && stop_pid != pid) { - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); + Log *log = GetLog(GDBRLog::Process); LLDB_LOG(log, "Received stop for incorrect PID = {0} (inferior PID = {1})", stop_pid, pid); @@ -2350,7 +2347,7 @@ Status ProcessGDBRemote::DoHalt(bool &caused_stop) { Status ProcessGDBRemote::DoDetach(bool keep_stopped) { Status error; - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); + Log *log = GetLog(GDBRLog::Process); LLDB_LOGF(log, "ProcessGDBRemote::DoDetach(keep_stopped: %i)", keep_stopped); error = m_gdb_comm.Detach(keep_stopped); @@ -2379,7 +2376,7 @@ Status ProcessGDBRemote::DoDetach(bool keep_stopped) { Status ProcessGDBRemote::DoDestroy() { Status error; - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); + Log *log = GetLog(GDBRLog::Process); LLDB_LOGF(log, "ProcessGDBRemote::DoDestroy()"); // There is a bug in older iOS debugservers where they don't shut down the @@ -2561,7 +2558,7 @@ void ProcessGDBRemote::SetLastStopPacket( const bool did_exec = response.GetStringRef().find(";reason:exec;") != std::string::npos; if (did_exec) { - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); + Log *log = GetLog(GDBRLog::Process); LLDB_LOGF(log, "ProcessGDBRemote::SetLastStopPacket () - detected exec"); m_thread_list_real.Clear(); @@ -2591,7 +2588,7 @@ addr_t ProcessGDBRemote::GetImageInfoAddress() { if (addr == LLDB_INVALID_ADDRESS) { llvm::Expected<LoadedModuleInfoList> list = GetLoadedModuleList(); if (!list) { - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); + Log *log = GetLog(GDBRLog::Process); LLDB_LOG_ERROR(log, list.takeError(), "Failed to read module list: {0}."); } else { addr = list->m_link_map; @@ -3036,7 +3033,7 @@ Status ProcessGDBRemote::EnableBreakpointSite(BreakpointSite *bp_site) { assert(bp_site != nullptr); // Get logging info - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_BREAKPOINTS)); + Log *log = GetLog(GDBRLog::Breakpoints); user_id_t site_id = bp_site->GetID(); // Get the breakpoint address @@ -3160,7 +3157,7 @@ Status ProcessGDBRemote::DisableBreakpointSite(BreakpointSite *bp_site) { assert(bp_site != nullptr); addr_t addr = bp_site->GetLoadAddress(); user_id_t site_id = bp_site->GetID(); - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_BREAKPOINTS)); + Log *log = GetLog(GDBRLog::Breakpoints); LLDB_LOGF(log, "ProcessGDBRemote::DisableBreakpointSite (site_id = %" PRIu64 ") addr = 0x%8.8" PRIx64, @@ -3225,8 +3222,7 @@ Status ProcessGDBRemote::EnableWatchpoint(Watchpoint *wp, bool notify) { if (wp) { user_id_t watchID = wp->GetID(); addr_t addr = wp->GetLoadAddress(); - Log *log( - ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_WATCHPOINTS)); + Log *log(GetLog(GDBRLog::Watchpoints)); LLDB_LOGF(log, "ProcessGDBRemote::EnableWatchpoint(watchID = %" PRIu64 ")", watchID); if (wp->IsEnabled()) { @@ -3262,8 +3258,7 @@ Status ProcessGDBRemote::DisableWatchpoint(Watchpoint *wp, bool notify) { if (wp) { user_id_t watchID = wp->GetID(); - Log *log( - ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_WATCHPOINTS)); + Log *log(GetLog(GDBRLog::Watchpoints)); addr_t addr = wp->GetLoadAddress(); @@ -3311,7 +3306,7 @@ void ProcessGDBRemote::Clear() { Status ProcessGDBRemote::DoSignal(int signo) { Status error; - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); + Log *log = GetLog(GDBRLog::Process); 
LLDB_LOGF(log, "ProcessGDBRemote::DoSignal (signal = %d)", signo); if (!m_gdb_comm.SendAsyncSignal(signo, GetInterruptTimeout())) @@ -3432,7 +3427,7 @@ Status ProcessGDBRemote::LaunchAndConnectToDebugserver( } if (error.Fail()) { - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); + Log *log = GetLog(GDBRLog::Process); LLDB_LOGF(log, "failed to start debugserver process: %s", error.AsCString()); @@ -3458,7 +3453,7 @@ bool ProcessGDBRemote::MonitorDebugserverProcess( ) { // "debugserver_pid" argument passed in is the process ID for debugserver // that we are tracking... - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); + Log *log = GetLog(GDBRLog::Process); const bool handled = true; LLDB_LOGF(log, @@ -3537,7 +3532,7 @@ void ProcessGDBRemote::DebuggerInitialize(Debugger &debugger) { } bool ProcessGDBRemote::StartAsyncThread() { - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); + Log *log = GetLog(GDBRLog::Process); LLDB_LOGF(log, "ProcessGDBRemote::%s ()", __FUNCTION__); @@ -3565,7 +3560,7 @@ bool ProcessGDBRemote::StartAsyncThread() { } void ProcessGDBRemote::StopAsyncThread() { - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); + Log *log = GetLog(GDBRLog::Process); LLDB_LOGF(log, "ProcessGDBRemote::%s ()", __FUNCTION__); @@ -3589,7 +3584,7 @@ void ProcessGDBRemote::StopAsyncThread() { thread_result_t ProcessGDBRemote::AsyncThread(void *arg) { ProcessGDBRemote *process = (ProcessGDBRemote *)arg; - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); + Log *log = GetLog(GDBRLog::Process); LLDB_LOGF(log, "ProcessGDBRemote::%s (arg = %p, pid = %" PRIu64 ") thread starting...", @@ -3792,7 +3787,7 @@ bool ProcessGDBRemote::NewThreadNotifyBreakpointHit( } Status ProcessGDBRemote::UpdateAutomaticSignalFiltering() { - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); + Log *log = GetLog(GDBRLog::Process); LLDB_LOG(log, "Check if need to update ignored signals"); // QPassSignals package is not supported by the server, there is no way we @@ -3896,9 +3891,7 @@ DataExtractor ProcessGDBRemote::GetAuxvData() { buf = std::make_shared<DataBufferHeap>(response->c_str(), response->length()); else - LLDB_LOG_ERROR( - ProcessGDBRemoteLog::GetLogIfAnyCategoryIsSet(GDBR_LOG_PROCESS), - response.takeError(), "{0}"); + LLDB_LOG_ERROR(GetLog(GDBRLog::Process), response.takeError(), "{0}"); } return DataExtractor(buf, GetByteOrder(), GetAddressByteSize()); } @@ -4086,8 +4079,7 @@ void ProcessGDBRemote::GetMaxMemorySize() { else { // In unlikely scenario that max packet size is less then 70, we will // hope that data being written is small enough to fit. - Log *log(ProcessGDBRemoteLog::GetLogIfAnyCategoryIsSet( - GDBR_LOG_COMM | GDBR_LOG_MEMORY)); + Log *log(GetLog(GDBRLog::Comm | GDBRLog::Memory)); if (log) log->Warning("Packet size is too small. 
" "LLDB may face problems while writing memory"); @@ -4266,8 +4258,7 @@ bool ParseRegisters(XMLNode feature_node, GdbServerTargetInfo &target_info, SplitCommaSeparatedRegisterNumberString( value, reg_info.invalidate_regs, 0); } else { - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet( - GDBR_LOG_PROCESS)); + Log *log(GetLog(GDBRLog::Process)); LLDB_LOGF(log, "ProcessGDBRemote::%s unhandled reg attribute %s = %s", __FUNCTION__, name.data(), value.data()); @@ -4309,8 +4300,7 @@ bool ParseRegisters(XMLNode feature_node, GdbServerTargetInfo &target_info, } if (reg_info.byte_size == 0) { - Log *log( - ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); + Log *log(GetLog(GDBRLog::Process)); LLDB_LOGF(log, "ProcessGDBRemote::%s Skipping zero bitsize register %s", __FUNCTION__, reg_info.name.AsCString()); @@ -4982,7 +4972,7 @@ static const char *const s_async_json_packet_prefix = "JSON-async:"; static StructuredData::ObjectSP ParseStructuredDataPacket(llvm::StringRef packet) { - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); + Log *log = GetLog(GDBRLog::Process); if (!packet.consume_front(s_async_json_packet_prefix)) { if (log) { @@ -5355,7 +5345,7 @@ void ProcessGDBRemote::DidForkSwitchHardwareTraps(bool enable) { } void ProcessGDBRemote::DidFork(lldb::pid_t child_pid, lldb::tid_t child_tid) { - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); + Log *log = GetLog(GDBRLog::Process); lldb::pid_t parent_pid = m_gdb_comm.GetCurrentProcessID(); // Any valid TID will suffice, thread-relevant actions will set a proper TID @@ -5417,7 +5407,7 @@ void ProcessGDBRemote::DidFork(lldb::pid_t child_pid, lldb::tid_t child_tid) { } void ProcessGDBRemote::DidVFork(lldb::pid_t child_pid, lldb::tid_t child_tid) { - Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); + Log *log = GetLog(GDBRLog::Process); assert(!m_vfork_in_progress); m_vfork_in_progress = true; diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemoteLog.h b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemoteLog.h index 44e390ec8cad..730384204393 100644 --- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemoteLog.h +++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemoteLog.h @@ -29,28 +29,9 @@ enum class GDBRLog : Log::MaskType { LLVM_MARK_AS_BITMASK_ENUM(Watchpoints) }; -#define GDBR_LOG_PROCESS ::lldb_private::process_gdb_remote::GDBRLog::Process -#define GDBR_LOG_THREAD ::lldb_private::process_gdb_remote::GDBRLog::Thread -#define GDBR_LOG_PACKETS ::lldb_private::process_gdb_remote::GDBRLog::Packets -#define GDBR_LOG_MEMORY ::lldb_private::process_gdb_remote::GDBRLog::Memory -#define GDBR_LOG_MEMORY_DATA_SHORT \ - ::lldb_private::process_gdb_remote::GDBRLog::MemoryDataShort -#define GDBR_LOG_MEMORY_DATA_LONG \ - ::lldb_private::process_gdb_remote::GDBRLog::MemoryDataLong -#define GDBR_LOG_BREAKPOINTS \ - ::lldb_private::process_gdb_remote::GDBRLog::Breakpoints -#define GDBR_LOG_WATCHPOINTS \ - ::lldb_private::process_gdb_remote::GDBRLog::Watchpoints -#define GDBR_LOG_STEP ::lldb_private::process_gdb_remote::GDBRLog::Step -#define GDBR_LOG_COMM ::lldb_private::process_gdb_remote::GDBRLog::Comm -#define GDBR_LOG_ASYNC ::lldb_private::process_gdb_remote::GDBRLog::Async - class ProcessGDBRemoteLog { public: static void Initialize(); - - static Log *GetLogIfAllCategoriesSet(GDBRLog mask) { return GetLog(mask); } - static Log *GetLogIfAnyCategoryIsSet(GDBRLog mask) { return GetLog(mask); } }; } // namespace process_gdb_remote diff 
--git a/lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.h b/lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.h index fb83c74fd2c5..5bc90a3dedce 100644 --- a/lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.h +++ b/lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.h @@ -90,9 +90,6 @@ public: StructuredData::ObjectSP FetchThreadExtendedInfo() override; - llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>> - GetSiginfo(size_t max_size) const override; - protected: friend class ProcessGDBRemote; @@ -118,6 +115,9 @@ protected: void SetStopInfoFromPacket(StringExtractor &stop_packet, uint32_t stop_id); bool CalculateStopInfo() override; + + llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>> + GetSiginfo(size_t max_size) const override; }; } // namespace process_gdb_remote diff --git a/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp index 4e09b523b778..ec4057efbbc5 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp @@ -122,8 +122,7 @@ void AppleDWARFIndex::GetTypes( if (!m_apple_types_up) return; - Log *log = LogChannelDWARF::GetLogIfAny(DWARF_LOG_TYPE_COMPLETION | - DWARF_LOG_LOOKUPS); + Log *log = GetLog(DWARFLog::TypeCompletion | DWARFLog::Lookups); const bool has_tag = m_apple_types_up->GetHeader().header_data.ContainsAtom( DWARFMappedHash::eAtomTypeTag); const bool has_qualified_name_hash = diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index be555c130bfe..2daffecee58e 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -443,8 +443,7 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc, if (!die) return nullptr; - Log *log(LogChannelDWARF::GetLogIfAny(DWARF_LOG_TYPE_COMPLETION | - DWARF_LOG_LOOKUPS)); + Log *log = GetLog(DWARFLog::TypeCompletion | DWARFLog::Lookups); SymbolFileDWARF *dwarf = die.GetDWARF(); if (log) { @@ -548,8 +547,7 @@ lldb::TypeSP DWARFASTParserClang::ParseTypeModifier(const SymbolContext &sc, const DWARFDIE &die, ParsedDWARFTypeAttributes &attrs) { - Log *log(LogChannelDWARF::GetLogIfAny(DWARF_LOG_TYPE_COMPLETION | - DWARF_LOG_LOOKUPS)); + Log *log = GetLog(DWARFLog::TypeCompletion | DWARFLog::Lookups); SymbolFileDWARF *dwarf = die.GetDWARF(); const dw_tag_t tag = die.Tag(); LanguageType cu_language = SymbolFileDWARF::GetLanguage(*die.GetCU()); @@ -771,8 +769,7 @@ DWARFASTParserClang::ParseTypeModifier(const SymbolContext &sc, TypeSP DWARFASTParserClang::ParseEnum(const SymbolContext &sc, const DWARFDIE &die, ParsedDWARFTypeAttributes &attrs) { - Log *log(LogChannelDWARF::GetLogIfAny(DWARF_LOG_TYPE_COMPLETION | - DWARF_LOG_LOOKUPS)); + Log *log = GetLog(DWARFLog::TypeCompletion | DWARFLog::Lookups); SymbolFileDWARF *dwarf = die.GetDWARF(); const dw_tag_t tag = die.Tag(); TypeSP type_sp; @@ -900,8 +897,7 @@ ConvertDWARFCallingConventionToClang(const ParsedDWARFTypeAttributes &attrs) { break; } - Log *log(LogChannelDWARF::GetLogIfAny(DWARF_LOG_TYPE_COMPLETION | - DWARF_LOG_LOOKUPS)); + Log *log = GetLog(DWARFLog::TypeCompletion | DWARFLog::Lookups); LLDB_LOG(log, "Unsupported DW_AT_calling_convention value: {0}", attrs.calling_convention); // Use the default calling convention as a fallback. 
@@ -910,8 +906,7 @@ ConvertDWARFCallingConventionToClang(const ParsedDWARFTypeAttributes &attrs) { TypeSP DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die, ParsedDWARFTypeAttributes &attrs) { - Log *log(LogChannelDWARF::GetLogIfAny(DWARF_LOG_TYPE_COMPLETION | - DWARF_LOG_LOOKUPS)); + Log *log = GetLog(DWARFLog::TypeCompletion | DWARFLog::Lookups); SymbolFileDWARF *dwarf = die.GetDWARF(); const dw_tag_t tag = die.Tag(); @@ -1562,8 +1557,7 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc, const dw_tag_t tag = die.Tag(); SymbolFileDWARF *dwarf = die.GetDWARF(); LanguageType cu_language = SymbolFileDWARF::GetLanguage(*die.GetCU()); - Log *log = LogChannelDWARF::GetLogIfAll(DWARF_LOG_TYPE_COMPLETION | - DWARF_LOG_LOOKUPS); + Log *log = GetLog(DWARFLog::TypeCompletion | DWARFLog::Lookups); // UniqueDWARFASTType is large, so don't create a local variables on the // stack, put it on the heap. This function is often called recursively and @@ -2214,12 +2208,6 @@ bool DWARFASTParserClang::CompleteTypeFromDWARF(const DWARFDIE &die, const dw_tag_t tag = die.Tag(); - Log *log = - nullptr; // (LogChannelDWARF::GetLogIfAny(DWARF_LOG_DEBUG_INFO|DWARF_LOG_TYPE_COMPLETION)); - if (log) - dwarf->GetObjectFile()->GetModule()->LogMessageVerboseBacktrace( - log, "0x%8.8" PRIx64 ": %s '%s' resolving forward declaration...", - die.GetID(), die.GetTagAsCString(), type->GetName().AsCString()); assert(clang_type); DWARFAttributes attributes; switch (tag) { @@ -3452,30 +3440,6 @@ DWARFASTParserClang::ResolveNamespaceDIE(const DWARFDIE &die) { namespace_decl = m_ast.GetUniqueNamespaceDeclaration( namespace_name, containing_decl_ctx, GetOwningClangModule(die), is_inline); - Log *log = - nullptr; // (LogChannelDWARF::GetLogIfAll(DWARF_LOG_DEBUG_INFO)); - if (log) { - SymbolFileDWARF *dwarf = die.GetDWARF(); - if (namespace_name) { - dwarf->GetObjectFile()->GetModule()->LogMessage( - log, - "ASTContext => %p: 0x%8.8" PRIx64 - ": DW_TAG_namespace with DW_AT_name(\"%s\") => " - "clang::NamespaceDecl *%p (original = %p)", - static_cast<void *>(&m_ast.getASTContext()), die.GetID(), - namespace_name, static_cast<void *>(namespace_decl), - static_cast<void *>(namespace_decl->getOriginalNamespace())); - } else { - dwarf->GetObjectFile()->GetModule()->LogMessage( - log, - "ASTContext => %p: 0x%8.8" PRIx64 - ": DW_TAG_namespace (anonymous) => clang::NamespaceDecl *%p " - "(original = %p)", - static_cast<void *>(&m_ast.getASTContext()), die.GetID(), - static_cast<void *>(namespace_decl), - static_cast<void *>(namespace_decl->getOriginalNamespace())); - } - } if (namespace_decl) LinkDeclContextToDIE((clang::DeclContext *)namespace_decl, die); @@ -3580,23 +3544,12 @@ bool DWARFASTParserClang::CopyUniqueClassMethodTypes( } const uint32_t src_size = src_name_to_die.GetSize(); const uint32_t dst_size = dst_name_to_die.GetSize(); - Log *log = nullptr; // (LogChannelDWARF::GetLogIfAny(DWARF_LOG_DEBUG_INFO | - // DWARF_LOG_TYPE_COMPLETION)); // Is everything kosher so we can go through the members at top speed? 
bool fast_path = true; - if (src_size != dst_size) { - if (src_size != 0 && dst_size != 0) { - LLDB_LOGF(log, - "warning: trying to unique class DIE 0x%8.8x to 0x%8.8x, " - "but they didn't have the same size (src=%d, dst=%d)", - src_class_die.GetOffset(), dst_class_die.GetOffset(), src_size, - dst_size); - } - + if (src_size != dst_size) fast_path = false; - } uint32_t idx; @@ -3605,15 +3558,8 @@ bool DWARFASTParserClang::CopyUniqueClassMethodTypes( src_die = src_name_to_die.GetValueAtIndexUnchecked(idx); dst_die = dst_name_to_die.GetValueAtIndexUnchecked(idx); - if (src_die.Tag() != dst_die.Tag()) { - LLDB_LOGF(log, - "warning: tried to unique class DIE 0x%8.8x to 0x%8.8x, " - "but 0x%8.8x (%s) tags didn't match 0x%8.8x (%s)", - src_class_die.GetOffset(), dst_class_die.GetOffset(), - src_die.GetOffset(), src_die.GetTagAsCString(), - dst_die.GetOffset(), dst_die.GetTagAsCString()); + if (src_die.Tag() != dst_die.Tag()) fast_path = false; - } const char *src_name = src_die.GetMangledName(); const char *dst_name = dst_die.GetMangledName(); @@ -3622,12 +3568,6 @@ bool DWARFASTParserClang::CopyUniqueClassMethodTypes( if (src_name == dst_name || (strcmp(src_name, dst_name) == 0)) continue; - LLDB_LOGF(log, - "warning: tried to unique class DIE 0x%8.8x to 0x%8.8x, " - "but 0x%8.8x (%s) names didn't match 0x%8.8x (%s)", - src_class_die.GetOffset(), dst_class_die.GetOffset(), - src_die.GetOffset(), src_name, dst_die.GetOffset(), dst_name); - fast_path = false; } } @@ -3649,33 +3589,13 @@ bool DWARFASTParserClang::CopyUniqueClassMethodTypes( clang::DeclContext *src_decl_ctx = src_dwarf_ast_parser->m_die_to_decl_ctx[src_die.GetDIE()]; - if (src_decl_ctx) { - LLDB_LOGF(log, "uniquing decl context %p from 0x%8.8x for 0x%8.8x", - static_cast<void *>(src_decl_ctx), src_die.GetOffset(), - dst_die.GetOffset()); + if (src_decl_ctx) dst_dwarf_ast_parser->LinkDeclContextToDIE(src_decl_ctx, dst_die); - } else { - LLDB_LOGF(log, - "warning: tried to unique decl context from 0x%8.8x for " - "0x%8.8x, but none was found", - src_die.GetOffset(), dst_die.GetOffset()); - } Type *src_child_type = dst_die.GetDWARF()->GetDIEToType()[src_die.GetDIE()]; - if (src_child_type) { - LLDB_LOGF(log, - "uniquing type %p (uid=0x%" PRIx64 - ") from 0x%8.8x for 0x%8.8x", - static_cast<void *>(src_child_type), src_child_type->GetID(), - src_die.GetOffset(), dst_die.GetOffset()); + if (src_child_type) dst_die.GetDWARF()->GetDIEToType()[dst_die.GetDIE()] = src_child_type; - } else { - LLDB_LOGF(log, - "warning: tried to unique lldb_private::Type from " - "0x%8.8x for 0x%8.8x, but none was found", - src_die.GetOffset(), dst_die.GetOffset()); - } } } else { // We must do this slowly. 
For each member of the destination, look up a @@ -3693,38 +3613,16 @@ bool DWARFASTParserClang::CopyUniqueClassMethodTypes( if (src_die && (src_die.Tag() == dst_die.Tag())) { clang::DeclContext *src_decl_ctx = src_dwarf_ast_parser->m_die_to_decl_ctx[src_die.GetDIE()]; - if (src_decl_ctx) { - LLDB_LOGF(log, "uniquing decl context %p from 0x%8.8x for 0x%8.8x", - static_cast<void *>(src_decl_ctx), src_die.GetOffset(), - dst_die.GetOffset()); + if (src_decl_ctx) dst_dwarf_ast_parser->LinkDeclContextToDIE(src_decl_ctx, dst_die); - } else { - LLDB_LOGF(log, - "warning: tried to unique decl context from 0x%8.8x " - "for 0x%8.8x, but none was found", - src_die.GetOffset(), dst_die.GetOffset()); - } Type *src_child_type = dst_die.GetDWARF()->GetDIEToType()[src_die.GetDIE()]; if (src_child_type) { - LLDB_LOGF( - log, - "uniquing type %p (uid=0x%" PRIx64 ") from 0x%8.8x for 0x%8.8x", - static_cast<void *>(src_child_type), src_child_type->GetID(), - src_die.GetOffset(), dst_die.GetOffset()); dst_die.GetDWARF()->GetDIEToType()[dst_die.GetDIE()] = src_child_type; - } else { - LLDB_LOGF(log, - "warning: tried to unique lldb_private::Type from " - "0x%8.8x for 0x%8.8x, but none was found", - src_die.GetOffset(), dst_die.GetOffset()); } } else { - LLDB_LOGF(log, "warning: couldn't find a match for 0x%8.8x", - dst_die.GetOffset()); - failures.push_back(dst_die); } } @@ -3748,47 +3646,20 @@ bool DWARFASTParserClang::CopyUniqueClassMethodTypes( // Both classes have the artificial types, link them clang::DeclContext *src_decl_ctx = src_dwarf_ast_parser->m_die_to_decl_ctx[src_die.GetDIE()]; - if (src_decl_ctx) { - LLDB_LOGF(log, "uniquing decl context %p from 0x%8.8x for 0x%8.8x", - static_cast<void *>(src_decl_ctx), src_die.GetOffset(), - dst_die.GetOffset()); + if (src_decl_ctx) dst_dwarf_ast_parser->LinkDeclContextToDIE(src_decl_ctx, dst_die); - } else { - LLDB_LOGF(log, - "warning: tried to unique decl context from 0x%8.8x " - "for 0x%8.8x, but none was found", - src_die.GetOffset(), dst_die.GetOffset()); - } Type *src_child_type = dst_die.GetDWARF()->GetDIEToType()[src_die.GetDIE()]; - if (src_child_type) { - LLDB_LOGF( - log, - "uniquing type %p (uid=0x%" PRIx64 ") from 0x%8.8x for 0x%8.8x", - static_cast<void *>(src_child_type), src_child_type->GetID(), - src_die.GetOffset(), dst_die.GetOffset()); + if (src_child_type) dst_die.GetDWARF()->GetDIEToType()[dst_die.GetDIE()] = src_child_type; - } else { - LLDB_LOGF(log, - "warning: tried to unique lldb_private::Type from " - "0x%8.8x for 0x%8.8x, but none was found", - src_die.GetOffset(), dst_die.GetOffset()); - } } } } if (dst_size_artificial) { for (idx = 0; idx < dst_size_artificial; ++idx) { - ConstString dst_name_artificial = - dst_name_to_die_artificial.GetCStringAtIndex(idx); dst_die = dst_name_to_die_artificial.GetValueAtIndexUnchecked(idx); - LLDB_LOGF(log, - "warning: need to create artificial method for 0x%8.8x for " - "method '%s'", - dst_die.GetOffset(), dst_name_artificial.GetCString()); - failures.push_back(dst_die); } } diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.cpp index ce514381ee39..03cbfd28ae74 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.cpp @@ -138,7 +138,7 @@ llvm::Error DWARFDebugArangeSet::extract(const DWARFDataExtractor &data, } } if (num_terminators > 1) { - Log *log = LogChannelDWARF::GetLogIfAll(DWARF_LOG_DEBUG_INFO); + Log *log = 
GetLog(DWARFLog::DebugInfo); LLDB_LOG(log, "warning: DWARFDebugArangeSet at %#" PRIx64 " contains %u " "terminator entries", diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp index 65923cb4ad6b..a37499175858 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp @@ -40,7 +40,7 @@ void DWARFDebugAranges::extract(const DWARFDataExtractor &debug_aranges_data) { while (debug_aranges_data.ValidOffset(offset)) { const lldb::offset_t set_offset = offset; if (llvm::Error error = set.extract(debug_aranges_data, &offset)) { - Log *log = LogChannelDWARF::GetLogIfAll(DWARF_LOG_DEBUG_INFO); + Log *log = GetLog(DWARFLog::DebugInfo); LLDB_LOG_ERROR(log, std::move(error), "DWARFDebugAranges::extract failed to extract " ".debug_aranges set at offset %#" PRIx64, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp index 4a148e7744bb..2350c8fc3d5b 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp @@ -77,7 +77,7 @@ void DebugNamesDWARFIndex::MaybeLogLookupError(llvm::Error error, llvm::StringRef name) { // Ignore SentinelErrors, log everything else. LLDB_LOG_ERROR( - LogChannelDWARF::GetLogIfAll(DWARF_LOG_LOOKUPS), + GetLog(DWARFLog::Lookups), handleErrors(std::move(error), [](const DebugNames::SentinelError &) {}), "Failed to parse index entries for index at {1:x}, name {2}: {0}", ni.getUnitOffset(), name); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/LogChannelDWARF.h b/lldb/source/Plugins/SymbolFile/DWARF/LogChannelDWARF.h index 8076c719e9c4..aa3ed4afed25 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/LogChannelDWARF.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/LogChannelDWARF.h @@ -21,19 +21,11 @@ enum class DWARFLog : Log::MaskType { TypeCompletion = Log::ChannelFlag<4>, LLVM_MARK_AS_BITMASK_ENUM(TypeCompletion) }; -#define DWARF_LOG_DEBUG_INFO ::lldb_private::DWARFLog::DebugInfo -#define DWARF_LOG_DEBUG_LINE ::lldb_private::DWARFLog::DebugLine -#define DWARF_LOG_LOOKUPS ::lldb_private::DWARFLog::Lookups -#define DWARF_LOG_TYPE_COMPLETION ::lldb_private::DWARFLog::TypeCompletion -#define DWARF_LOG_DEBUG_MAP ::lldb_private::DWARFLog::DebugMap class LogChannelDWARF { public: static void Initialize(); static void Terminate(); - - static Log *GetLogIfAll(DWARFLog mask) { return GetLog(mask); } - static Log *GetLogIfAny(DWARFLog mask) { return GetLog(mask); } }; template <> Log::Channel &LogChannelFor<DWARFLog>(); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp index e15a22affcb2..e8fbd5dd664b 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp @@ -136,7 +136,7 @@ void ManualDWARFIndex::Index() { void ManualDWARFIndex::IndexUnit(DWARFUnit &unit, SymbolFileDWARFDwo *dwp, IndexSet &set) { - Log *log = LogChannelDWARF::GetLogIfAll(DWARF_LOG_LOOKUPS); + Log *log = GetLog(DWARFLog::Lookups); if (log) { m_module.LogMessage( diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index 02d1a6a4a8be..027a4caf5555 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ 
b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -138,7 +138,7 @@ static const llvm::DWARFDebugLine::LineTable * ParseLLVMLineTable(lldb_private::DWARFContext &context, llvm::DWARFDebugLine &line, dw_offset_t line_offset, dw_offset_t unit_offset) { - Log *log = LogChannelDWARF::GetLogIfAll(DWARF_LOG_DEBUG_INFO); + Log *log = GetLog(DWARFLog::DebugInfo); llvm::DWARFDataExtractor data = context.getOrLoadLineData().GetAsLLVM(); llvm::DWARFContext &ctx = context.GetAsLLVM(); @@ -162,7 +162,7 @@ static bool ParseLLVMLineTablePrologue(lldb_private::DWARFContext &context, llvm::DWARFDebugLine::Prologue &prologue, dw_offset_t line_offset, dw_offset_t unit_offset) { - Log *log = LogChannelDWARF::GetLogIfAll(DWARF_LOG_DEBUG_INFO); + Log *log = GetLog(DWARFLog::DebugInfo); bool success = true; llvm::DWARFDataExtractor data = context.getOrLoadLineData().GetAsLLVM(); llvm::DWARFContext &ctx = context.GetAsLLVM(); @@ -443,7 +443,7 @@ SymbolFileDWARF::GetTypeSystemForLanguage(LanguageType language) { } void SymbolFileDWARF::InitializeObject() { - Log *log = LogChannelDWARF::GetLogIfAll(DWARF_LOG_DEBUG_INFO); + Log *log = GetLog(DWARFLog::DebugInfo); InitializeFirstCodeAddress(); @@ -622,7 +622,7 @@ DWARFDebugAbbrev *SymbolFileDWARF::DebugAbbrev() { auto abbr = std::make_unique<DWARFDebugAbbrev>(); llvm::Error error = abbr->parse(debug_abbrev_data); if (error) { - Log *log = LogChannelDWARF::GetLogIfAll(DWARF_LOG_DEBUG_INFO); + Log *log = GetLog(DWARFLog::DebugInfo); LLDB_LOG_ERROR(log, std::move(error), "Unable to read .debug_abbrev section: {0}"); return nullptr; @@ -1030,7 +1030,7 @@ SymbolFileDWARF::GetTypeUnitSupportFiles(DWARFTypeUnit &tu) { llvm::DWARFContext &ctx = m_context.GetAsLLVM(); llvm::DWARFDebugLine::Prologue prologue; auto report = [](llvm::Error error) { - Log *log = LogChannelDWARF::GetLogIfAll(DWARF_LOG_DEBUG_INFO); + Log *log = GetLog(DWARFLog::DebugInfo); LLDB_LOG_ERROR(log, std::move(error), "SymbolFileDWARF::GetTypeUnitSupportFiles failed to parse " "the line table prologue"); @@ -1488,7 +1488,7 @@ Type *SymbolFileDWARF::ResolveTypeUID(const DIERef &die_ref) { Type *SymbolFileDWARF::ResolveTypeUID(const DWARFDIE &die, bool assert_not_being_parsed) { if (die) { - Log *log(LogChannelDWARF::GetLogIfAll(DWARF_LOG_DEBUG_INFO)); + Log *log = GetLog(DWARFLog::DebugInfo); if (log) GetObjectFile()->GetModule()->LogMessage( log, "SymbolFileDWARF::ResolveTypeUID (die = 0x%8.8x) %s '%s'", @@ -1579,8 +1579,7 @@ bool SymbolFileDWARF::CompleteType(CompilerType &compiler_type) { Type *type = GetDIEToType().lookup(dwarf_die.GetDIE()); - Log *log(LogChannelDWARF::GetLogIfAny(DWARF_LOG_DEBUG_INFO | - DWARF_LOG_TYPE_COMPLETION)); + Log *log = GetLog(DWARFLog::DebugInfo | DWARFLog::TypeCompletion); if (log) GetObjectFile()->GetModule()->LogMessageVerboseBacktrace( log, "0x%8.8" PRIx64 ": %s '%s' resolving forward declaration...", @@ -2109,7 +2108,7 @@ bool SymbolFileDWARF::DeclContextMatchesThisSymbolFile( return true; // The type systems match, return true // The namespace AST was valid, and it does not match... 
- Log *log(LogChannelDWARF::GetLogIfAll(DWARF_LOG_LOOKUPS)); + Log *log = GetLog(DWARFLog::Lookups); if (log) GetObjectFile()->GetModule()->LogMessage( @@ -2122,7 +2121,7 @@ void SymbolFileDWARF::FindGlobalVariables( ConstString name, const CompilerDeclContext &parent_decl_ctx, uint32_t max_matches, VariableList &variables) { std::lock_guard<std::recursive_mutex> guard(GetModuleMutex()); - Log *log(LogChannelDWARF::GetLogIfAll(DWARF_LOG_LOOKUPS)); + Log *log = GetLog(DWARFLog::Lookups); if (log) GetObjectFile()->GetModule()->LogMessage( @@ -2204,7 +2203,7 @@ void SymbolFileDWARF::FindGlobalVariables(const RegularExpression ®ex, uint32_t max_matches, VariableList &variables) { std::lock_guard<std::recursive_mutex> guard(GetModuleMutex()); - Log *log(LogChannelDWARF::GetLogIfAll(DWARF_LOG_LOOKUPS)); + Log *log = GetLog(DWARFLog::Lookups); if (log) { GetObjectFile()->GetModule()->LogMessage( @@ -2316,7 +2315,7 @@ void SymbolFileDWARF::FindFunctions(ConstString name, // Module::LookupInfo::LookupInfo() assert((name_type_mask & eFunctionNameTypeAuto) == 0); - Log *log(LogChannelDWARF::GetLogIfAll(DWARF_LOG_LOOKUPS)); + Log *log = GetLog(DWARFLog::Lookups); if (log) { GetObjectFile()->GetModule()->LogMessage( @@ -2366,7 +2365,7 @@ void SymbolFileDWARF::FindFunctions(const RegularExpression ®ex, LLDB_SCOPED_TIMERF("SymbolFileDWARF::FindFunctions (regex = '%s')", regex.GetText().str().c_str()); - Log *log(LogChannelDWARF::GetLogIfAll(DWARF_LOG_LOOKUPS)); + Log *log = GetLog(DWARFLog::Lookups); if (log) { GetObjectFile()->GetModule()->LogMessage( @@ -2414,7 +2413,7 @@ void SymbolFileDWARF::FindTypes( if (!searched_symbol_files.insert(this).second) return; - Log *log(LogChannelDWARF::GetLogIfAll(DWARF_LOG_LOOKUPS)); + Log *log = GetLog(DWARFLog::Lookups); if (log) { if (parent_decl_ctx) @@ -2529,7 +2528,7 @@ CompilerDeclContext SymbolFileDWARF::FindNamespace(ConstString name, const CompilerDeclContext &parent_decl_ctx) { std::lock_guard<std::recursive_mutex> guard(GetModuleMutex()); - Log *log(LogChannelDWARF::GetLogIfAll(DWARF_LOG_LOOKUPS)); + Log *log = GetLog(DWARFLog::Lookups); if (log) { GetObjectFile()->GetModule()->LogMessage( @@ -2851,8 +2850,7 @@ TypeSP SymbolFileDWARF::FindDefinitionTypeForDWARFDeclContext( const dw_tag_t tag = dwarf_decl_ctx[0].tag; if (type_name) { - Log *log(LogChannelDWARF::GetLogIfAny(DWARF_LOG_TYPE_COMPLETION | - DWARF_LOG_LOOKUPS)); + Log *log = GetLog(DWARFLog::TypeCompletion | DWARFLog::Lookups); if (log) { GetObjectFile()->GetModule()->LogMessage( log, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp index 6ee189e04250..08bfe37fd92f 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp @@ -61,7 +61,7 @@ SymbolFileDWARFDebugMap::CompileUnitInfo::GetFileRangeMap( if (!oso_objfile) return file_range_map; - Log *log(LogChannelDWARF::GetLogIfAll(DWARF_LOG_DEBUG_MAP)); + Log *log = GetLog(DWARFLog::DebugMap); LLDB_LOGF( log, "%p: SymbolFileDWARFDebugMap::CompileUnitInfo::GetFileRangeMap ('%s')", @@ -281,7 +281,7 @@ void SymbolFileDWARFDebugMap::InitOSO() { Symtab *symtab = m_objfile_sp->GetSymtab(); if (symtab) { - Log *log(LogChannelDWARF::GetLogIfAll(DWARF_LOG_DEBUG_MAP)); + Log *log = GetLog(DWARFLog::DebugMap); std::vector<uint32_t> oso_indexes; // When a mach-o symbol is encoded, the n_type field is encoded in bits diff --git 
a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index 51b34669ebad..418f613e5cc9 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -154,7 +154,7 @@ void addOverridesForMethod(clang::CXXMethodDecl *decl) { [&decls, decl](const clang::CXXBaseSpecifier *specifier, clang::CXXBasePath &path) { if (auto *base_record = llvm::dyn_cast<clang::CXXRecordDecl>( - specifier->getType()->getAs<clang::RecordType>()->getDecl())) { + specifier->getType()->castAs<clang::RecordType>()->getDecl())) { clang::DeclarationName name = decl->getDeclName(); @@ -3175,7 +3175,7 @@ bool TypeSystemClang::IsBlockPointerType( if (qual_type->isBlockPointerType()) { if (function_pointer_type_ptr) { const clang::BlockPointerType *block_pointer_type = - qual_type->getAs<clang::BlockPointerType>(); + qual_type->castAs<clang::BlockPointerType>(); QualType pointee_type = block_pointer_type->getPointeeType(); QualType function_pointer_type = m_ast_up->getPointerType(pointee_type); *function_pointer_type_ptr = @@ -3817,13 +3817,13 @@ TypeSystemClang::GetTypeInfo(lldb::opaque_compiler_type_t type, const clang::Type::TypeClass type_class = qual_type->getTypeClass(); switch (type_class) { case clang::Type::Attributed: - return GetTypeInfo( - qual_type->getAs<clang::AttributedType>() - ->getModifiedType().getAsOpaquePtr(), - pointee_or_element_clang_type); + return GetTypeInfo(qual_type->castAs<clang::AttributedType>() + ->getModifiedType() + .getAsOpaquePtr(), + pointee_or_element_clang_type); case clang::Type::Builtin: { - const clang::BuiltinType *builtin_type = llvm::dyn_cast<clang::BuiltinType>( - qual_type->getCanonicalTypeInternal()); + const clang::BuiltinType *builtin_type = + llvm::cast<clang::BuiltinType>(qual_type->getCanonicalTypeInternal()); uint32_t builtin_type_flags = eTypeIsBuiltIn | eTypeHasValue; switch (builtin_type->getKind()) { @@ -4359,7 +4359,7 @@ TypeSystemClang::GetNumMemberFunctions(lldb::opaque_compiler_type_t type) { case clang::Type::ObjCObjectPointer: { const clang::ObjCObjectPointerType *objc_class_type = - qual_type->getAs<clang::ObjCObjectPointerType>(); + qual_type->castAs<clang::ObjCObjectPointerType>(); const clang::ObjCInterfaceType *objc_interface_type = objc_class_type->getInterfaceType(); if (objc_interface_type && @@ -4443,7 +4443,7 @@ TypeSystemClang::GetMemberFunctionAtIndex(lldb::opaque_compiler_type_t type, case clang::Type::ObjCObjectPointer: { const clang::ObjCObjectPointerType *objc_class_type = - qual_type->getAs<clang::ObjCObjectPointerType>(); + qual_type->castAs<clang::ObjCObjectPointerType>(); const clang::ObjCInterfaceType *objc_interface_type = objc_class_type->getInterfaceType(); if (objc_interface_type && @@ -5596,7 +5596,7 @@ uint32_t TypeSystemClang::GetNumFields(lldb::opaque_compiler_type_t type) { case clang::Type::ObjCObjectPointer: { const clang::ObjCObjectPointerType *objc_class_type = - qual_type->getAs<clang::ObjCObjectPointerType>(); + qual_type->castAs<clang::ObjCObjectPointerType>(); const clang::ObjCInterfaceType *objc_interface_type = objc_class_type->getInterfaceType(); if (objc_interface_type && @@ -5745,7 +5745,7 @@ CompilerType TypeSystemClang::GetFieldAtIndex(lldb::opaque_compiler_type_t type, case clang::Type::ObjCObjectPointer: { const clang::ObjCObjectPointerType *objc_class_type = - qual_type->getAs<clang::ObjCObjectPointerType>(); + qual_type->castAs<clang::ObjCObjectPointerType>(); const 
clang::ObjCInterfaceType *objc_interface_type = objc_class_type->getInterfaceType(); if (objc_interface_type && @@ -5882,7 +5882,7 @@ CompilerType TypeSystemClang::GetDirectBaseClassAtIndex( const clang::CXXRecordDecl *base_class_decl = llvm::cast<clang::CXXRecordDecl>( base_class->getType() - ->getAs<clang::RecordType>() + ->castAs<clang::RecordType>() ->getDecl()); if (base_class->isVirtual()) *bit_offset_ptr = @@ -5977,7 +5977,7 @@ CompilerType TypeSystemClang::GetVirtualBaseClassAtIndex( const clang::CXXRecordDecl *base_class_decl = llvm::cast<clang::CXXRecordDecl>( base_class->getType() - ->getAs<clang::RecordType>() + ->castAs<clang::RecordType>() ->getDecl()); *bit_offset_ptr = record_layout.getVBaseClassOffset(base_class_decl) @@ -6732,7 +6732,7 @@ size_t TypeSystemClang::GetIndexOfChildMemberWithName( child_indexes.push_back(child_idx); parent_record_decl = llvm::cast<clang::RecordDecl>( elem.Base->getType() - ->getAs<clang::RecordType>() + ->castAs<clang::RecordType>() ->getDecl()); } } @@ -6925,7 +6925,7 @@ TypeSystemClang::GetIndexOfChildWithName(lldb::opaque_compiler_type_t type, clang::CXXRecordDecl *base_class_decl = llvm::cast<clang::CXXRecordDecl>( base_class->getType() - ->getAs<clang::RecordType>() + ->castAs<clang::RecordType>() ->getDecl()); if (omit_empty_base_classes && !TypeSystemClang::RecordHasFields(base_class_decl)) diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp index 01e51c0577aa..6d33db6554d2 100644 --- a/lldb/source/Target/Target.cpp +++ b/lldb/source/Target/Target.cpp @@ -1829,13 +1829,14 @@ size_t Target::ReadMemory(const Address &addr, void *dst, size_t dst_len, } size_t Target::ReadCStringFromMemory(const Address &addr, std::string &out_str, - Status &error) { + Status &error, bool force_live_memory) { char buf[256]; out_str.clear(); addr_t curr_addr = addr.GetLoadAddress(this); Address address(addr); while (true) { - size_t length = ReadCStringFromMemory(address, buf, sizeof(buf), error); + size_t length = ReadCStringFromMemory(address, buf, sizeof(buf), error, + force_live_memory); if (length == 0) break; out_str.append(buf, length); @@ -1851,7 +1852,8 @@ size_t Target::ReadCStringFromMemory(const Address &addr, std::string &out_str, } size_t Target::ReadCStringFromMemory(const Address &addr, char *dst, - size_t dst_max_len, Status &result_error) { + size_t dst_max_len, Status &result_error, + bool force_live_memory) { size_t total_cstr_len = 0; if (dst && dst_max_len) { result_error.Clear(); @@ -1874,8 +1876,8 @@ size_t Target::ReadCStringFromMemory(const Address &addr, char *dst, cache_line_size - (curr_addr % cache_line_size); addr_t bytes_to_read = std::min<addr_t>(bytes_left, cache_line_bytes_left); - size_t bytes_read = - ReadMemory(address, curr_dst, bytes_to_read, error, true); + size_t bytes_read = ReadMemory(address, curr_dst, bytes_to_read, error, + force_live_memory); if (bytes_read == 0) { result_error = error; diff --git a/lldb/source/Target/Thread.cpp b/lldb/source/Target/Thread.cpp index c5f16b4e6c1d..bde6dad554e7 100644 --- a/lldb/source/Target/Thread.cpp +++ b/lldb/source/Target/Thread.cpp @@ -13,6 +13,7 @@ #include "lldb/Core/Module.h" #include "lldb/Core/StructuredDataImpl.h" #include "lldb/Core/ValueObject.h" +#include "lldb/Core/ValueObjectConstResult.h" #include "lldb/Host/Host.h" #include "lldb/Interpreter/OptionValueFileSpecList.h" #include "lldb/Interpreter/OptionValueProperties.h" @@ -2005,3 +2006,26 @@ ThreadSP Thread::GetCurrentExceptionBacktrace() { return ThreadSP(); } + +lldb::ValueObjectSP 
Thread::GetSiginfoValue() { + ProcessSP process_sp = GetProcess(); + assert(process_sp); + Target &target = process_sp->GetTarget(); + PlatformSP platform_sp = target.GetPlatform(); + assert(platform_sp); + ArchSpec arch = target.GetArchitecture(); + + CompilerType type = platform_sp->GetSiginfoType(arch.GetTriple()); + if (!type.IsValid()) + return ValueObjectConstResult::Create(&target, Status("no siginfo_t for the platform")); + + llvm::Optional<uint64_t> type_size = type.GetByteSize(nullptr); + assert(type_size); + llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>> data = GetSiginfo(type_size.getValue()); + if (!data) + return ValueObjectConstResult::Create(&target, Status(data.takeError())); + + DataExtractor data_extractor{data.get()->getBufferStart(), data.get()->getBufferSize(), + process_sp->GetByteOrder(), arch.GetAddressByteSize()}; + return ValueObjectConstResult::Create(&target, type, ConstString("__lldb_siginfo"), data_extractor); +} diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h index ca3ca24487a5..09d80841fa5d 100644 --- a/llvm/include/llvm-c/Core.h +++ b/llvm/include/llvm-c/Core.h @@ -18,6 +18,7 @@ #include "llvm-c/Deprecated.h" #include "llvm-c/ErrorHandling.h" #include "llvm-c/ExternC.h" + #include "llvm-c/Types.h" LLVM_C_EXTERN_C_BEGIN diff --git a/llvm/include/llvm-c/DebugInfo.h b/llvm/include/llvm-c/DebugInfo.h index a515533f38e2..8554a0199873 100644 --- a/llvm/include/llvm-c/DebugInfo.h +++ b/llvm/include/llvm-c/DebugInfo.h @@ -16,8 +16,8 @@ #ifndef LLVM_C_DEBUGINFO_H #define LLVM_C_DEBUGINFO_H -#include "llvm-c/Core.h" #include "llvm-c/ExternC.h" +#include "llvm-c/Types.h" LLVM_C_EXTERN_C_BEGIN diff --git a/llvm/include/llvm/ADT/APFixedPoint.h b/llvm/include/llvm/ADT/APFixedPoint.h index d6349e6b2a88..92cabdd9f9e4 100644 --- a/llvm/include/llvm/ADT/APFixedPoint.h +++ b/llvm/include/llvm/ADT/APFixedPoint.h @@ -5,12 +5,12 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// +/// /// \file /// Defines the fixed point number interface. /// This is a class for abstracting various operations performed on fixed point /// types. -// +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_APFIXEDPOINT_H diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h index 40e0e32c77a8..17b57de7b0aa 100644 --- a/llvm/include/llvm/ADT/APFloat.h +++ b/llvm/include/llvm/ADT/APFloat.h @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// /// /// \file -/// \brief /// This file declares a class to represent arbitrary precision floating point /// values and provide a variety of arithmetic operations on them. /// diff --git a/llvm/include/llvm/ADT/APSInt.h b/llvm/include/llvm/ADT/APSInt.h index c1cf3c546070..7b6af436f577 100644 --- a/llvm/include/llvm/ADT/APSInt.h +++ b/llvm/include/llvm/ADT/APSInt.h @@ -5,10 +5,11 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file implements the APSInt class, which is a simple class that -// represents an arbitrary sized integer that knows its signedness. -// +/// +/// \file +/// This file implements the APSInt class, which is a simple class that +/// represents an arbitrary sized integer that knows its signedness. 
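The new Thread::GetSiginfoValue() above wraps the raw bytes returned by GetSiginfo() in a ValueObject typed as the platform's siginfo_t. A hedged sketch of a possible caller; the helper below and its use of si_signo (a POSIX field name) are illustrative, not part of the patch:

```
#include "lldb/Target/Thread.h"
#include <cinttypes>
#include <cstdio>

static void PrintSignalNumber(lldb_private::Thread &thread) {
  lldb::ValueObjectSP siginfo_sp = thread.GetSiginfoValue();
  if (!siginfo_sp || siginfo_sp->GetError().Fail())
    return; // no siginfo_t for the platform, or the read failed
  lldb::ValueObjectSP signo_sp = siginfo_sp->GetChildMemberWithName(
      lldb_private::ConstString("si_signo"), /*can_create=*/true);
  if (signo_sp)
    std::printf("si_signo = %" PRIu64 "\n", signo_sp->GetValueAsUnsigned(0));
}
```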
+/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_APSINT_H diff --git a/llvm/include/llvm/ADT/Any.h b/llvm/include/llvm/ADT/Any.h index 1b4f2c2fa985..1c7ba0371781 100644 --- a/llvm/include/llvm/ADT/Any.h +++ b/llvm/include/llvm/ADT/Any.h @@ -5,11 +5,12 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file provides Any, a non-template class modeled in the spirit of -// std::any. The idea is to provide a type-safe replacement for C's void*. -// It can hold a value of any copy-constructible copy-assignable type -// +/// +/// \file +/// This file provides Any, a non-template class modeled in the spirit of +/// std::any. The idea is to provide a type-safe replacement for C's void*. +/// It can hold a value of any copy-constructible copy-assignable type +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_ANY_H diff --git a/llvm/include/llvm/ADT/BitVector.h b/llvm/include/llvm/ADT/BitVector.h index fff4a8f578d2..9540b3985963 100644 --- a/llvm/include/llvm/ADT/BitVector.h +++ b/llvm/include/llvm/ADT/BitVector.h @@ -5,9 +5,10 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file implements the BitVector class. -// +/// +/// \file +/// This file implements the BitVector class. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_BITVECTOR_H diff --git a/llvm/include/llvm/ADT/BreadthFirstIterator.h b/llvm/include/llvm/ADT/BreadthFirstIterator.h index 7d728a23b19a..1312b5f91e83 100644 --- a/llvm/include/llvm/ADT/BreadthFirstIterator.h +++ b/llvm/include/llvm/ADT/BreadthFirstIterator.h @@ -5,13 +5,14 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file builds on the ADT/GraphTraits.h file to build a generic breadth -// first graph iterator. This file exposes the following functions/types: -// -// bf_begin/bf_end/bf_iterator -// * Normal breadth-first iteration - visit a graph level-by-level. -// +/// +/// \file +/// This file builds on the ADT/GraphTraits.h file to build a generic breadth +/// first graph iterator. This file exposes the following functions/types: +/// +/// bf_begin/bf_end/bf_iterator +/// * Normal breadth-first iteration - visit a graph level-by-level. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_BREADTHFIRSTITERATOR_H diff --git a/llvm/include/llvm/ADT/CachedHashString.h b/llvm/include/llvm/ADT/CachedHashString.h index 785bd07b3a44..ebd40e320715 100644 --- a/llvm/include/llvm/ADT/CachedHashString.h +++ b/llvm/include/llvm/ADT/CachedHashString.h @@ -5,15 +5,16 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file defines CachedHashString and CachedHashStringRef. These are owning -// and not-owning string types that store their hash in addition to their string -// data. -// -// Unlike std::string, CachedHashString can be used in DenseSet/DenseMap -// (because, unlike std::string, CachedHashString lets us have empty and -// tombstone values). -// +/// +/// \file +/// This file defines CachedHashString and CachedHashStringRef. 
These are +/// owning and not-owning string types that store their hash in addition to +/// their string data. +/// +/// Unlike std::string, CachedHashString can be used in DenseSet/DenseMap +/// (because, unlike std::string, CachedHashString lets us have empty and +/// tombstone values). +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_CACHEDHASHSTRING_H diff --git a/llvm/include/llvm/ADT/CoalescingBitVector.h b/llvm/include/llvm/ADT/CoalescingBitVector.h index 6935c255a099..4940bc1c2c18 100644 --- a/llvm/include/llvm/ADT/CoalescingBitVector.h +++ b/llvm/include/llvm/ADT/CoalescingBitVector.h @@ -6,7 +6,8 @@ // //===----------------------------------------------------------------------===// /// -/// \file A bitvector that uses an IntervalMap to coalesce adjacent elements +/// \file +/// A bitvector that uses an IntervalMap to coalesce adjacent elements /// into intervals. /// //===----------------------------------------------------------------------===// diff --git a/llvm/include/llvm/ADT/DenseMap.h b/llvm/include/llvm/ADT/DenseMap.h index 595eabd0ffb4..7673b66ca42a 100644 --- a/llvm/include/llvm/ADT/DenseMap.h +++ b/llvm/include/llvm/ADT/DenseMap.h @@ -5,9 +5,10 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file defines the DenseMap class. -// +/// +/// \file +/// This file defines the DenseMap class. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_DENSEMAP_H diff --git a/llvm/include/llvm/ADT/DenseMapInfo.h b/llvm/include/llvm/ADT/DenseMapInfo.h index 75b7371a3683..afd478f0b849 100644 --- a/llvm/include/llvm/ADT/DenseMapInfo.h +++ b/llvm/include/llvm/ADT/DenseMapInfo.h @@ -5,9 +5,10 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file defines DenseMapInfo traits for DenseMap. -// +/// +/// \file +/// This file defines DenseMapInfo traits for DenseMap. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_DENSEMAPINFO_H diff --git a/llvm/include/llvm/ADT/DenseSet.h b/llvm/include/llvm/ADT/DenseSet.h index e767211a0900..b89c88626e43 100644 --- a/llvm/include/llvm/ADT/DenseSet.h +++ b/llvm/include/llvm/ADT/DenseSet.h @@ -5,9 +5,10 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file defines the DenseSet and SmallDenseSet classes. -// +/// +/// \file +/// This file defines the DenseSet and SmallDenseSet classes. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_DENSESET_H diff --git a/llvm/include/llvm/ADT/DepthFirstIterator.h b/llvm/include/llvm/ADT/DepthFirstIterator.h index 42ac61d7cf52..cea6fbcd9d29 100644 --- a/llvm/include/llvm/ADT/DepthFirstIterator.h +++ b/llvm/include/llvm/ADT/DepthFirstIterator.h @@ -5,28 +5,30 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file builds on the ADT/GraphTraits.h file to build generic depth -// first graph iterator. This file exposes the following functions/types: -// -// df_begin/df_end/df_iterator -// * Normal depth-first iteration - visit a node and then all of its children. 
-// -// idf_begin/idf_end/idf_iterator -// * Depth-first iteration on the 'inverse' graph. -// -// df_ext_begin/df_ext_end/df_ext_iterator -// * Normal depth-first iteration - visit a node and then all of its children. -// This iterator stores the 'visited' set in an external set, which allows -// it to be more efficient, and allows external clients to use the set for -// other purposes. -// -// idf_ext_begin/idf_ext_end/idf_ext_iterator -// * Depth-first iteration on the 'inverse' graph. -// This iterator stores the 'visited' set in an external set, which allows -// it to be more efficient, and allows external clients to use the set for -// other purposes. -// +/// +/// \file +/// This file builds on the ADT/GraphTraits.h file to build generic depth +/// first graph iterator. This file exposes the following functions/types: +/// +/// df_begin/df_end/df_iterator +/// * Normal depth-first iteration - visit a node and then all of its +/// children. +/// +/// idf_begin/idf_end/idf_iterator +/// * Depth-first iteration on the 'inverse' graph. +/// +/// df_ext_begin/df_ext_end/df_ext_iterator +/// * Normal depth-first iteration - visit a node and then all of its +/// children. This iterator stores the 'visited' set in an external set, +/// which allows it to be more efficient, and allows external clients to +/// use the set for other purposes. +/// +/// idf_ext_begin/idf_ext_end/idf_ext_iterator +/// * Depth-first iteration on the 'inverse' graph. +/// This iterator stores the 'visited' set in an external set, which +/// allows it to be more efficient, and allows external clients to use +/// the set for other purposes. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_DEPTHFIRSTITERATOR_H diff --git a/llvm/include/llvm/ADT/DirectedGraph.h b/llvm/include/llvm/ADT/DirectedGraph.h index e8bb9e6b2292..83c0bea6393c 100644 --- a/llvm/include/llvm/ADT/DirectedGraph.h +++ b/llvm/include/llvm/ADT/DirectedGraph.h @@ -5,10 +5,11 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file defines the interface and a base class implementation for a -// directed graph. -// +/// +/// \file +/// This file defines the interface and a base class implementation for a +/// directed graph. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_DIRECTEDGRAPH_H diff --git a/llvm/include/llvm/ADT/EnumeratedArray.h b/llvm/include/llvm/ADT/EnumeratedArray.h index a66ec9d08c37..f54a50446c6e 100644 --- a/llvm/include/llvm/ADT/EnumeratedArray.h +++ b/llvm/include/llvm/ADT/EnumeratedArray.h @@ -5,9 +5,11 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file defines an array type that can be indexed using scoped enum values. -// +/// +/// \file +/// This file defines an array type that can be indexed using scoped enum +/// values. 
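EnumeratedArray, whose header comment is converted just above, is the ADT for arrays indexed directly by scoped enum values. A small sketch, assuming the enum supplies the Last enumerator that the template's defaults rely on (the Channel enum and function are hypothetical):

```
#include "llvm/ADT/EnumeratedArray.h"

enum class Channel { Red, Green, Blue, Last = Blue };

void CountGreen() {
  // Sized from Channel::Last; indexed by the enum itself, no casts needed.
  llvm::EnumeratedArray<int, Channel> Counts(0);
  Counts[Channel::Green] += 1;
}
```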
+/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_ENUMERATEDARRAY_H diff --git a/llvm/include/llvm/ADT/EpochTracker.h b/llvm/include/llvm/ADT/EpochTracker.h index 7a2e4220afec..b06888494466 100644 --- a/llvm/include/llvm/ADT/EpochTracker.h +++ b/llvm/include/llvm/ADT/EpochTracker.h @@ -5,11 +5,12 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file defines the DebugEpochBase and DebugEpochBase::HandleBase classes. -// These can be used to write iterators that are fail-fast when LLVM is built -// with asserts enabled. -// +/// +/// \file +/// This file defines the DebugEpochBase and DebugEpochBase::HandleBase classes. +/// These can be used to write iterators that are fail-fast when LLVM is built +/// with asserts enabled. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_EPOCHTRACKER_H diff --git a/llvm/include/llvm/ADT/EquivalenceClasses.h b/llvm/include/llvm/ADT/EquivalenceClasses.h index de6bb3bca7e3..f12b683ead2d 100644 --- a/llvm/include/llvm/ADT/EquivalenceClasses.h +++ b/llvm/include/llvm/ADT/EquivalenceClasses.h @@ -5,10 +5,11 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// Generic implementation of equivalence classes through the use Tarjan's -// efficient union-find algorithm. -// +/// +/// \file +/// Generic implementation of equivalence classes through the use Tarjan's +/// efficient union-find algorithm. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_EQUIVALENCECLASSES_H diff --git a/llvm/include/llvm/ADT/FloatingPointMode.h b/llvm/include/llvm/ADT/FloatingPointMode.h index 62c127a49620..9cc69b8a8344 100644 --- a/llvm/include/llvm/ADT/FloatingPointMode.h +++ b/llvm/include/llvm/ADT/FloatingPointMode.h @@ -5,9 +5,10 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// Utilities for dealing with flags related to floating point mode controls. -// +/// +/// \file +/// Utilities for dealing with flags related to floating point mode controls. +/// //===----------------------------------------------------------------------===/ #ifndef LLVM_ADT_FLOATINGPOINTMODE_H diff --git a/llvm/include/llvm/ADT/FoldingSet.h b/llvm/include/llvm/ADT/FoldingSet.h index fb1cb03a4b5c..a8707f0ee81e 100644 --- a/llvm/include/llvm/ADT/FoldingSet.h +++ b/llvm/include/llvm/ADT/FoldingSet.h @@ -5,11 +5,12 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file defines a hash set that can be used to remove duplication of nodes -// in a graph. This code was originally created by Chris Lattner for use with -// SelectionDAGCSEMap, but was isolated to provide use across the llvm code set. -// +/// +/// \file +/// This file defines a hash set that can be used to remove duplication of nodes +/// in a graph. This code was originally created by Chris Lattner for use with +/// SelectionDAGCSEMap, but was isolated to provide use across the llvm code +/// set. 
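FoldingSet, described immediately above, removes duplication by having each node profile itself into a FoldingSetNodeID. A sketch of that protocol under a hypothetical node type; GetOrInsertNode and FindNodeOrInsertPos are the long-standing interface:

```
#include "llvm/ADT/FoldingSet.h"

struct MyNode : llvm::FoldingSetNode {
  int Value;
  explicit MyNode(int V) : Value(V) {}
  void Profile(llvm::FoldingSetNodeID &ID) const { ID.AddInteger(Value); }
};

void Demo() {
  llvm::FoldingSet<MyNode> Set;
  MyNode N1(42);
  MyNode *Canon = Set.GetOrInsertNode(&N1); // first insertion: Canon == &N1
  llvm::FoldingSetNodeID ID;
  ID.AddInteger(42);
  void *InsertPos = nullptr;
  // A node with the same profile already exists, so this finds &N1.
  MyNode *Existing = Set.FindNodeOrInsertPos(ID, InsertPos);
  (void)Canon;
  (void)Existing;
}
```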
//===----------------------------------------------------------------------===// #ifndef LLVM_ADT_FOLDINGSET_H diff --git a/llvm/include/llvm/ADT/GenericCycleImpl.h b/llvm/include/llvm/ADT/GenericCycleImpl.h index 5f29236eac47..d443f9e21a47 100644 --- a/llvm/include/llvm/ADT/GenericCycleImpl.h +++ b/llvm/include/llvm/ADT/GenericCycleImpl.h @@ -5,18 +5,19 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This template implementation resides in a separate file so that it -// does not get injected into every .cpp file that includes the -// generic header. -// -// DO NOT INCLUDE THIS FILE WHEN MERELY USING CYCLEINFO. -// -// This file should only be included by files that implement a -// specialization of the relevant templates. Currently these are: -// - CycleAnalysis.cpp -// - MachineCycleAnalysis.cpp -// +/// +/// \file +/// This template implementation resides in a separate file so that it +/// does not get injected into every .cpp file that includes the +/// generic header. +/// +/// DO NOT INCLUDE THIS FILE WHEN MERELY USING CYCLEINFO. +/// +/// This file should only be included by files that implement a +/// specialization of the relevant templates. Currently these are: +/// - CycleAnalysis.cpp +/// - MachineCycleAnalysis.cpp +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_GENERICCYCLEIMPL_H @@ -77,7 +78,7 @@ template <typename ContextT> class GenericCycleInfoCompute { unsigned Start = 0; // DFS start; positive if block is found unsigned End = 0; // DFS end - DFSInfo() {} + DFSInfo() = default; explicit DFSInfo(unsigned Start) : Start(Start) {} /// Whether this node is an ancestor (or equal to) the node \p Other diff --git a/llvm/include/llvm/ADT/GenericCycleInfo.h b/llvm/include/llvm/ADT/GenericCycleInfo.h index 7768253e121d..d5f9cd9142ac 100644 --- a/llvm/include/llvm/ADT/GenericCycleInfo.h +++ b/llvm/include/llvm/ADT/GenericCycleInfo.h @@ -5,7 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// +/// /// \file /// \brief Find all cycles in a control-flow graph, including irreducible loops. /// @@ -22,7 +22,7 @@ /// unique cycle C which is a superset of L. /// - In the absence of irreducible control flow, the cycles are /// exactly the natural loops in the program. -// +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_GENERICCYCLEINFO_H diff --git a/llvm/include/llvm/ADT/GraphTraits.h b/llvm/include/llvm/ADT/GraphTraits.h index 3ce91225d80d..3a7773592af3 100644 --- a/llvm/include/llvm/ADT/GraphTraits.h +++ b/llvm/include/llvm/ADT/GraphTraits.h @@ -5,13 +5,15 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file defines the little GraphTraits<X> template class that should be -// specialized by classes that want to be iteratable by generic graph iterators. -// -// This file also defines the marker class Inverse that is used to iterate over -// graphs in a graph defined, inverse ordering... -// +/// +/// \file +/// This file defines the little GraphTraits<X> template class that should be +/// specialized by classes that want to be iteratable by generic graph +/// iterators. 
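GraphTraits, whose comment conversion starts above, is the hook that the depth- and breadth-first iterator headers in this patch build on. A compact sketch of a specialization; the Node type is hypothetical, while GraphTraits and depth_first are the real entry points:

```
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/GraphTraits.h"
#include <vector>

struct Node { std::vector<Node *> Succs; };

namespace llvm {
template <> struct GraphTraits<Node *> {
  using NodeRef = Node *;
  using ChildIteratorType = std::vector<Node *>::iterator;
  static NodeRef getEntryNode(Node *N) { return N; }
  static ChildIteratorType child_begin(NodeRef N) { return N->Succs.begin(); }
  static ChildIteratorType child_end(NodeRef N) { return N->Succs.end(); }
};
} // namespace llvm

void Visit(Node *Root) {
  for (Node *N : llvm::depth_first(Root)) // a node, then its children
    (void)N;
}
```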
+/// +/// This file also defines the marker class Inverse that is used to iterate over +/// graphs in a graph defined, inverse ordering... +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_GRAPHTRAITS_H diff --git a/llvm/include/llvm/ADT/ImmutableList.h b/llvm/include/llvm/ADT/ImmutableList.h index cf27c5a16d28..23f82691825c 100644 --- a/llvm/include/llvm/ADT/ImmutableList.h +++ b/llvm/include/llvm/ADT/ImmutableList.h @@ -5,9 +5,10 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file defines the ImmutableList class. -// +/// +/// \file +/// This file defines the ImmutableList class. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_IMMUTABLELIST_H diff --git a/llvm/include/llvm/ADT/ImmutableMap.h b/llvm/include/llvm/ADT/ImmutableMap.h index f0e898cafaf9..c9351b3213dc 100644 --- a/llvm/include/llvm/ADT/ImmutableMap.h +++ b/llvm/include/llvm/ADT/ImmutableMap.h @@ -5,9 +5,10 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file defines the ImmutableMap class. -// +/// +/// \file +/// This file defines the ImmutableMap class. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_IMMUTABLEMAP_H diff --git a/llvm/include/llvm/ADT/ImmutableSet.h b/llvm/include/llvm/ADT/ImmutableSet.h index 8cef5acbafaa..b513fe9ec011 100644 --- a/llvm/include/llvm/ADT/ImmutableSet.h +++ b/llvm/include/llvm/ADT/ImmutableSet.h @@ -5,9 +5,10 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file defines the ImutAVLTree and ImmutableSet classes. -// +/// +/// \file +/// This file defines the ImutAVLTree and ImmutableSet classes. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_IMMUTABLESET_H diff --git a/llvm/include/llvm/ADT/IndexedMap.h b/llvm/include/llvm/ADT/IndexedMap.h index b44f16b91d76..5ac5f798269b 100644 --- a/llvm/include/llvm/ADT/IndexedMap.h +++ b/llvm/include/llvm/ADT/IndexedMap.h @@ -5,15 +5,16 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file implements an indexed map. The index map template takes two -// types. The first is the mapped type and the second is a functor -// that maps its argument to a size_t. On instantiation a "null" value -// can be provided to be used as a "does not exist" indicator in the -// map. A member function grow() is provided that given the value of -// the maximally indexed key (the argument of the functor) makes sure -// the map has enough space for it. -// +/// +/// \file +/// This file implements an indexed map. The index map template takes two +/// types. The first is the mapped type and the second is a functor +/// that maps its argument to a size_t. On instantiation a "null" value +/// can be provided to be used as a "does not exist" indicator in the +/// map. A member function grow() is provided that given the value of +/// the maximally indexed key (the argument of the functor) makes sure +/// the map has enough space for it. 
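IndexedMap's contract, restated above, is a caller-supplied null value plus an explicit grow() before indexing. A small usage sketch (the function is hypothetical; the constructor, grow(), and operator[] are the documented interface):

```
#include "llvm/ADT/IndexedMap.h"

void Demo() {
  llvm::IndexedMap<int> Map(/*nullVal=*/-1);
  Map.grow(10);                  // ensure storage for keys 0..10
  Map[7] = 42;
  bool Missing = (Map[3] == -1); // untouched keys read as the null value
  (void)Missing;
}
```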
+/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_INDEXEDMAP_H diff --git a/llvm/include/llvm/ADT/IntEqClasses.h b/llvm/include/llvm/ADT/IntEqClasses.h index 08f46a3079ef..84bb58cb736c 100644 --- a/llvm/include/llvm/ADT/IntEqClasses.h +++ b/llvm/include/llvm/ADT/IntEqClasses.h @@ -5,16 +5,17 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// Equivalence classes for small integers. This is a mapping of the integers -// 0 .. N-1 into M equivalence classes numbered 0 .. M-1. -// -// Initially each integer has its own equivalence class. Classes are joined by -// passing a representative member of each class to join(). -// -// Once the classes are built, compress() will number them 0 .. M-1 and prevent -// further changes. -// +/// +/// \file +/// Equivalence classes for small integers. This is a mapping of the integers +/// 0 .. N-1 into M equivalence classes numbered 0 .. M-1. +/// +/// Initially each integer has its own equivalence class. Classes are joined by +/// passing a representative member of each class to join(). +/// +/// Once the classes are built, compress() will number them 0 .. M-1 and prevent +/// further changes. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_INTEQCLASSES_H diff --git a/llvm/include/llvm/ADT/IntervalMap.h b/llvm/include/llvm/ADT/IntervalMap.h index 3c107a3622a9..368ed46f98d2 100644 --- a/llvm/include/llvm/ADT/IntervalMap.h +++ b/llvm/include/llvm/ADT/IntervalMap.h @@ -5,30 +5,31 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file implements a coalescing interval map for small objects. -// -// KeyT objects are mapped to ValT objects. Intervals of keys that map to the -// same value are represented in a compressed form. -// -// Iterators provide ordered access to the compressed intervals rather than the -// individual keys, and insert and erase operations use key intervals as well. -// -// Like SmallVector, IntervalMap will store the first N intervals in the map -// object itself without any allocations. When space is exhausted it switches to -// a B+-tree representation with very small overhead for small key and value -// objects. -// -// A Traits class specifies how keys are compared. It also allows IntervalMap to -// work with both closed and half-open intervals. -// -// Keys and values are not stored next to each other in a std::pair, so we don't -// provide such a value_type. Dereferencing iterators only returns the mapped -// value. The interval bounds are accessible through the start() and stop() -// iterator methods. -// -// IntervalMap is optimized for small key and value objects, 4 or 8 bytes each -// is the optimal size. For large objects use std::map instead. +/// +/// \file +/// This file implements a coalescing interval map for small objects. +/// +/// KeyT objects are mapped to ValT objects. Intervals of keys that map to the +/// same value are represented in a compressed form. +/// +/// Iterators provide ordered access to the compressed intervals rather than the +/// individual keys, and insert and erase operations use key intervals as well. +/// +/// Like SmallVector, IntervalMap will store the first N intervals in the map +/// object itself without any allocations. 
When space is exhausted it switches +/// to a B+-tree representation with very small overhead for small key and +/// value objects. +/// +/// A Traits class specifies how keys are compared. It also allows IntervalMap +/// to work with both closed and half-open intervals. +/// +/// Keys and values are not stored next to each other in a std::pair, so we +/// don't provide such a value_type. Dereferencing iterators only returns the +/// mapped value. The interval bounds are accessible through the start() and +/// stop() iterator methods. +/// +/// IntervalMap is optimized for small key and value objects, 4 or 8 bytes +/// each is the optimal size. For large objects use std::map instead. // //===----------------------------------------------------------------------===// // diff --git a/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h b/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h index 9715c9d01b98..975535bb5676 100644 --- a/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h +++ b/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h @@ -5,51 +5,56 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file defines the RefCountedBase, ThreadSafeRefCountedBase, and -// IntrusiveRefCntPtr classes. -// -// IntrusiveRefCntPtr is a smart pointer to an object which maintains a -// reference count. (ThreadSafe)RefCountedBase is a mixin class that adds a -// refcount member variable and methods for updating the refcount. An object -// that inherits from (ThreadSafe)RefCountedBase deletes itself when its -// refcount hits zero. -// -// For example: -// -// class MyClass : public RefCountedBase<MyClass> {}; -// -// void foo() { -// // Constructing an IntrusiveRefCntPtr increases the pointee's refcount by -// // 1 (from 0 in this case). -// IntrusiveRefCntPtr<MyClass> Ptr1(new MyClass()); -// -// // Copying an IntrusiveRefCntPtr increases the pointee's refcount by 1. -// IntrusiveRefCntPtr<MyClass> Ptr2(Ptr1); -// -// // Constructing an IntrusiveRefCntPtr has no effect on the object's -// // refcount. After a move, the moved-from pointer is null. -// IntrusiveRefCntPtr<MyClass> Ptr3(std::move(Ptr1)); -// assert(Ptr1 == nullptr); -// -// // Clearing an IntrusiveRefCntPtr decreases the pointee's refcount by 1. -// Ptr2.reset(); -// -// // The object deletes itself when we return from the function, because -// // Ptr3's destructor decrements its refcount to 0. -// } -// -// You can use IntrusiveRefCntPtr with isa<T>(), dyn_cast<T>(), etc.: -// -// IntrusiveRefCntPtr<MyClass> Ptr(new MyClass()); -// OtherClass *Other = dyn_cast<OtherClass>(Ptr); // Ptr.get() not required -// -// IntrusiveRefCntPtr works with any class that -// -// - inherits from (ThreadSafe)RefCountedBase, -// - has Retain() and Release() methods, or -// - specializes IntrusiveRefCntPtrInfo. -// +/// +/// \file +/// This file defines the RefCountedBase, ThreadSafeRefCountedBase, and +/// IntrusiveRefCntPtr classes. +/// +/// IntrusiveRefCntPtr is a smart pointer to an object which maintains a +/// reference count. (ThreadSafe)RefCountedBase is a mixin class that adds a +/// refcount member variable and methods for updating the refcount. An object +/// that inherits from (ThreadSafe)RefCountedBase deletes itself when its +/// refcount hits zero. 
+/// +/// For example: +/// +/// ``` +/// class MyClass : public RefCountedBase<MyClass> {}; +/// +/// void foo() { +/// // Constructing an IntrusiveRefCntPtr increases the pointee's refcount +/// // by 1 (from 0 in this case). +/// IntrusiveRefCntPtr<MyClass> Ptr1(new MyClass()); +/// +/// // Copying an IntrusiveRefCntPtr increases the pointee's refcount by 1. +/// IntrusiveRefCntPtr<MyClass> Ptr2(Ptr1); +/// +/// // Constructing an IntrusiveRefCntPtr has no effect on the object's +/// // refcount. After a move, the moved-from pointer is null. +/// IntrusiveRefCntPtr<MyClass> Ptr3(std::move(Ptr1)); +/// assert(Ptr1 == nullptr); +/// +/// // Clearing an IntrusiveRefCntPtr decreases the pointee's refcount by 1. +/// Ptr2.reset(); +/// +/// // The object deletes itself when we return from the function, because +/// // Ptr3's destructor decrements its refcount to 0. +/// } +/// ``` +/// +/// You can use IntrusiveRefCntPtr with isa<T>(), dyn_cast<T>(), etc.: +/// +/// ``` +/// IntrusiveRefCntPtr<MyClass> Ptr(new MyClass()); +/// OtherClass *Other = dyn_cast<OtherClass>(Ptr); // Ptr.get() not required +/// ``` +/// +/// IntrusiveRefCntPtr works with any class that +/// +/// - inherits from (ThreadSafe)RefCountedBase, +/// - has Retain() and Release() methods, or +/// - specializes IntrusiveRefCntPtrInfo. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_INTRUSIVEREFCNTPTR_H diff --git a/llvm/include/llvm/ADT/MapVector.h b/llvm/include/llvm/ADT/MapVector.h index d281166b3e19..c4e5c7e2bac5 100644 --- a/llvm/include/llvm/ADT/MapVector.h +++ b/llvm/include/llvm/ADT/MapVector.h @@ -5,12 +5,13 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file implements a map that provides insertion order iteration. The -// interface is purposefully minimal. The key is assumed to be cheap to copy -// and 2 copies are kept, one for indexing in a DenseMap, one for iteration in -// a std::vector. -// +/// +/// \file +/// This file implements a map that provides insertion order iteration. The +/// interface is purposefully minimal. The key is assumed to be cheap to copy +/// and 2 copies are kept, one for indexing in a DenseMap, one for iteration in +/// a std::vector. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_MAPVECTOR_H diff --git a/llvm/include/llvm/ADT/None.h b/llvm/include/llvm/ADT/None.h index 004ca0ac50ac..1a66be4097df 100644 --- a/llvm/include/llvm/ADT/None.h +++ b/llvm/include/llvm/ADT/None.h @@ -5,11 +5,12 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file provides None, an enumerator for use in implicit constructors -// of various (usually templated) types to make such construction more -// terse. -// +/// +/// \file +/// This file provides None, an enumerator for use in implicit constructors +/// of various (usually templated) types to make such construction more +/// terse. 
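The MapVector conversion above describes the two-copies design: a DenseMap for lookup plus a std::vector for iteration. The observable effect is deterministic, insertion-ordered traversal, sketched here (the function is hypothetical):

```
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringRef.h"

void Demo() {
  llvm::MapVector<llvm::StringRef, int> MV;
  MV.insert({"b", 2});
  MV.insert({"a", 1});
  for (auto &KV : MV) // visits "b" then "a": insertion order, not key order
    (void)KV;
}
```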
+/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_NONE_H diff --git a/llvm/include/llvm/ADT/Optional.h b/llvm/include/llvm/ADT/Optional.h index 7d6b3e92f6b2..e047b0fc6514 100644 --- a/llvm/include/llvm/ADT/Optional.h +++ b/llvm/include/llvm/ADT/Optional.h @@ -5,11 +5,12 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file provides Optional, a template class modeled in the spirit of -// OCaml's 'opt' variant. The idea is to strongly type whether or not -// a value can be optional. -// +/// +/// \file +/// This file provides Optional, a template class modeled in the spirit of +/// OCaml's 'opt' variant. The idea is to strongly type whether or not +/// a value can be optional. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_OPTIONAL_H @@ -241,7 +242,7 @@ template <typename T> class Optional { public: using value_type = T; - constexpr Optional() {} + constexpr Optional() = default; constexpr Optional(NoneType) {} constexpr Optional(const T &y) : Storage(in_place, y) {} diff --git a/llvm/include/llvm/ADT/PackedVector.h b/llvm/include/llvm/ADT/PackedVector.h index ae7f8cc85743..b448685ab616 100644 --- a/llvm/include/llvm/ADT/PackedVector.h +++ b/llvm/include/llvm/ADT/PackedVector.h @@ -5,9 +5,10 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file implements the PackedVector class. -// +/// +/// \file +/// This file implements the PackedVector class. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_PACKEDVECTOR_H diff --git a/llvm/include/llvm/ADT/PointerIntPair.h b/llvm/include/llvm/ADT/PointerIntPair.h index 393ace6b70fc..b7ddf8855605 100644 --- a/llvm/include/llvm/ADT/PointerIntPair.h +++ b/llvm/include/llvm/ADT/PointerIntPair.h @@ -5,9 +5,10 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file defines the PointerIntPair class. -// +/// +/// \file +/// This file defines the PointerIntPair class. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_POINTERINTPAIR_H diff --git a/llvm/include/llvm/ADT/PointerUnion.h b/llvm/include/llvm/ADT/PointerUnion.h index 5ce2dbee4b3a..04d566bbc75e 100644 --- a/llvm/include/llvm/ADT/PointerUnion.h +++ b/llvm/include/llvm/ADT/PointerUnion.h @@ -5,10 +5,11 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file defines the PointerUnion class, which is a discriminated union of -// pointer types. -// +/// +/// \file +/// This file defines the PointerUnion class, which is a discriminated union of +/// pointer types. 
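PointerUnion, whose comment closes just above, stores the discriminator in the low bits of the pointer itself. A sketch of the query interface (the function is hypothetical; is, get, and dyn_cast are the existing members):

```
#include "llvm/ADT/PointerUnion.h"

void Demo(int *IP, float *FP) {
  llvm::PointerUnion<int *, float *> PU = IP;
  if (PU.is<int *>())
    *PU.get<int *>() = 7;            // checked access to the active member
  PU = FP;
  float *F = PU.dyn_cast<float *>(); // null when the other member is active
  (void)F;
}
```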
+/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_POINTERUNION_H diff --git a/llvm/include/llvm/ADT/PostOrderIterator.h b/llvm/include/llvm/ADT/PostOrderIterator.h index 74314d39d825..d0366045fa09 100644 --- a/llvm/include/llvm/ADT/PostOrderIterator.h +++ b/llvm/include/llvm/ADT/PostOrderIterator.h @@ -5,11 +5,12 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file builds on the ADT/GraphTraits.h file to build a generic graph -// post order iterator. This should work over any graph type that has a -// GraphTraits specialization. -// +/// +/// \file +/// This file builds on the ADT/GraphTraits.h file to build a generic graph +/// post order iterator. This should work over any graph type that has a +/// GraphTraits specialization. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_POSTORDERITERATOR_H diff --git a/llvm/include/llvm/ADT/PriorityQueue.h b/llvm/include/llvm/ADT/PriorityQueue.h index cf79ee10ba7f..f40c160f0f5e 100644 --- a/llvm/include/llvm/ADT/PriorityQueue.h +++ b/llvm/include/llvm/ADT/PriorityQueue.h @@ -5,9 +5,10 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file defines the PriorityQueue class. -// +/// +/// \file +/// This file defines the PriorityQueue class. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_PRIORITYQUEUE_H diff --git a/llvm/include/llvm/ADT/STLArrayExtras.h b/llvm/include/llvm/ADT/STLArrayExtras.h new file mode 100644 index 000000000000..5b666641580e --- /dev/null +++ b/llvm/include/llvm/ADT/STLArrayExtras.h @@ -0,0 +1,35 @@ +//===- llvm/ADT/STLArrayExtras.h - additions to <array> ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains some templates that are useful if you are working with the +// STL at all. +// +// No library is required when using these functions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_STLARRAYEXTRAS_H +#define LLVM_ADT_STLARRAYEXTRAS_H + +#include <cstddef> + +namespace llvm { + +//===----------------------------------------------------------------------===// +// Extra additions for arrays +//===----------------------------------------------------------------------===// + +/// Find the length of an array. +template <class T, std::size_t N> +constexpr inline size_t array_lengthof(T (&)[N]) { + return N; +} + +} // end namespace llvm + +#endif // LLVM_ADT_STLARRAYEXTRAS_H diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h index c3200c926518..e2972f4f902a 100644 --- a/llvm/include/llvm/ADT/STLExtras.h +++ b/llvm/include/llvm/ADT/STLExtras.h @@ -5,21 +5,23 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file contains some templates that are useful if you are working with the -// STL at all. -// -// No library is required when using these functions. 
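The new STLArrayExtras.h above carves array_lengthof out of STLExtras.h so it can be used without pulling in the heavier header; the STLExtras.h hunk that continues below re-exports it via an include. Usage is unchanged:

```
#include "llvm/ADT/STLArrayExtras.h"

constexpr int Primes[] = {2, 3, 5, 7};
// N is deduced from the array type; constexpr, so usable in static_assert.
static_assert(llvm::array_lengthof(Primes) == 4, "expected four primes");
```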
-// +/// +/// \file +/// This file contains some templates that are useful if you are working with +/// the STL at all. +/// +/// No library is required when using these functions. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_STLEXTRAS_H #define LLVM_ADT_STLEXTRAS_H -#include "llvm/ADT/identity.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLArrayExtras.h" #include "llvm/ADT/STLForwardCompat.h" #include "llvm/ADT/STLFunctionalExtras.h" +#include "llvm/ADT/identity.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Config/abi-breaking.h" @@ -1410,7 +1412,7 @@ constexpr decltype(auto) makeVisitor(CallableTs &&...Callables) { } //===----------------------------------------------------------------------===// -// Extra additions for arrays +// Extra additions to <algorithm> //===----------------------------------------------------------------------===// // We have a copy here so that LLVM behaves the same when using different @@ -1430,12 +1432,6 @@ void shuffle(Iterator first, Iterator last, RNG &&g) { } } -/// Find the length of an array. -template <class T, std::size_t N> -constexpr inline size_t array_lengthof(T (&)[N]) { - return N; -} - /// Adapt std::less<T> for array_pod_sort. template<typename T> inline int array_pod_sort_comparator(const void *P1, const void *P2) { @@ -1563,10 +1559,6 @@ inline void sort(Container &&C, Compare Comp) { llvm::sort(adl_begin(C), adl_end(C), Comp); } -//===----------------------------------------------------------------------===// -// Extra additions to <algorithm> -//===----------------------------------------------------------------------===// - /// Get the size of a range. This is a wrapper function around std::distance /// which is only enabled when the operation is O(1). template <typename R> diff --git a/llvm/include/llvm/ADT/STLForwardCompat.h b/llvm/include/llvm/ADT/STLForwardCompat.h index 440b29df260c..0aa577d3ee1a 100644 --- a/llvm/include/llvm/ADT/STLForwardCompat.h +++ b/llvm/include/llvm/ADT/STLForwardCompat.h @@ -5,12 +5,13 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file contains library features backported from future STL versions. -// -// These should be replaced with their STL counterparts as the C++ version LLVM -// is compiled with is updated. -// +/// +/// \file +/// This file contains library features backported from future STL versions. +/// +/// These should be replaced with their STL counterparts as the C++ version LLVM +/// is compiled with is updated. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_STLFORWARDCOMPAT_H diff --git a/llvm/include/llvm/ADT/ScopeExit.h b/llvm/include/llvm/ADT/ScopeExit.h index 61618818bae5..7f013f3f7979 100644 --- a/llvm/include/llvm/ADT/ScopeExit.h +++ b/llvm/include/llvm/ADT/ScopeExit.h @@ -5,10 +5,11 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file defines the make_scope_exit function, which executes user-defined -// cleanup logic at scope exit. -// +/// +/// \file +/// This file defines the make_scope_exit function, which executes user-defined +/// cleanup logic at scope exit. 
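make_scope_exit, described just above, returns an RAII object that runs a callable when it goes out of scope, on every exit path. A sketch; the file handling is a hypothetical stand-in for any cleanup:

```
#include "llvm/ADT/ScopeExit.h"
#include <cstdio>

void Demo() {
  std::FILE *F = std::fopen("example.txt", "r"); // hypothetical resource
  if (!F)
    return;
  auto Closer = llvm::make_scope_exit([&] { std::fclose(F); });
  // ... any return past this point still closes F via Closer's destructor ...
}
```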
+/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_SCOPEEXIT_H diff --git a/llvm/include/llvm/ADT/SetOperations.h b/llvm/include/llvm/ADT/SetOperations.h index 3e30b6bb83d3..c9462f077dc8 100644 --- a/llvm/include/llvm/ADT/SetOperations.h +++ b/llvm/include/llvm/ADT/SetOperations.h @@ -5,10 +5,11 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file defines generic set operations that may be used on set's of -// different types, and different element types. -// +/// +/// \file +/// This file defines generic set operations that may be used on set's of +/// different types, and different element types. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_SETOPERATIONS_H diff --git a/llvm/include/llvm/ADT/SetVector.h b/llvm/include/llvm/ADT/SetVector.h index 82d5e98afb5d..08cf42f0b210 100644 --- a/llvm/include/llvm/ADT/SetVector.h +++ b/llvm/include/llvm/ADT/SetVector.h @@ -5,15 +5,16 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file implements a set that has insertion order iteration -// characteristics. This is useful for keeping a set of things that need to be -// visited later but in a deterministic order (insertion order). The interface -// is purposefully minimal. -// -// This file defines SetVector and SmallSetVector, which performs no allocations -// if the SetVector has less than a certain number of elements. -// +/// +/// \file +/// This file implements a set that has insertion order iteration +/// characteristics. This is useful for keeping a set of things that need to be +/// visited later but in a deterministic order (insertion order). The interface +/// is purposefully minimal. +/// +/// This file defines SetVector and SmallSetVector, which performs no +/// allocations if the SetVector has less than a certain number of elements. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_SETVECTOR_H diff --git a/llvm/include/llvm/ADT/SmallBitVector.h b/llvm/include/llvm/ADT/SmallBitVector.h index 17be317a10d7..86e304cc6c02 100644 --- a/llvm/include/llvm/ADT/SmallBitVector.h +++ b/llvm/include/llvm/ADT/SmallBitVector.h @@ -5,9 +5,10 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file implements the SmallBitVector class. -// +/// +/// \file +/// This file implements the SmallBitVector class. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_SMALLBITVECTOR_H diff --git a/llvm/include/llvm/ADT/SmallPtrSet.h b/llvm/include/llvm/ADT/SmallPtrSet.h index 981b741669b0..ef6dae68b4a6 100644 --- a/llvm/include/llvm/ADT/SmallPtrSet.h +++ b/llvm/include/llvm/ADT/SmallPtrSet.h @@ -5,9 +5,10 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file defines the SmallPtrSet class. See the doxygen comment for -// SmallPtrSetImplBase for more details on the algorithm used. +/// +/// \file +/// This file defines the SmallPtrSet class. See the doxygen comment for +/// SmallPtrSetImplBase for more details on the algorithm used. 
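SmallPtrSet, whose comment conversion ends here, keeps a handful of pointers inline before spilling to the heap. A sketch of the usual insert-and-test pattern (the function is hypothetical):

```
#include "llvm/ADT/SmallPtrSet.h"

void Demo(int *A, int *B) {
  llvm::SmallPtrSet<int *, 4> Visited; // inline storage for 4 pointers
  Visited.insert(A);
  if (!Visited.insert(B).second) {
    // B was already in the set; insert() reports that via .second.
  }
}
```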
 //
 //===----------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/ADT/SmallSet.h b/llvm/include/llvm/ADT/SmallSet.h
index fe4f74eac85d..0eed85449c9d 100644
--- a/llvm/include/llvm/ADT/SmallSet.h
+++ b/llvm/include/llvm/ADT/SmallSet.h
@@ -5,9 +5,10 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-//
-// This file defines the SmallSet class.
-//
+///
+/// \file
+/// This file defines the SmallSet class.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_ADT_SMALLSET_H
diff --git a/llvm/include/llvm/ADT/SmallString.h b/llvm/include/llvm/ADT/SmallString.h
index 81243af1f97d..874968f0a13f 100644
--- a/llvm/include/llvm/ADT/SmallString.h
+++ b/llvm/include/llvm/ADT/SmallString.h
@@ -5,9 +5,10 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-//
-// This file defines the SmallString class.
-//
+///
+/// \file
+/// This file defines the SmallString class.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_ADT_SMALLSTRING_H
diff --git a/llvm/include/llvm/ADT/SmallVector.h b/llvm/include/llvm/ADT/SmallVector.h
index 466acb83d466..a4a790323a6b 100644
--- a/llvm/include/llvm/ADT/SmallVector.h
+++ b/llvm/include/llvm/ADT/SmallVector.h
@@ -5,9 +5,10 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-//
-// This file defines the SmallVector class.
-//
+///
+/// \file
+/// This file defines the SmallVector class.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_ADT_SMALLVECTOR_H
@@ -567,6 +568,16 @@ protected:
   explicit SmallVectorImpl(unsigned N) : SmallVectorTemplateBase<T>(N) {}
 
+  void assignRemote(SmallVectorImpl &&RHS) {
+    this->destroy_range(this->begin(), this->end());
+    if (!this->isSmall())
+      free(this->begin());
+    this->BeginX = RHS.BeginX;
+    this->Size = RHS.Size;
+    this->Capacity = RHS.Capacity;
+    RHS.resetToSmall();
+  }
+
 public:
   SmallVectorImpl(const SmallVectorImpl &) = delete;
 
@@ -1031,12 +1042,7 @@ SmallVectorImpl<T> &SmallVectorImpl<T>::operator=(SmallVectorImpl<T> &&RHS) {
   // If the RHS isn't small, clear this vector and then steal its buffer.
   if (!RHS.isSmall()) {
-    this->destroy_range(this->begin(), this->end());
-    if (!this->isSmall()) free(this->begin());
-    this->BeginX = RHS.BeginX;
-    this->Size = RHS.Size;
-    this->Capacity = RHS.Capacity;
-    RHS.resetToSmall();
+    this->assignRemote(std::move(RHS));
     return *this;
   }
 
@@ -1227,7 +1233,20 @@ public:
   }
 
   SmallVector &operator=(SmallVector &&RHS) {
-    SmallVectorImpl<T>::operator=(::std::move(RHS));
+    if (N) {
+      SmallVectorImpl<T>::operator=(::std::move(RHS));
+      return *this;
+    }
+    // SmallVectorImpl<T>::operator= does not leverage N==0. Optimize the
+    // case.
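To see what this new N == 0 branch is for, consider a hypothetical caller (a sketch, not from the patch): with no inline storage, a move can always steal the heap buffer via assignRemote instead of moving elements one by one.

    #include "llvm/ADT/SmallVector.h"
    #include <utility>

    void stealBuffer() {
      llvm::SmallVector<int, 0> A, B; // N == 0: nothing is stored inline
      B.assign({1, 2, 3});
      A = std::move(B); // takes B's heap allocation; no per-element moves
    }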
+ if (this == &RHS) + return *this; + if (RHS.empty()) { + this->destroy_range(this->begin(), this->end()); + this->Size = 0; + } else { + this->assignRemote(std::move(RHS)); + } return *this; } diff --git a/llvm/include/llvm/ADT/SparseBitVector.h b/llvm/include/llvm/ADT/SparseBitVector.h index 12850e14f4ed..a591896521ce 100644 --- a/llvm/include/llvm/ADT/SparseBitVector.h +++ b/llvm/include/llvm/ADT/SparseBitVector.h @@ -5,10 +5,11 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file defines the SparseBitVector class. See the doxygen comment for -// SparseBitVector for more details on the algorithm used. -// +/// +/// \file +/// This file defines the SparseBitVector class. See the doxygen comment for +/// SparseBitVector for more details on the algorithm used. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_SPARSEBITVECTOR_H diff --git a/llvm/include/llvm/ADT/SparseMultiSet.h b/llvm/include/llvm/ADT/SparseMultiSet.h index f63cef936433..ef2a5ea5ed71 100644 --- a/llvm/include/llvm/ADT/SparseMultiSet.h +++ b/llvm/include/llvm/ADT/SparseMultiSet.h @@ -5,16 +5,17 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file defines the SparseMultiSet class, which adds multiset behavior to -// the SparseSet. -// -// A sparse multiset holds a small number of objects identified by integer keys -// from a moderately sized universe. The sparse multiset uses more memory than -// other containers in order to provide faster operations. Any key can map to -// multiple values. A SparseMultiSetNode class is provided, which serves as a -// convenient base class for the contents of a SparseMultiSet. -// +/// +/// \file +/// This file defines the SparseMultiSet class, which adds multiset behavior to +/// the SparseSet. +/// +/// A sparse multiset holds a small number of objects identified by integer keys +/// from a moderately sized universe. The sparse multiset uses more memory than +/// other containers in order to provide faster operations. Any key can map to +/// multiple values. A SparseMultiSetNode class is provided, which serves as a +/// convenient base class for the contents of a SparseMultiSet. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_SPARSEMULTISET_H diff --git a/llvm/include/llvm/ADT/SparseSet.h b/llvm/include/llvm/ADT/SparseSet.h index e66d76ad88e1..5c7087b1bffe 100644 --- a/llvm/include/llvm/ADT/SparseSet.h +++ b/llvm/include/llvm/ADT/SparseSet.h @@ -5,15 +5,16 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file defines the SparseSet class derived from the version described in -// Briggs, Torczon, "An efficient representation for sparse sets", ACM Letters -// on Programming Languages and Systems, Volume 2 Issue 1-4, March-Dec. 1993. -// -// A sparse set holds a small number of objects identified by integer keys from -// a moderately sized universe. The sparse set uses more memory than other -// containers in order to provide faster operations. 
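For the SparseSet described here, a minimal usage sketch (illustrative only): keys are small unsigned integers drawn from a universe whose size is fixed up front.

    #include "llvm/ADT/SparseSet.h"

    void trackRegs() {
      llvm::SparseSet<unsigned> Live;
      Live.setUniverse(64);        // all keys must be < 64
      Live.insert(5);
      Live.insert(17);
      if (Live.count(17))
        Live.erase(Live.find(17)); // constant-time membership and erase
      Live.clear();                // O(size), independent of the universe
    }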
-// +/// +/// \file +/// This file defines the SparseSet class derived from the version described in +/// Briggs, Torczon, "An efficient representation for sparse sets", ACM Letters +/// on Programming Languages and Systems, Volume 2 Issue 1-4, March-Dec. 1993. +/// +/// A sparse set holds a small number of objects identified by integer keys from +/// a moderately sized universe. The sparse set uses more memory than other +/// containers in order to provide faster operations. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_SPARSESET_H diff --git a/llvm/include/llvm/ADT/Statistic.h b/llvm/include/llvm/ADT/Statistic.h index 528d2cdcf61b..c39e161bcbcd 100644 --- a/llvm/include/llvm/ADT/Statistic.h +++ b/llvm/include/llvm/ADT/Statistic.h @@ -5,21 +5,22 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file defines the 'Statistic' class, which is designed to be an easy way -// to expose various metrics from passes. These statistics are printed at the -// end of a run (from llvm_shutdown), when the -stats command line option is -// passed on the command line. -// -// This is useful for reporting information like the number of instructions -// simplified, optimized or removed by various transformations, like this: -// -// static Statistic NumInstsKilled("gcse", "Number of instructions killed"); -// -// Later, in the code: ++NumInstsKilled; -// -// NOTE: Statistics *must* be declared as global variables. -// +/// +/// \file +/// This file defines the 'Statistic' class, which is designed to be an easy way +/// to expose various metrics from passes. These statistics are printed at the +/// end of a run (from llvm_shutdown), when the -stats command line option is +/// passed on the command line. +/// +/// This is useful for reporting information like the number of instructions +/// simplified, optimized or removed by various transformations, like this: +/// +/// static Statistic NumInstsKilled("gcse", "Number of instructions killed"); +/// +/// Later, in the code: ++NumInstsKilled; +/// +/// NOTE: Statistics *must* be declared as global variables. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_STATISTIC_H diff --git a/llvm/include/llvm/ADT/StringExtras.h b/llvm/include/llvm/ADT/StringExtras.h index 81a0954226d6..ee6c33924e96 100644 --- a/llvm/include/llvm/ADT/StringExtras.h +++ b/llvm/include/llvm/ADT/StringExtras.h @@ -5,9 +5,10 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file contains some functions that are useful when dealing with strings. -// +/// +/// \file +/// This file contains some functions that are useful when dealing with strings. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_STRINGEXTRAS_H @@ -148,13 +149,14 @@ inline char toUpper(char x) { return x; } -inline std::string utohexstr(uint64_t X, bool LowerCase = false) { +inline std::string utohexstr(uint64_t X, bool LowerCase = false, + unsigned Width = 0) { char Buffer[17]; char *BufPtr = std::end(Buffer); if (X == 0) *--BufPtr = '0'; - while (X) { + for (unsigned i = 0; Width ? 
(i < Width) : X; ++i) { unsigned char Mod = static_cast<unsigned char>(X) & 15; *--BufPtr = hexdigit(Mod, LowerCase); X >>= 4; diff --git a/llvm/include/llvm/ADT/StringMap.h b/llvm/include/llvm/ADT/StringMap.h index 562a2ff1a192..23248093c67e 100644 --- a/llvm/include/llvm/ADT/StringMap.h +++ b/llvm/include/llvm/ADT/StringMap.h @@ -5,9 +5,10 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file defines the StringMap class. -// +/// +/// \file +/// This file defines the StringMap class. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_STRINGMAP_H diff --git a/llvm/include/llvm/ADT/StringMapEntry.h b/llvm/include/llvm/ADT/StringMapEntry.h index 120d4f3ca4bc..6e13c8618c85 100644 --- a/llvm/include/llvm/ADT/StringMapEntry.h +++ b/llvm/include/llvm/ADT/StringMapEntry.h @@ -5,11 +5,12 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file defines the StringMapEntry class - it is intended to be a low -// dependency implementation detail of StringMap that is more suitable for -// inclusion in public headers than StringMap.h itself is. -// +/// +/// \file +/// This file defines the StringMapEntry class - it is intended to be a low +/// dependency implementation detail of StringMap that is more suitable for +/// inclusion in public headers than StringMap.h itself is. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_STRINGMAPENTRY_H diff --git a/llvm/include/llvm/ADT/StringSet.h b/llvm/include/llvm/ADT/StringSet.h index c4245175544b..4a499463d983 100644 --- a/llvm/include/llvm/ADT/StringSet.h +++ b/llvm/include/llvm/ADT/StringSet.h @@ -5,9 +5,10 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// StringSet - A set-like wrapper for the StringMap. -// +/// +/// \file +/// StringSet - A set-like wrapper for the StringMap. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_STRINGSET_H diff --git a/llvm/include/llvm/ADT/StringSwitch.h b/llvm/include/llvm/ADT/StringSwitch.h index 4b7882d7ca10..95ab1df8d297 100644 --- a/llvm/include/llvm/ADT/StringSwitch.h +++ b/llvm/include/llvm/ADT/StringSwitch.h @@ -4,10 +4,11 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception //===----------------------------------------------------------------------===/ -// -// This file implements the StringSwitch template, which mimics a switch() -// statement whose cases are string literals. -// +/// +/// \file +/// This file implements the StringSwitch template, which mimics a switch() +/// statement whose cases are string literals. +/// //===----------------------------------------------------------------------===/ #ifndef LLVM_ADT_STRINGSWITCH_H #define LLVM_ADT_STRINGSWITCH_H diff --git a/llvm/include/llvm/ADT/Triple.h b/llvm/include/llvm/ADT/Triple.h index 0f0a7b08b5d3..42277c013035 100644 --- a/llvm/include/llvm/ADT/Triple.h +++ b/llvm/include/llvm/ADT/Triple.h @@ -721,6 +721,41 @@ public: isOSBinFormatELF(); } + /// Tests whether the target is T32. 
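Stepping back to the utohexstr change in StringExtras.h above: as I read the new loop condition, a non-zero Width makes it emit exactly Width digits, zero-padding small values. A sketch under that reading:

    #include "llvm/ADT/StringExtras.h"
    #include <cassert>

    void hexDemo() {
      assert(llvm::utohexstr(0x3A) == "3A");             // unchanged default
      assert(llvm::utohexstr(0x3A, /*LowerCase=*/true) == "3a");
      // Width = 4 runs the digit loop exactly four times:
      assert(llvm::utohexstr(0x3A, false, /*Width=*/4) == "003A");
    }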
+ bool isArmT32() const { + switch (getSubArch()) { + case Triple::ARMSubArch_v8m_baseline: + case Triple::ARMSubArch_v7s: + case Triple::ARMSubArch_v7k: + case Triple::ARMSubArch_v7ve: + case Triple::ARMSubArch_v6: + case Triple::ARMSubArch_v6m: + case Triple::ARMSubArch_v6k: + case Triple::ARMSubArch_v6t2: + case Triple::ARMSubArch_v5: + case Triple::ARMSubArch_v5te: + case Triple::ARMSubArch_v4t: + return false; + default: + return true; + } + } + + /// Tests whether the target is an M-class. + bool isArmMClass() const { + switch (getSubArch()) { + case Triple::ARMSubArch_v6m: + case Triple::ARMSubArch_v7m: + case Triple::ARMSubArch_v7em: + case Triple::ARMSubArch_v8m_mainline: + case Triple::ARMSubArch_v8m_baseline: + case Triple::ARMSubArch_v8_1m_mainline: + return true; + default: + return false; + } + } + /// Tests whether the target is AArch64 (little and big endian). bool isAArch64() const { return getArch() == Triple::aarch64 || getArch() == Triple::aarch64_be || diff --git a/llvm/include/llvm/ADT/TypeSwitch.h b/llvm/include/llvm/ADT/TypeSwitch.h index 3b7598f3251d..892a7d43b317 100644 --- a/llvm/include/llvm/ADT/TypeSwitch.h +++ b/llvm/include/llvm/ADT/TypeSwitch.h @@ -5,10 +5,11 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file implements the TypeSwitch template, which mimics a switch() -// statement whose cases are type names. -// +/// +/// \file +/// This file implements the TypeSwitch template, which mimics a switch() +/// statement whose cases are type names. +/// //===-----------------------------------------------------------------------===/ #ifndef LLVM_ADT_TYPESWITCH_H diff --git a/llvm/include/llvm/ADT/Waymarking.h b/llvm/include/llvm/ADT/Waymarking.h deleted file mode 100644 index 2efbc6f05495..000000000000 --- a/llvm/include/llvm/ADT/Waymarking.h +++ /dev/null @@ -1,322 +0,0 @@ -//===- Waymarking.h - Array waymarking algorithm ----------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Utility to backtrace an array's head, from a pointer into it. For the -// backtrace to work, we use "Waymarks", which are special tags embedded into -// the array's elements. -// -// A Tag of n-bits (in size) is composed as follows: -// -// bits: | n-1 | n-2 ... 0 | -// .---------.------------------------------------. -// |Stop Mask|(2^(n-1))-ary numeric system - digit| -// '---------'------------------------------------' -// -// Backtracing is done as follows: -// Walk back (starting from a given pointer to an element into the array), until -// a tag with a "Stop Mask" is reached. Then start calculating the "Offset" from -// the array's head, by picking up digits along the way, until another stop is -// reached. The "Offset" is then subtracted from the current pointer, and the -// result is the array's head. -// A special case - if we first encounter a Tag with a Stop and a zero digit, -// then this is already the head. -// -// For example: -// In case of 2 bits: -// -// Tags: -// x0 - binary digit 0 -// x1 - binary digit 1 -// 1x - stop and calculate (s) -// -// Array: -// .---.---.---.---.---.---.---.---.---.---.---.---.---.---.---.---. 
-// head -> |s0 |s1 | 0 |s1 | 0 | 0 |s1 | 1 | 1 |s1 | 0 | 1 | 0 |s1 | 0 | 1 | -// '---'---'---'---'---'---'---'---'---'---'---'---'---'---'---'---' -// |-1 |-2 |-4 |-7 |-10 |-14 -// <_ | | | | | | -// <_____ | | | | | -// <_____________ | | | | -// <_________________________ | | | -// <_____________________________________ | | -// <_____________________________________________________ | -// -// -// In case of 3 bits: -// -// Tags: -// x00 - quaternary digit 0 -// x01 - quaternary digit 1 -// x10 - quaternary digit 2 -// x11 - quaternary digit 3 -// 1xy - stop and calculate (s) -// -// Array: -// .---.---.---.---.---.---.---.---.---.---.---.---.---.---.---.---. -// head -> |s0 |s1 |s2 |s3 | 0 |s1 | 2 |s1 | 0 |s2 | 2 |s2 | 0 |s3 | 2 |s3 | -// '---'---'---'---'---'---'---'---'---'---'---'---'---'---'---'---' -// |-1 |-2 |-3 |-4 |-6 |-8 |-10 |-12 |-14 |-16 -// <_ | | | | | | | | | | -// <_____ | | | | | | | | | -// <_________ | | | | | | | | -// <_____________ | | | | | | | -// <_____________________ | | | | | | -// <_____________________________ | | | | | -// <_____________________________________ | | | | -// <_____________________________________________ | | | -// <_____________________________________________________ | | -// <_____________________________________________________________ | -// -// -// The API introduce 2 functions: -// 1. fillWaymarks -// 2. followWaymarks -// -// Example: -// int N = 10; -// int M = 5; -// int **A = new int *[N + M]; // Define the array. -// for (int I = 0; I < N + M; ++I) -// A[I] = new int(I); -// -// fillWaymarks(A, A + N); // Set the waymarks for the first N elements -// // of the array. -// // Note that it must be done AFTER we fill -// // the array's elements. -// -// ... // Elements which are not in the range -// // [A, A+N) will not be marked, and we won't -// // be able to call followWaymarks on them. -// -// ... // Elements which will be changed after the -// // call to fillWaymarks, will have to be -// // retagged. -// -// fillWaymarks(A + N, A + N + M, N); // Set the waymarks of the remaining M -// // elements. -// ... -// int **It = A + N + 1; -// int **B = followWaymarks(It); // Find the head of the array containing It. -// assert(B == A); -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_ADT_WAYMARKING_H -#define LLVM_ADT_WAYMARKING_H - -#include "llvm/ADT/STLExtras.h" -#include "llvm/Support/PointerLikeTypeTraits.h" - -namespace llvm { - -namespace detail { - -template <unsigned NumBits> struct WaymarkingTraits { - enum : unsigned { - // The number of bits of a Waymarking Tag. - NUM_BITS = NumBits, - - // A Tag is composed from a Mark and a Stop mask. - MARK_SIZE = NUM_BITS - 1, - STOP_MASK = (1 << MARK_SIZE), - MARK_MASK = (STOP_MASK - 1), - TAG_MASK = (MARK_MASK | STOP_MASK), - - // The number of pre-computed tags (for fast fill). - NUM_STATIC_TAGS = 32 - }; - -private: - // Add a new tag, calculated from Count and Stop, to the Vals pack, while - // continuing recursively to decrease Len down to 0. - template <unsigned Len, bool Stop, unsigned Count, uint8_t... Vals> - struct AddTag; - - // Delegate to the specialized AddTag according to the need of a Stop mask. - template <unsigned Len, unsigned Count, uint8_t... Vals> struct GenTag { - typedef - typename AddTag<Len, (Count <= MARK_MASK), Count, Vals...>::Xdata Xdata; - }; - - // Start adding tags while calculating the next Count, which is actually the - // number of already calculated tags (equivalent to the position in the - // array). 
- template <unsigned Len, uint8_t... Vals> struct GenOffset { - typedef typename GenTag<Len, sizeof...(Vals), Vals...>::Xdata Xdata; - }; - - // Add the tag and remove it from Count. - template <unsigned Len, unsigned Count, uint8_t... Vals> - struct AddTag<Len, false, Count, Vals...> { - typedef typename GenTag<Len - 1, (Count >> MARK_SIZE), Vals..., - Count & MARK_MASK>::Xdata Xdata; - }; - - // We have reached the end of this Count, so start with a new Count. - template <unsigned Len, unsigned Count, uint8_t... Vals> - struct AddTag<Len, true, Count, Vals...> { - typedef typename GenOffset<Len - 1, Vals..., - (Count & MARK_MASK) | STOP_MASK>::Xdata Xdata; - }; - - template <unsigned Count, uint8_t... Vals> struct TagsData { - // The remaining number for calculating the next tag, following the last one - // in Values. - static const unsigned Remain = Count; - - // The array of ordered pre-computed Tags. - static const uint8_t Values[sizeof...(Vals)]; - }; - - // Specialize the case when Len equals 0, as the recursion stop condition. - template <unsigned Count, uint8_t... Vals> - struct AddTag<0, false, Count, Vals...> { - typedef TagsData<Count, Vals...> Xdata; - }; - - template <unsigned Count, uint8_t... Vals> - struct AddTag<0, true, Count, Vals...> { - typedef TagsData<Count, Vals...> Xdata; - }; - -public: - typedef typename GenOffset<NUM_STATIC_TAGS>::Xdata Tags; -}; - -template <unsigned NumBits> -template <unsigned Count, uint8_t... Vals> -const uint8_t WaymarkingTraits<NumBits>::TagsData< - Count, Vals...>::Values[sizeof...(Vals)] = {Vals...}; - -} // end namespace detail - -/// This class is responsible for tagging (and retrieving the tag of) a given -/// element of type T. -template <class T, class WTraits = detail::WaymarkingTraits< - PointerLikeTypeTraits<T>::NumLowBitsAvailable>> -struct Waymarker { - using Traits = WTraits; - static void setWaymark(T &N, unsigned Tag) { N.setWaymark(Tag); } - static unsigned getWaymark(const T &N) { return N.getWaymark(); } -}; - -template <class T, class WTraits> struct Waymarker<T *, WTraits> { - using Traits = WTraits; - static void setWaymark(T *&N, unsigned Tag) { - reinterpret_cast<uintptr_t &>(N) |= static_cast<uintptr_t>(Tag); - } - static unsigned getWaymark(const T *N) { - return static_cast<unsigned>(reinterpret_cast<uintptr_t>(N)) & - Traits::TAG_MASK; - } -}; - -/// Sets up the waymarking algorithm's tags for a given range [Begin, End). -/// -/// \param Begin The beginning of the range to mark with tags (inclusive). -/// \param End The ending of the range to mark with tags (exclusive). -/// \param Offset The position in the supposed tags array from which to start -/// marking the given range. -template <class TIter, class Marker = Waymarker< - typename std::iterator_traits<TIter>::value_type>> -void fillWaymarks(TIter Begin, TIter End, size_t Offset = 0) { - if (Begin == End) - return; - - size_t Count = Marker::Traits::Tags::Remain; - if (Offset <= Marker::Traits::NUM_STATIC_TAGS) { - // Start by filling the pre-calculated tags, starting from the given offset. - while (Offset != Marker::Traits::NUM_STATIC_TAGS) { - Marker::setWaymark(*Begin, Marker::Traits::Tags::Values[Offset]); - - ++Offset; - ++Begin; - - if (Begin == End) - return; - } - } else { - // The given offset is larger than the number of pre-computed tags, so we - // must do it the hard way. - // Calculate the next remaining Count, as if we have filled the tags up to - // the given offset. 
- size_t Off = Marker::Traits::NUM_STATIC_TAGS; - do { - ++Off; - - // If the count can fit into the tag, then the counting must stop. - if (Count <= Marker::Traits::MARK_MASK) { - Count = Off; - } else - Count >>= Marker::Traits::MARK_SIZE; - } while (Off != Offset); - } - - // By now, we have the matching remaining Count for the current offset. - do { - ++Offset; - - unsigned Tag = Count & Marker::Traits::MARK_MASK; - - // If the count can fit into the tag, then the counting must stop. - if (Count <= Marker::Traits::MARK_MASK) { - Tag |= Marker::Traits::STOP_MASK; - Count = Offset; - } else - Count >>= Marker::Traits::MARK_SIZE; - - Marker::setWaymark(*Begin, Tag); - ++Begin; - } while (Begin != End); -} - -/// Sets up the waymarking algorithm's tags for a given range. -/// -/// \param Range The range to mark with tags. -/// \param Offset The position in the supposed tags array from which to start -/// marking the given range. -template <typename R, class Marker = Waymarker<typename std::remove_reference< - decltype(*std::begin(std::declval<R &>()))>::type>> -void fillWaymarks(R &&Range, size_t Offset = 0) { - return fillWaymarks<decltype(std::begin(std::declval<R &>())), Marker>( - adl_begin(Range), adl_end(Range), Offset); -} - -/// Retrieves the element marked with tag of only STOP_MASK, by following the -/// waymarks. This is the first element in a range passed to a previous call to -/// \c fillWaymarks with \c Offset 0. -/// -/// For the trivial usage of calling \c fillWaymarks(Array), and \I is an -/// iterator inside \c Array, this function retrieves the head of \c Array, by -/// following the waymarks. -/// -/// \param I The iterator into an array which was marked by the waymarking tags -/// (by a previous call to \c fillWaymarks). -template <class TIter, class Marker = Waymarker< - typename std::iterator_traits<TIter>::value_type>> -TIter followWaymarks(TIter I) { - unsigned Tag; - do - Tag = Marker::getWaymark(*I--); - while (!(Tag & Marker::Traits::STOP_MASK)); - - // Special case for the first Use. - if (Tag != Marker::Traits::STOP_MASK) { - ptrdiff_t Offset = Tag & Marker::Traits::MARK_MASK; - while (!((Tag = Marker::getWaymark(*I)) & Marker::Traits::STOP_MASK)) { - Offset = (Offset << Marker::Traits::MARK_SIZE) + Tag; - --I; - } - I -= Offset; - } - return ++I; -} - -} // end namespace llvm - -#endif // LLVM_ADT_WAYMARKING_H diff --git a/llvm/include/llvm/ADT/bit.h b/llvm/include/llvm/ADT/bit.h index d76bc6c6046c..49b27c89e5fe 100644 --- a/llvm/include/llvm/ADT/bit.h +++ b/llvm/include/llvm/ADT/bit.h @@ -5,9 +5,10 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file implements the C++20 <bit> header. -// +/// +/// \file +/// This file implements the C++20 <bit> header. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_BIT_H diff --git a/llvm/include/llvm/ADT/edit_distance.h b/llvm/include/llvm/ADT/edit_distance.h index 4f5134008692..c480c1e7cd78 100644 --- a/llvm/include/llvm/ADT/edit_distance.h +++ b/llvm/include/llvm/ADT/edit_distance.h @@ -5,11 +5,12 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file defines a Levenshtein distance function that works for any two -// sequences, with each element of each sequence being analogous to a character -// in a string. 
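Concretely, this Levenshtein function is most often reached through StringRef (usage sketch; the strings are arbitrary examples):

    #include "llvm/ADT/StringRef.h"
    #include <cassert>

    void distanceDemo() {
      // kitten -> sitting: substitute k/s, substitute e/i, insert g.
      assert(llvm::StringRef("kitten").edit_distance("sitting") == 3);
      // A cap bounds the work for fuzzy-match queries:
      assert(llvm::StringRef("color").edit_distance(
                 "colour", /*AllowReplacements=*/true,
                 /*MaxEditDistance=*/2) == 1);
    }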
-//
+///
+/// \file
+/// This file defines a Levenshtein distance function that works for any two
+/// sequences, with each element of each sequence being analogous to a character
+/// in a string.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_ADT_EDIT_DISTANCE_H
diff --git a/llvm/include/llvm/ADT/ilist.h b/llvm/include/llvm/ADT/ilist.h
index b3aa26f2454d..9913b7cccbdd 100644
--- a/llvm/include/llvm/ADT/ilist.h
+++ b/llvm/include/llvm/ADT/ilist.h
@@ -5,19 +5,20 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-//
-// This file defines classes to implement an intrusive doubly linked list class
-// (i.e. each node of the list must contain a next and previous field for the
-// list.
-//
-// The ilist class itself should be a plug in replacement for list. This list
-// replacement does not provide a constant time size() method, so be careful to
-// use empty() when you really want to know if it's empty.
-//
-// The ilist class is implemented as a circular list. The list itself contains
-// a sentinel node, whose Next points at begin() and whose Prev points at
-// rbegin(). The sentinel node itself serves as end() and rend().
-//
+///
+/// \file
+/// This file defines classes to implement an intrusive doubly linked list class
+/// (i.e. each node of the list must contain a next and previous field for the
+/// list).
+///
+/// The ilist class itself should be a plug-in replacement for list. This list
+/// replacement does not provide a constant time size() method, so be careful to
+/// use empty() when you really want to know if it's empty.
+///
+/// The ilist class is implemented as a circular list. The list itself contains
+/// a sentinel node, whose Next points at begin() and whose Prev points at
+/// rbegin(). The sentinel node itself serves as end() and rend().
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_ADT_ILIST_H
diff --git a/llvm/include/llvm/ADT/ilist_node.h b/llvm/include/llvm/ADT/ilist_node.h
index e040d9630a1e..7856b1c0d410 100644
--- a/llvm/include/llvm/ADT/ilist_node.h
+++ b/llvm/include/llvm/ADT/ilist_node.h
@@ -5,10 +5,11 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-//
-// This file defines the ilist_node class template, which is a convenient
-// base class for creating classes that can be used with ilists.
-//
+///
+/// \file
+/// This file defines the ilist_node class template, which is a convenient
+/// base class for creating classes that can be used with ilists.
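A sketch of the intrusive pattern these two headers describe, using simple_ilist, which leaves node ownership with the caller (the node type is hypothetical):

    #include "llvm/ADT/ilist_node.h"
    #include "llvm/ADT/simple_ilist.h"

    struct Job : llvm::ilist_node<Job> {
      int Priority = 0;
    };

    void queueJobs() {
      Job A, B;                    // prev/next links live inside the nodes
      llvm::simple_ilist<Job> Q;
      Q.push_back(A);
      Q.push_front(B);
      Q.remove(A);                 // O(1) unlink through the embedded links
    }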
+/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_ILIST_NODE_H diff --git a/llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h b/llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h index 043b1b7ca2dc..2dd2e7ca916d 100644 --- a/llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h +++ b/llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h @@ -26,6 +26,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" namespace llvm { class AAResults; diff --git a/llvm/include/llvm/Analysis/CycleAnalysis.h b/llvm/include/llvm/Analysis/CycleAnalysis.h index e16b908d6a10..539d29eb5e9c 100644 --- a/llvm/include/llvm/Analysis/CycleAnalysis.h +++ b/llvm/include/llvm/Analysis/CycleAnalysis.h @@ -18,6 +18,7 @@ #include "llvm/ADT/GenericCycleInfo.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/SSAContext.h" +#include "llvm/Pass.h" namespace llvm { extern template class GenericCycleInfo<SSAContext>; diff --git a/llvm/include/llvm/Analysis/DDG.h b/llvm/include/llvm/Analysis/DDG.h index 4ea589ec7efc..c5107da2a017 100644 --- a/llvm/include/llvm/Analysis/DDG.h +++ b/llvm/include/llvm/Analysis/DDG.h @@ -53,7 +53,7 @@ public: DDGNode() = delete; DDGNode(const NodeKind K) : Kind(K) {} - DDGNode(const DDGNode &N) : DDGNodeBase(N), Kind(N.Kind) {} + DDGNode(const DDGNode &N) = default; DDGNode(DDGNode &&N) : DDGNodeBase(std::move(N)), Kind(N.Kind) {} virtual ~DDGNode() = 0; @@ -93,7 +93,7 @@ public: RootDDGNode() : DDGNode(NodeKind::Root) {} RootDDGNode(const RootDDGNode &N) = delete; RootDDGNode(RootDDGNode &&N) : DDGNode(std::move(N)) {} - ~RootDDGNode() {} + ~RootDDGNode() = default; /// Define classof to be able to use isa<>, cast<>, dyn_cast<>, etc. static bool classof(const DDGNode *N) { @@ -113,11 +113,7 @@ public: SimpleDDGNode(SimpleDDGNode &&N); ~SimpleDDGNode(); - SimpleDDGNode &operator=(const SimpleDDGNode &N) { - DDGNode::operator=(N); - InstList = N.InstList; - return *this; - } + SimpleDDGNode &operator=(const SimpleDDGNode &N) = default; SimpleDDGNode &operator=(SimpleDDGNode &&N) { DDGNode::operator=(std::move(N)); @@ -179,11 +175,7 @@ public: PiBlockDDGNode(PiBlockDDGNode &&N); ~PiBlockDDGNode(); - PiBlockDDGNode &operator=(const PiBlockDDGNode &N) { - DDGNode::operator=(N); - NodeList = N.NodeList; - return *this; - } + PiBlockDDGNode &operator=(const PiBlockDDGNode &N) = default; PiBlockDDGNode &operator=(PiBlockDDGNode &&N) { DDGNode::operator=(std::move(N)); @@ -231,11 +223,7 @@ public: DDGEdge(DDGNode &N, EdgeKind K) : DDGEdgeBase(N), Kind(K) {} DDGEdge(const DDGEdge &E) : DDGEdgeBase(E), Kind(E.getKind()) {} DDGEdge(DDGEdge &&E) : DDGEdgeBase(std::move(E)), Kind(E.Kind) {} - DDGEdge &operator=(const DDGEdge &E) { - DDGEdgeBase::operator=(E); - Kind = E.Kind; - return *this; - } + DDGEdge &operator=(const DDGEdge &E) = default; DDGEdge &operator=(DDGEdge &&E) { DDGEdgeBase::operator=(std::move(E)); @@ -272,7 +260,7 @@ public: : Name(N), DI(DepInfo), Root(nullptr) {} DependenceGraphInfo(DependenceGraphInfo &&G) : Name(std::move(G.Name)), DI(std::move(G.DI)), Root(G.Root) {} - virtual ~DependenceGraphInfo() {} + virtual ~DependenceGraphInfo() = default; /// Return the label that is used to name this graph. 
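A recurring theme in this import, visible again in the DDG classes above, is replacing empty special-member bodies with = default. This is not just style: a member defaulted on its first declaration can be trivial, which a user-provided empty body never is. A minimal sketch:

    #include <type_traits>

    struct Braces    { Braces() {} };
    struct Defaulted { Defaulted() = default; };

    static_assert(!std::is_trivially_default_constructible<Braces>::value,
                  "user-provided, even if the body is empty");
    static_assert(std::is_trivially_default_constructible<Defaulted>::value,
                  "compiler-generated and trivial");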
StringRef getName() const { return Name; } diff --git a/llvm/include/llvm/Analysis/DependenceAnalysis.h b/llvm/include/llvm/Analysis/DependenceAnalysis.h index 8c852e85b04a..638f4869d677 100644 --- a/llvm/include/llvm/Analysis/DependenceAnalysis.h +++ b/llvm/include/llvm/Analysis/DependenceAnalysis.h @@ -76,7 +76,7 @@ namespace llvm { public: Dependence(Instruction *Source, Instruction *Destination) : Src(Source), Dst(Destination) {} - virtual ~Dependence() {} + virtual ~Dependence() = default; /// Dependence::DVEntry - Each level in the distance/direction vector /// has a direction (or perhaps a union of several directions), and diff --git a/llvm/include/llvm/Analysis/DependenceGraphBuilder.h b/llvm/include/llvm/Analysis/DependenceGraphBuilder.h index 332829cbc8a9..e0dbdcdaa749 100644 --- a/llvm/include/llvm/Analysis/DependenceGraphBuilder.h +++ b/llvm/include/llvm/Analysis/DependenceGraphBuilder.h @@ -43,7 +43,7 @@ public: AbstractDependenceGraphBuilder(GraphType &G, DependenceInfo &D, const BasicBlockListType &BBs) : Graph(G), DI(D), BBList(BBs) {} - virtual ~AbstractDependenceGraphBuilder() {} + virtual ~AbstractDependenceGraphBuilder() = default; /// The main entry to the graph construction algorithm. It starts by /// creating nodes in increasing order of granularity and then diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h index 7b81d5754930..90ab2833e428 100644 --- a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h +++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h @@ -262,7 +262,20 @@ struct IRInstructionData llvm::hash_value(ID.Inst->getType()), llvm::hash_value(ID.getPredicate()), llvm::hash_combine_range(OperTypes.begin(), OperTypes.end())); - else if (isa<CallInst>(ID.Inst)) { + + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(ID.Inst)) { + // To hash intrinsics, we use the opcode, and types like the other + // instructions, but also, the Intrinsic ID, and the Name of the + // intrinsic. + Intrinsic::ID IntrinsicID = II->getIntrinsicID(); + return llvm::hash_combine( + llvm::hash_value(ID.Inst->getOpcode()), + llvm::hash_value(ID.Inst->getType()), llvm::hash_value(IntrinsicID), + llvm::hash_value(*ID.CalleeName), + llvm::hash_combine_range(OperTypes.begin(), OperTypes.end())); + } + + if (isa<CallInst>(ID.Inst)) { std::string FunctionName = *ID.CalleeName; return llvm::hash_combine( llvm::hash_value(ID.Inst->getOpcode()), @@ -270,6 +283,7 @@ struct IRInstructionData llvm::hash_value(ID.Inst->getType()), llvm::hash_value(FunctionName), llvm::hash_combine_range(OperTypes.begin(), OperTypes.end())); } + return llvm::hash_combine( llvm::hash_value(ID.Inst->getOpcode()), llvm::hash_value(ID.Inst->getType()), @@ -499,7 +513,7 @@ struct IRInstructionMapper { /// be analyzed for similarity. struct InstructionClassification : public InstVisitor<InstructionClassification, InstrType> { - InstructionClassification() {} + InstructionClassification() = default; // TODO: Determine a scheme to resolve when the label is similar enough. InstrType visitBranchInst(BranchInst &BI) { @@ -525,8 +539,17 @@ struct IRInstructionMapper { // analyzed for similarity as it has no bearing on the outcome of the // program. InstrType visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return Invisible; } - // TODO: Handle specific intrinsics. 
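The hashing hunk above builds on llvm::hash_value and llvm::hash_combine; for reference, those primitives compose like this (a standalone sketch, unrelated to the patch itself):

    #include "llvm/ADT/Hashing.h"
    #include <string>

    llvm::hash_code hashKey(unsigned Opcode, const std::string &Callee) {
      // Combines the pieces into one hash_code; argument order matters.
      return llvm::hash_combine(llvm::hash_value(Opcode),
                                llvm::hash_value(Callee));
    }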
-  InstrType visitIntrinsicInst(IntrinsicInst &II) { return Illegal; }
+  InstrType visitIntrinsicInst(IntrinsicInst &II) {
+    // These are disabled due to complications in the CodeExtractor when
+    // outlining these instructions. For instance, it is unclear what we
+    // should do when moving only the start or end lifetime instruction into
+    // an outlined function. Also, assume-like intrinsics could be removed
+    // from the region, removing arguments, causing discrepancies in the
+    // number of inputs between different regions.
+    if (II.isLifetimeStartOrEnd() || II.isAssumeLikeIntrinsic())
+      return Illegal;
+    return EnableIntrinsics ? Legal : Illegal;
+  }
   // We only allow call instructions where the function has a name and
   // is not an indirect call.
   InstrType visitCallInst(CallInst &CI) {
@@ -553,6 +576,10 @@ struct IRInstructionMapper {
   // The flag variable that lets the classifier know whether we should
   // allow indirect calls to be considered legal instructions.
   bool EnableIndirectCalls = false;
+
+  // Flag that lets the classifier know whether we should allow intrinsics to
+  // be checked for similarity.
+  bool EnableIntrinsics = false;
 };
 
 /// Maps an Instruction to a member of InstrType.
@@ -939,10 +966,12 @@ class IRSimilarityIdentifier {
 public:
   IRSimilarityIdentifier(bool MatchBranches = true,
                          bool MatchIndirectCalls = true,
-                         bool MatchCallsWithName = false)
+                         bool MatchCallsWithName = false,
+                         bool MatchIntrinsics = true)
       : Mapper(&InstDataAllocator, &InstDataListAllocator),
         EnableBranches(MatchBranches), EnableIndirectCalls(MatchIndirectCalls),
-        EnableMatchingCallsByName(MatchCallsWithName) {}
+        EnableMatchingCallsByName(MatchCallsWithName),
+        EnableIntrinsics(MatchIntrinsics) {}
 
 private:
   /// Map the instructions in the module to unsigned integers, using mapping
@@ -1031,6 +1060,10 @@ private:
   /// convention, attributes and type signature.
   bool EnableMatchingCallsByName = true;
 
+  /// The flag variable that marks whether we should check intrinsics for
+  /// similarity.
+  bool EnableIntrinsics = true;
+
   /// The SimilarityGroups found with the most recent run of \ref
   /// findSimilarity. None if there is no recent run.
   Optional<SimilarityGroupList> SimilarityCandidates;
diff --git a/llvm/include/llvm/Analysis/IndirectCallVisitor.h b/llvm/include/llvm/Analysis/IndirectCallVisitor.h
index eb72f2c5d14d..0825e19ecd2d 100644
--- a/llvm/include/llvm/Analysis/IndirectCallVisitor.h
+++ b/llvm/include/llvm/Analysis/IndirectCallVisitor.h
@@ -19,7 +19,7 @@ namespace llvm {
 // Visitor class that finds all indirect call.
struct PGOIndirectCallVisitor : public InstVisitor<PGOIndirectCallVisitor> { std::vector<CallBase *> IndirectCalls; - PGOIndirectCallVisitor() {} + PGOIndirectCallVisitor() = default; void visitCallBase(CallBase &Call) { if (Call.isIndirectCall()) diff --git a/llvm/include/llvm/Analysis/InlineOrder.h b/llvm/include/llvm/Analysis/InlineOrder.h index feefa9b9ddd1..84252bcf1b06 100644 --- a/llvm/include/llvm/Analysis/InlineOrder.h +++ b/llvm/include/llvm/Analysis/InlineOrder.h @@ -26,7 +26,7 @@ public: using reference = T &; using const_reference = const T &; - virtual ~InlineOrder() {} + virtual ~InlineOrder() = default; virtual size_t size() = 0; diff --git a/llvm/include/llvm/Analysis/LazyCallGraph.h b/llvm/include/llvm/Analysis/LazyCallGraph.h index eb8f66bada59..c0404d37d04d 100644 --- a/llvm/include/llvm/Analysis/LazyCallGraph.h +++ b/llvm/include/llvm/Analysis/LazyCallGraph.h @@ -1203,7 +1203,7 @@ private: } }; -inline LazyCallGraph::Edge::Edge() {} +inline LazyCallGraph::Edge::Edge() = default; inline LazyCallGraph::Edge::Edge(Node &N, Kind K) : Value(&N, K) {} inline LazyCallGraph::Edge::operator bool() const { diff --git a/llvm/include/llvm/Analysis/LazyValueInfo.h b/llvm/include/llvm/Analysis/LazyValueInfo.h index 57f732cc854b..754391e10630 100644 --- a/llvm/include/llvm/Analysis/LazyValueInfo.h +++ b/llvm/include/llvm/Analysis/LazyValueInfo.h @@ -38,7 +38,7 @@ class LazyValueInfo { void operator=(const LazyValueInfo&) = delete; public: ~LazyValueInfo(); - LazyValueInfo() {} + LazyValueInfo() = default; LazyValueInfo(AssumptionCache *AC_, const DataLayout *DL_, TargetLibraryInfo *TLI_) : AC(AC_), DL(DL_), TLI(TLI_) {} diff --git a/llvm/include/llvm/Analysis/Loads.h b/llvm/include/llvm/Analysis/Loads.h index 3db501c51a17..09bf98d324ed 100644 --- a/llvm/include/llvm/Analysis/Loads.h +++ b/llvm/include/llvm/Analysis/Loads.h @@ -42,8 +42,7 @@ bool isDereferenceablePointer(const Value *V, Type *Ty, /// performs context-sensitive analysis and returns true if the pointer is /// dereferenceable at the specified instruction. 
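For PGOIndirectCallVisitor above, the InstVisitor base supplies the traversal; a hypothetical driver (not from this patch) looks like this:

    #include "llvm/Analysis/IndirectCallVisitor.h"
    #include "llvm/IR/Function.h"

    void collectIndirect(llvm::Function &F) {
      llvm::PGOIndirectCallVisitor V;
      V.visit(F); // InstVisitor walks F, calling visitCallBase on each call
      for (llvm::CallBase *CB : V.IndirectCalls) {
        (void)CB; // each indirect call site in F
      }
    }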
bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty, - MaybeAlign Alignment, - const DataLayout &DL, + Align Alignment, const DataLayout &DL, const Instruction *CtxI = nullptr, const DominatorTree *DT = nullptr, const TargetLibraryInfo *TLI = nullptr); diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h index b2326c4714dd..a0ffdb07a7ec 100644 --- a/llvm/include/llvm/Analysis/LoopInfo.h +++ b/llvm/include/llvm/Analysis/LoopInfo.h @@ -535,7 +535,7 @@ public: DebugLoc End; public: - LocRange() {} + LocRange() = default; LocRange(DebugLoc Start) : Start(Start), End(Start) {} LocRange(DebugLoc Start, DebugLoc End) : Start(std::move(Start)), End(std::move(End)) {} @@ -900,7 +900,7 @@ template <class BlockT, class LoopT> class LoopInfoBase { LoopInfoBase(const LoopInfoBase &) = delete; public: - LoopInfoBase() {} + LoopInfoBase() = default; ~LoopInfoBase() { releaseMemory(); } LoopInfoBase(LoopInfoBase &&Arg) @@ -1092,7 +1092,7 @@ class LoopInfo : public LoopInfoBase<BasicBlock, Loop> { LoopInfo(const LoopInfo &) = delete; public: - LoopInfo() {} + LoopInfo() = default; explicit LoopInfo(const DominatorTreeBase<BasicBlock, false> &DomTree); LoopInfo(LoopInfo &&Arg) : BaseT(std::move(static_cast<BaseT &>(Arg))) {} @@ -1336,6 +1336,10 @@ bool hasMustProgress(const Loop *L); /// be infinite without side effects without also being undefined) bool isMustProgress(const Loop *L); +/// Return true if this loop can be assumed to run for a finite number of +/// iterations. +bool isFinite(const Loop *L); + /// Return whether an MDNode might represent an access group. /// /// Access group metadata nodes have to be distinct and empty. Being diff --git a/llvm/include/llvm/Analysis/MLInlineAdvisor.h b/llvm/include/llvm/Analysis/MLInlineAdvisor.h index 05411d9c99a2..b1a81d5e7030 100644 --- a/llvm/include/llvm/Analysis/MLInlineAdvisor.h +++ b/llvm/include/llvm/Analysis/MLInlineAdvisor.h @@ -15,6 +15,7 @@ #include "llvm/IR/PassManager.h" #include <deque> +#include <map> #include <memory> namespace llvm { diff --git a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h index cb522cf731d3..feb22c250979 100644 --- a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h +++ b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -23,6 +23,7 @@ #include "llvm/IR/PassManager.h" #include "llvm/IR/PredIteratorCache.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/Pass.h" namespace llvm { diff --git a/llvm/include/llvm/Analysis/MustExecute.h b/llvm/include/llvm/Analysis/MustExecute.h index df489aaa534d..18a0bfee5730 100644 --- a/llvm/include/llvm/Analysis/MustExecute.h +++ b/llvm/include/llvm/Analysis/MustExecute.h @@ -281,9 +281,7 @@ struct MustBeExecutedIterator { using ExplorerTy = MustBeExecutedContextExplorer; - MustBeExecutedIterator(const MustBeExecutedIterator &Other) - : Visited(Other.Visited), Explorer(Other.Explorer), - CurInst(Other.CurInst), Head(Other.Head), Tail(Other.Tail) {} + MustBeExecutedIterator(const MustBeExecutedIterator &Other) = default; MustBeExecutedIterator(MustBeExecutedIterator &&Other) : Visited(std::move(Other.Visited)), Explorer(Other.Explorer), @@ -299,7 +297,7 @@ struct MustBeExecutedIterator { return *this; } - ~MustBeExecutedIterator() {} + ~MustBeExecutedIterator() = default; /// Pre- and post-increment operators. 
///{ diff --git a/llvm/include/llvm/Analysis/ObjCARCUtil.h b/llvm/include/llvm/Analysis/ObjCARCUtil.h index 1d330ca58a87..385fa5422926 100644 --- a/llvm/include/llvm/Analysis/ObjCARCUtil.h +++ b/llvm/include/llvm/Analysis/ObjCARCUtil.h @@ -42,7 +42,7 @@ inline bool hasAttachedCallOpBundle(const CallBase *CB) { /// which is the address of the ARC runtime function. inline Optional<Function *> getAttachedARCFunction(const CallBase *CB) { auto B = CB->getOperandBundle(LLVMContext::OB_clang_arc_attachedcall); - if (!B.hasValue() || B->Inputs.size() == 0) + if (!B) return None; return cast<Function>(B->Inputs[0]); diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index 1e6dac44cf2b..b16aa7017719 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1111,9 +1111,11 @@ public: /// Simplify LHS and RHS in a comparison with predicate Pred. Return true /// iff any changes were made. If the operands are provably equal or /// unequal, LHS and RHS are set to the same value and Pred is set to either - /// ICMP_EQ or ICMP_NE. + /// ICMP_EQ or ICMP_NE. ControllingFiniteLoop is set if this comparison + /// controls the exit of a loop known to have a finite number of iterations. bool SimplifyICmpOperands(ICmpInst::Predicate &Pred, const SCEV *&LHS, - const SCEV *&RHS, unsigned Depth = 0); + const SCEV *&RHS, unsigned Depth = 0, + bool ControllingFiniteLoop = false); /// Return the "disposition" of the given SCEV with respect to the given /// loop. diff --git a/llvm/include/llvm/Analysis/SparsePropagation.h b/llvm/include/llvm/Analysis/SparsePropagation.h index 27c58c0afa8a..6eb6d5518a41 100644 --- a/llvm/include/llvm/Analysis/SparsePropagation.h +++ b/llvm/include/llvm/Analysis/SparsePropagation.h @@ -14,6 +14,7 @@ #ifndef LLVM_ANALYSIS_SPARSEPROPAGATION_H #define LLVM_ANALYSIS_SPARSEPROPAGATION_H +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/IR/Instructions.h" #include "llvm/Support/Debug.h" #include <set> diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h index 6e3e1380535e..17d1e3f770c1 100644 --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h @@ -254,15 +254,10 @@ public: } // Provide value semantics. - TargetLibraryInfo(const TargetLibraryInfo &TLI) - : Impl(TLI.Impl), OverrideAsUnavailable(TLI.OverrideAsUnavailable) {} + TargetLibraryInfo(const TargetLibraryInfo &TLI) = default; TargetLibraryInfo(TargetLibraryInfo &&TLI) : Impl(TLI.Impl), OverrideAsUnavailable(TLI.OverrideAsUnavailable) {} - TargetLibraryInfo &operator=(const TargetLibraryInfo &TLI) { - Impl = TLI.Impl; - OverrideAsUnavailable = TLI.OverrideAsUnavailable; - return *this; - } + TargetLibraryInfo &operator=(const TargetLibraryInfo &TLI) = default; TargetLibraryInfo &operator=(TargetLibraryInfo &&TLI) { Impl = TLI.Impl; OverrideAsUnavailable = TLI.OverrideAsUnavailable; @@ -445,7 +440,7 @@ public: /// /// This will use the module's triple to construct the library info for that /// module. - TargetLibraryAnalysis() {} + TargetLibraryAnalysis() = default; /// Construct a library analysis with baseline Module-level info. 
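The ObjCARCUtil hunk above leans on Optional's explicit bool conversion rather than hasValue(); both spell the same test (sketch):

    #include "llvm/ADT/Optional.h"

    int valueOrZero(const llvm::Optional<int> &O) {
      if (!O)          // equivalent to !O.hasValue()
        return 0;
      return *O;       // only dereference after the check
    }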
/// diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 34ef9cc61c4f..7412e050322e 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1789,7 +1789,7 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { public: Model(T Impl) : Impl(std::move(Impl)) {} - ~Model() override {} + ~Model() override = default; const DataLayout &getDataLayout() const override { return Impl.getDataLayout(); diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 4b9ef7c57ffc..a32744f8d58b 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -42,8 +42,7 @@ protected: public: // Provide value semantics. MSVC requires that we spell all of these out. - TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg) - : DL(Arg.DL) {} + TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg) = default; TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {} const DataLayout &getDataLayout() const { return DL; } diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index 8840929174d6..5d3b1270b538 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -372,7 +372,8 @@ enum { // was never defined for V1. ELFABIVERSION_AMDGPU_HSA_V2 = 0, ELFABIVERSION_AMDGPU_HSA_V3 = 1, - ELFABIVERSION_AMDGPU_HSA_V4 = 2 + ELFABIVERSION_AMDGPU_HSA_V4 = 2, + ELFABIVERSION_AMDGPU_HSA_V5 = 3 }; #define ELF_RELOC(name, value) name = value, diff --git a/llvm/include/llvm/BinaryFormat/MsgPackDocument.h b/llvm/include/llvm/BinaryFormat/MsgPackDocument.h index 6d7aca89ee5b..448c7a4e0034 100644 --- a/llvm/include/llvm/BinaryFormat/MsgPackDocument.h +++ b/llvm/include/llvm/BinaryFormat/MsgPackDocument.h @@ -218,7 +218,7 @@ private: /// A DocNode that is a map. class MapDocNode : public DocNode { public: - MapDocNode() {} + MapDocNode() = default; MapDocNode(DocNode &N) : DocNode(N) { assert(getKind() == Type::Map); } // Map access methods. @@ -248,7 +248,7 @@ public: /// A DocNode that is an array. class ArrayDocNode : public DocNode { public: - ArrayDocNode() {} + ArrayDocNode() = default; ArrayDocNode(DocNode &N) : DocNode(N) { assert(getKind() == Type::Array); } // Array access methods. diff --git a/llvm/include/llvm/BinaryFormat/Swift.def b/llvm/include/llvm/BinaryFormat/Swift.def new file mode 100644 index 000000000000..6160e2551432 --- /dev/null +++ b/llvm/include/llvm/BinaryFormat/Swift.def @@ -0,0 +1,26 @@ +//===- llvm/BinaryFormat/Swift.def - Swift definitions ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Macros for running through Swift enumerators. 
+// +//===----------------------------------------------------------------------===// + +#if !(defined HANDLE_SWIFT_SECTION) +#error "Missing macro definition of HANDLE_SWIFT_SECTION" +#endif + +#ifndef HANDLE_SWIFT_SECTION +#define HANDLE_SWIFT_SECTION(KIND, MACHO, ELF, COFF) +#endif + +HANDLE_SWIFT_SECTION(fieldmd, "__swift5_fieldmd", "swift5_fieldmd", ".sw5flmd") +HANDLE_SWIFT_SECTION(assocty, "__swift5_assocty", "swift5_assocty", ".sw5asty") +HANDLE_SWIFT_SECTION(builtin, "__swift5_builtin", "swift5_builtin", ".sw5bltn") +HANDLE_SWIFT_SECTION(capture, "__swift5_capture", "swift5_capture", ".sw5cptr") +HANDLE_SWIFT_SECTION(typeref, "__swift5_typeref", "swift5_typeref", ".sw5tyrf") +HANDLE_SWIFT_SECTION(reflstr, "__swift5_reflstr", "swift5_reflstr", ".sw5rfst") diff --git a/llvm/include/llvm/BinaryFormat/Swift.h b/llvm/include/llvm/BinaryFormat/Swift.h new file mode 100644 index 000000000000..68c04f11196e --- /dev/null +++ b/llvm/include/llvm/BinaryFormat/Swift.h @@ -0,0 +1,24 @@ +//===-- llvm/BinaryFormat/Swift.h ---Swift Constants-------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// + +#ifndef LLVM_BINARYFORMAT_SWIFT_H +#define LLVM_BINARYFORMAT_SWIFT_H + +namespace llvm { +namespace binaryformat { + +enum Swift5ReflectionSectionKind { +#define HANDLE_SWIFT_SECTION(KIND, MACHO, ELF, COFF) KIND, +#include "llvm/BinaryFormat/Swift.def" +#undef HANDLE_SWIFT_SECTION + unknown, + last = unknown +}; +} // end of namespace binaryformat +} // end of namespace llvm + +#endif diff --git a/llvm/include/llvm/Bitcode/BitcodeWriter.h b/llvm/include/llvm/Bitcode/BitcodeWriter.h index 7ad2d37a2a35..96f25fce8ddb 100644 --- a/llvm/include/llvm/Bitcode/BitcodeWriter.h +++ b/llvm/include/llvm/Bitcode/BitcodeWriter.h @@ -139,7 +139,7 @@ class raw_ostream; /// /// ModHash is for use in ThinLTO incremental build, generated while the IR /// bitcode file writing. - void WriteThinLinkBitcodeToFile(const Module &M, raw_ostream &Out, + void writeThinLinkBitcodeToFile(const Module &M, raw_ostream &Out, const ModuleSummaryIndex &Index, const ModuleHash &ModHash); @@ -148,7 +148,7 @@ class raw_ostream; /// writing the combined index file for ThinLTO. When writing a subset of the /// index for a distributed backend, provide the \p ModuleToSummariesForIndex /// map. - void WriteIndexToFile(const ModuleSummaryIndex &Index, raw_ostream &Out, + void writeIndexToFile(const ModuleSummaryIndex &Index, raw_ostream &Out, const std::map<std::string, GVSummaryMapTy> *ModuleToSummariesForIndex = nullptr); @@ -161,7 +161,7 @@ class raw_ostream; /// If EmbedCmdline is set, the command line is also exported in /// the corresponding section (__LLVM,_cmdline / .llvmcmd) - even if CmdArgs /// were empty. 
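Swift.h above expands the new .def file once to declare the enum; the same X-macro can be re-expanded to build lookup tables. A sketch with a made-up helper name:

    #include "llvm/BinaryFormat/Swift.h"

    // Hypothetical: map a reflection-section kind to its Mach-O name by
    // giving HANDLE_SWIFT_SECTION a new body and re-including the .def file.
    static const char *machOSectionName(
        llvm::binaryformat::Swift5ReflectionSectionKind K) {
      switch (K) {
    #define HANDLE_SWIFT_SECTION(KIND, MACHO, ELF, COFF)                       \
      case llvm::binaryformat::KIND:                                           \
        return MACHO;
    #include "llvm/BinaryFormat/Swift.def"
    #undef HANDLE_SWIFT_SECTION
      default:
        return "<unknown>";
      }
    }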
- void EmbedBitcodeInModule(Module &M, MemoryBufferRef Buf, bool EmbedBitcode, + void embedBitcodeInModule(Module &M, MemoryBufferRef Buf, bool EmbedBitcode, bool EmbedCmdline, const std::vector<uint8_t> &CmdArgs); diff --git a/llvm/include/llvm/Bitstream/BitstreamReader.h b/llvm/include/llvm/Bitstream/BitstreamReader.h index 0393d1a51866..37b7c4d73cff 100644 --- a/llvm/include/llvm/Bitstream/BitstreamReader.h +++ b/llvm/include/llvm/Bitstream/BitstreamReader.h @@ -20,8 +20,7 @@ #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/MemoryBufferRef.h" #include <algorithm> #include <cassert> #include <climits> diff --git a/llvm/include/llvm/CodeGen/DIE.h b/llvm/include/llvm/CodeGen/DIE.h index 32df448b91a1..7f7372630dbe 100644 --- a/llvm/include/llvm/CodeGen/DIE.h +++ b/llvm/include/llvm/CodeGen/DIE.h @@ -886,8 +886,8 @@ class DIEUnit { DIE Die; /// The section this unit will be emitted in. This may or may not be set to /// a valid section depending on the client that is emitting DWARF. - MCSection *Section; - uint64_t Offset; /// .debug_info or .debug_types absolute section offset. + MCSection *Section = nullptr; + uint64_t Offset = 0; /// .debug_info or .debug_types absolute section offset. protected: virtual ~DIEUnit() = default; diff --git a/llvm/include/llvm/CodeGen/FastISel.h b/llvm/include/llvm/CodeGen/FastISel.h index 9c7e688da6a7..775698a66ada 100644 --- a/llvm/include/llvm/CodeGen/FastISel.h +++ b/llvm/include/llvm/CodeGen/FastISel.h @@ -217,12 +217,12 @@ protected: /// for use in the current block. It resets to EmitStartPt when it makes sense /// (for example, it's usually profitable to avoid function calls between the /// definition and the use) - MachineInstr *LastLocalValue; + MachineInstr *LastLocalValue = nullptr; /// The top most instruction in the current block that is allowed for /// emitting local variables. LastLocalValue resets to EmitStartPt when it /// makes sense (for example, on function calls) - MachineInstr *EmitStartPt; + MachineInstr *EmitStartPt = nullptr; public: virtual ~FastISel(); diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h index 3a4b3ee18e1b..f9663fadb868 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h @@ -95,7 +95,7 @@ public: bool IsFixed = true) : ArgInfo(Regs, OrigValue.getType(), OrigIndex, Flags, IsFixed, &OrigValue) {} - ArgInfo() {} + ArgInfo() = default; }; struct CallLoweringInfo { diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h b/llvm/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h index 79d71b2c8982..70945fcecfe5 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h @@ -30,7 +30,7 @@ class GISelChangeObserver { SmallPtrSet<MachineInstr *, 4> ChangingAllUsesOfReg; public: - virtual ~GISelChangeObserver() {} + virtual ~GISelChangeObserver() = default; /// An instruction is about to be erased. 
virtual void erasingInstr(MachineInstr &MI) = 0; diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h index f6704df3f49d..3cacdc99dbf8 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h @@ -465,7 +465,7 @@ private: ScalarSizeChangeStrategies[LastOp - FirstOp + 1]; SmallVector<SizeChangeStrategy, 1> VectorElementSizeChangeStrategies[LastOp - FirstOp + 1]; - bool TablesInitialized; + bool TablesInitialized = false; // Data structures used by getAction: SmallVector<SizeAndActionsVec, 1> ScalarActions[LastOp - FirstOp + 1]; diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h index 9507c3411b5c..17cb53dd2d5b 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -403,9 +403,9 @@ public: class LegalizeRuleSet { /// When non-zero, the opcode we are an alias of - unsigned AliasOf; + unsigned AliasOf = 0; /// If true, there is another opcode that aliases this one - bool IsAliasedByAnother; + bool IsAliasedByAnother = false; SmallVector<LegalizeRule, 2> Rules; #ifndef NDEBUG @@ -432,16 +432,6 @@ class LegalizeRuleSet { return TypeIdx; } - unsigned immIdx(unsigned ImmIdx) { - assert(ImmIdx <= (MCOI::OPERAND_LAST_GENERIC_IMM - - MCOI::OPERAND_FIRST_GENERIC_IMM) && - "Imm Index is out of bounds"); -#ifndef NDEBUG - ImmIdxsCovered.set(ImmIdx); -#endif - return ImmIdx; - } - void markAllIdxsAsCovered() { #ifndef NDEBUG TypeIdxsCovered.set(); @@ -556,7 +546,7 @@ class LegalizeRuleSet { } public: - LegalizeRuleSet() : AliasOf(0), IsAliasedByAnother(false) {} + LegalizeRuleSet() = default; bool isAliasedByAnother() { return IsAliasedByAnother; } void setIsAliasedByAnother() { IsAliasedByAnother = true; } @@ -568,6 +558,16 @@ public: } unsigned getAlias() const { return AliasOf; } + unsigned immIdx(unsigned ImmIdx) { + assert(ImmIdx <= (MCOI::OPERAND_LAST_GENERIC_IMM - + MCOI::OPERAND_FIRST_GENERIC_IMM) && + "Imm Index is out of bounds"); +#ifndef NDEBUG + ImmIdxsCovered.set(ImmIdx); +#endif + return ImmIdx; + } + /// The instruction is legal if predicate is true. LegalizeRuleSet &legalIf(LegalityPredicate Predicate) { // We have no choice but conservatively assume that the free-form @@ -824,11 +824,22 @@ public: LegalizeRuleSet &customForCartesianProduct(std::initializer_list<LLT> Types) { return actionForCartesianProduct(LegalizeAction::Custom, Types); } + /// The instruction is custom when type indexes 0 and 1 are both in their + /// respective lists. LegalizeRuleSet & customForCartesianProduct(std::initializer_list<LLT> Types0, std::initializer_list<LLT> Types1) { return actionForCartesianProduct(LegalizeAction::Custom, Types0, Types1); } + /// The instruction is custom when type indexes 0, 1, and 2 are all in + /// their respective lists. + LegalizeRuleSet & + customForCartesianProduct(std::initializer_list<LLT> Types0, + std::initializer_list<LLT> Types1, + std::initializer_list<LLT> Types2) { + return actionForCartesianProduct(LegalizeAction::Custom, Types0, Types1, + Types2); + } /// Unconditionally custom lower.
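For the new three-list customForCartesianProduct overload above, a hypothetical target legalizer might write something like the following; the opcode and type choices are illustrative, not taken from this patch:

    #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
    #include "llvm/CodeGen/TargetOpcodes.h"
    using namespace llvm;

    static void buildRules(LegalizerInfo &LI) {
      const LLT S1 = LLT::scalar(1);
      const LLT S32 = LLT::scalar(32);
      const LLT S64 = LLT::scalar(64);
      const LLT P0 = LLT::pointer(0, 64);
      // Type index 0 is the result, index 1 the success flag, index 2 the
      // pointer: Custom fires for every combination from the three lists.
      LI.getActionDefinitionsBuilder(TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS)
          .customForCartesianProduct({S32, S64}, {S1}, {P0});
    }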
LegalizeRuleSet &custom() { diff --git a/llvm/include/llvm/CodeGen/IntrinsicLowering.h b/llvm/include/llvm/CodeGen/IntrinsicLowering.h index 8593f54f3961..06512f2dc560 100644 --- a/llvm/include/llvm/CodeGen/IntrinsicLowering.h +++ b/llvm/include/llvm/CodeGen/IntrinsicLowering.h @@ -24,10 +24,10 @@ class DataLayout; class IntrinsicLowering { const DataLayout &DL; - bool Warned; + bool Warned = false; public: - explicit IntrinsicLowering(const DataLayout &DL) : DL(DL), Warned(false) {} + explicit IntrinsicLowering(const DataLayout &DL) : DL(DL) {} /// Replace a call to the specified intrinsic function. /// If an intrinsic function must be implemented by the code generator diff --git a/llvm/include/llvm/CodeGen/LoopTraversal.h b/llvm/include/llvm/CodeGen/LoopTraversal.h index e5810ef1ef26..93d140cabd0d 100644 --- a/llvm/include/llvm/CodeGen/LoopTraversal.h +++ b/llvm/include/llvm/CodeGen/LoopTraversal.h @@ -98,7 +98,7 @@ public: bool Done = true) : MBB(BB), PrimaryPass(Primary), IsDone(Done) {} }; - LoopTraversal() {} + LoopTraversal() = default; /// Identifies basic blocks that are part of loops and should be /// visited twice and returns efficient traversal order for all the blocks. diff --git a/llvm/include/llvm/CodeGen/MIRFormatter.h b/llvm/include/llvm/CodeGen/MIRFormatter.h index 3f145ff224ad..fb276ff117af 100644 --- a/llvm/include/llvm/CodeGen/MIRFormatter.h +++ b/llvm/include/llvm/CodeGen/MIRFormatter.h @@ -30,7 +30,7 @@ public: typedef function_ref<bool(StringRef::iterator Loc, const Twine &)> ErrorCallbackType; - MIRFormatter() {} + MIRFormatter() = default; virtual ~MIRFormatter() = default; /// Implement target specific printing for machine operand immediate value, so diff --git a/llvm/include/llvm/CodeGen/MIRYamlMapping.h b/llvm/include/llvm/CodeGen/MIRYamlMapping.h index 05a375bc251b..02eb5d24271d 100644 --- a/llvm/include/llvm/CodeGen/MIRYamlMapping.h +++ b/llvm/include/llvm/CodeGen/MIRYamlMapping.h @@ -392,7 +392,7 @@ struct FrameIndex { bool IsFixed; SMRange SourceRange; - FrameIndex() {} + FrameIndex() = default; FrameIndex(int FI, const llvm::MachineFrameInfo &MFI); Expected<int> getFI(const llvm::MachineFrameInfo &MFI) const; @@ -671,7 +671,7 @@ template <> struct MappingTraits<MachineFrameInfo> { /// Targets should override this in a way that mirrors the implementation of /// llvm::MachineFunctionInfo. struct MachineFunctionInfo { - virtual ~MachineFunctionInfo() {} + virtual ~MachineFunctionInfo() = default; virtual void mappingImpl(IO &YamlIO) {} }; diff --git a/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/llvm/include/llvm/CodeGen/MachineFrameInfo.h index 5df468102a8a..864ca73180af 100644 --- a/llvm/include/llvm/CodeGen/MachineFrameInfo.h +++ b/llvm/include/llvm/CodeGen/MachineFrameInfo.h @@ -49,14 +49,13 @@ class CalleeSavedInfo { /// The long-term solution is to model the liveness of callee-saved registers /// by implicit uses on the return instructions, however, the required /// changes in the ARM backend would be quite extensive. - bool Restored; + bool Restored = true; /// Flag indicating whether the register is spilled to stack or another /// register. - bool SpilledToReg; + bool SpilledToReg = false; public: - explicit CalleeSavedInfo(unsigned R, int FI = 0) - : Reg(R), FrameIdx(FI), Restored(true), SpilledToReg(false) {} + explicit CalleeSavedInfo(unsigned R, int FI = 0) : Reg(R), FrameIdx(FI) {} // Accessors. Register getReg() const { return Reg; } @@ -180,14 +179,14 @@ private: /// If true, the object has been sign-extended.
bool isSExt = false; - uint8_t SSPLayout; + uint8_t SSPLayout = SSPLK_None; StackObject(uint64_t Size, Align Alignment, int64_t SPOffset, bool IsImmutable, bool IsSpillSlot, const AllocaInst *Alloca, bool IsAliased, uint8_t StackID = 0) : SPOffset(SPOffset), Size(Size), Alignment(Alignment), isImmutable(IsImmutable), isSpillSlot(IsSpillSlot), StackID(StackID), - Alloca(Alloca), isAliased(IsAliased), SSPLayout(SSPLK_None) {} + Alloca(Alloca), isAliased(IsAliased) {} }; /// The alignment of the stack. diff --git a/llvm/include/llvm/CodeGen/MachineModuleSlotTracker.h b/llvm/include/llvm/CodeGen/MachineModuleSlotTracker.h index 0bd0a31abcae..fc7635edd82c 100644 --- a/llvm/include/llvm/CodeGen/MachineModuleSlotTracker.h +++ b/llvm/include/llvm/CodeGen/MachineModuleSlotTracker.h @@ -22,7 +22,7 @@ class Module; class MachineModuleSlotTracker : public ModuleSlotTracker { const Function &TheFunction; const MachineModuleInfo &TheMMI; - unsigned MDNStartSlot, MDNEndSlot; + unsigned MDNStartSlot = 0, MDNEndSlot = 0; void processMachineFunctionMetadata(AbstractSlotTrackerStorage *AST, const MachineFunction &MF); diff --git a/llvm/include/llvm/CodeGen/MachineOperand.h b/llvm/include/llvm/CodeGen/MachineOperand.h index f17904d54cdd..eded28183ea2 100644 --- a/llvm/include/llvm/CodeGen/MachineOperand.h +++ b/llvm/include/llvm/CodeGen/MachineOperand.h @@ -162,7 +162,7 @@ private: /// ParentMI - This is the instruction that this operand is embedded into. /// This is valid for all operand types, when the operand is in an instr. - MachineInstr *ParentMI; + MachineInstr *ParentMI = nullptr; /// Contents union - This contains the payload for the various operand types. union ContentsUnion { @@ -200,7 +200,7 @@ private: } Contents; explicit MachineOperand(MachineOperandType K) - : OpKind(K), SubReg_TargetFlags(0), ParentMI(nullptr) { + : OpKind(K), SubReg_TargetFlags(0) { // Assert that the layout is what we expect. It's easy to grow this object. static_assert(alignof(MachineOperand) <= alignof(int64_t), "MachineOperand shouldn't be more than 8 byte aligned"); diff --git a/llvm/include/llvm/CodeGen/MachineOutliner.h b/llvm/include/llvm/CodeGen/MachineOutliner.h index 3e597e728fef..08b76295dbf2 100644 --- a/llvm/include/llvm/CodeGen/MachineOutliner.h +++ b/llvm/include/llvm/CodeGen/MachineOutliner.h @@ -124,7 +124,7 @@ public: unsigned FunctionIdx, unsigned Flags) : StartIdx(StartIdx), Len(Len), FirstInst(FirstInst), LastInst(LastInst), MBB(MBB), FunctionIdx(FunctionIdx), Flags(Flags) {} - Candidate() {} + Candidate() = default; /// Used to ensure that \p Candidates are outlined in an order that /// preserves the start and end indices of other \p Candidates. @@ -218,7 +218,7 @@ public: C.Benefit = B; } - OutlinedFunction() {} + OutlinedFunction() = default; }; } // namespace outliner } // namespace llvm diff --git a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h index dbabfe5f0f32..94ae6fe02e9c 100644 --- a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h @@ -84,7 +84,7 @@ private: /// The flag is true upon \p UpdatedCSRs initialization /// and false otherwise. - bool IsUpdatedCSRsInitialized; + bool IsUpdatedCSRsInitialized = false; /// Contains the updated callee saved register list. 
/// As opposed to the static list defined in register info, diff --git a/llvm/include/llvm/CodeGen/ReplaceWithVeclib.h b/llvm/include/llvm/CodeGen/ReplaceWithVeclib.h index d0fadd55d481..7c0ebe7191e4 100644 --- a/llvm/include/llvm/CodeGen/ReplaceWithVeclib.h +++ b/llvm/include/llvm/CodeGen/ReplaceWithVeclib.h @@ -16,6 +16,7 @@ #include "llvm/IR/PassManager.h" #include "llvm/InitializePasses.h" +#include "llvm/Pass.h" namespace llvm { struct ReplaceWithVeclib : public PassInfoMixin<ReplaceWithVeclib> { diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/llvm/include/llvm/CodeGen/SelectionDAGISel.h index 94ba6ad91517..9cea197724cc 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h @@ -46,8 +46,8 @@ public: MachineRegisterInfo *RegInfo; SelectionDAG *CurDAG; std::unique_ptr<SelectionDAGBuilder> SDB; - AAResults *AA; - GCFunctionInfo *GFI; + AAResults *AA = nullptr; + GCFunctionInfo *GFI = nullptr; CodeGenOpt::Level OptLevel; const TargetInstrInfo *TII; const TargetLowering *TLI; @@ -199,7 +199,7 @@ public: protected: /// DAGSize - Size of DAG being instruction selected. /// - unsigned DAGSize; + unsigned DAGSize = 0; /// ReplaceUses - replace all uses of the old node F with the use /// of the new node T. diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index cd62c47abce9..04c6b50197d4 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -741,11 +741,9 @@ public: using reference = value_type &; use_iterator() = default; - use_iterator(const use_iterator &I) : Op(I.Op) {} + use_iterator(const use_iterator &I) = default; - bool operator==(const use_iterator &x) const { - return Op == x.Op; - } + bool operator==(const use_iterator &x) const { return Op == x.Op; } bool operator!=(const use_iterator &x) const { return !operator==(x); } diff --git a/llvm/include/llvm/CodeGen/SlotIndexes.h b/llvm/include/llvm/CodeGen/SlotIndexes.h index b2133de93ea2..e8d618a24f9b 100644 --- a/llvm/include/llvm/CodeGen/SlotIndexes.h +++ b/llvm/include/llvm/CodeGen/SlotIndexes.h @@ -319,7 +319,7 @@ class raw_ostream; using IndexList = ilist<IndexListEntry>; IndexList indexList; - MachineFunction *mf; + MachineFunction *mf = nullptr; using Mi2IndexMap = DenseMap<const MachineInstr *, SlotIndex>; Mi2IndexMap mi2iMap; diff --git a/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h b/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h index bc22d7789856..47bedd9befc8 100644 --- a/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h +++ b/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h @@ -183,12 +183,12 @@ struct JumpTableHeader { const Value *SValue; MachineBasicBlock *HeaderBB; bool Emitted; - bool FallthroughUnreachable; + bool FallthroughUnreachable = false; JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H, bool E = false) : First(std::move(F)), Last(std::move(L)), SValue(SV), HeaderBB(H), - Emitted(E), FallthroughUnreachable(false) {} + Emitted(E) {} }; using JumpTableBlock = std::pair<JumpTableHeader, JumpTable>; @@ -218,14 +218,14 @@ struct BitTestBlock { BitTestInfo Cases; BranchProbability Prob; BranchProbability DefaultProb; - bool FallthroughUnreachable; + bool FallthroughUnreachable = false; BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT, bool E, bool CR, MachineBasicBlock *P, MachineBasicBlock *D, BitTestInfo C, BranchProbability Pr) : First(std::move(F)), Range(std::move(R)), SValue(SV), 
Reg(Rg), RegVT(RgVT), Emitted(E), ContiguousRange(CR), Parent(P), Default(D), - Cases(std::move(C)), Prob(Pr), FallthroughUnreachable(false) {} + Cases(std::move(C)), Prob(Pr) {} }; /// Return the range of values within a range. diff --git a/llvm/include/llvm/CodeGen/TargetCallingConv.h b/llvm/include/llvm/CodeGen/TargetCallingConv.h index 7713dd0800c0..62365330379d 100644 --- a/llvm/include/llvm/CodeGen/TargetCallingConv.h +++ b/llvm/include/llvm/CodeGen/TargetCallingConv.h @@ -53,9 +53,9 @@ namespace ISD { unsigned IsCopyElisionCandidate : 1; ///< Argument copy elision candidate unsigned IsPointer : 1; - unsigned ByValOrByRefSize; ///< Byval or byref struct size + unsigned ByValOrByRefSize = 0; ///< Byval or byref struct size - unsigned PointerAddrSpace; ///< Address space of pointer argument + unsigned PointerAddrSpace = 0; ///< Address space of pointer argument public: ArgFlagsTy() @@ -65,8 +65,7 @@ namespace ISD { IsSwiftError(0), IsCFGuardTarget(0), IsHva(0), IsHvaStart(0), IsSecArgPass(0), MemAlign(0), OrigAlign(0), IsInConsecutiveRegsLast(0), IsInConsecutiveRegs(0), - IsCopyElisionCandidate(0), IsPointer(0), ByValOrByRefSize(0), - PointerAddrSpace(0) { + IsCopyElisionCandidate(0), IsPointer(0) { static_assert(sizeof(*this) == 3 * sizeof(unsigned), "flags are too big"); } diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index bec191570594..3861648a5feb 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -3485,13 +3485,19 @@ public: bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, DAGCombinerInfo &DCI) const; + /// Helper wrapper around SimplifyDemandedBits. + /// Adds Op back to the worklist upon success. + bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, + const APInt &DemandedElts, + DAGCombinerInfo &DCI) const; + /// More limited version of SimplifyDemandedBits that can be used to "look /// through" ops that don't contribute to the DemandedBits/DemandedElts - /// bitwise ops etc. SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, - unsigned Depth) const; + unsigned Depth = 0) const; /// Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all /// elements. @@ -3676,11 +3682,11 @@ public: /// Return if the N is a constant or constant vector equal to the true value /// from getBooleanContents(). - bool isConstTrueVal(const SDNode *N) const; + bool isConstTrueVal(SDValue N) const; /// Return if the N is a constant or constant vector equal to the false value /// from getBooleanContents(). - bool isConstFalseVal(const SDNode *N) const; + bool isConstFalseVal(SDValue N) const; /// Return if \p N is a True value when extended to \p VT. bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const; diff --git a/llvm/include/llvm/CodeGen/VirtRegMap.h b/llvm/include/llvm/CodeGen/VirtRegMap.h index 4953d88340b1..42e8d294a637 100644 --- a/llvm/include/llvm/CodeGen/VirtRegMap.h +++ b/llvm/include/llvm/CodeGen/VirtRegMap.h @@ -39,10 +39,10 @@ class TargetInstrInfo; }; private: - MachineRegisterInfo *MRI; - const TargetInstrInfo *TII; - const TargetRegisterInfo *TRI; - MachineFunction *MF; + MachineRegisterInfo *MRI = nullptr; + const TargetInstrInfo *TII = nullptr; + const TargetRegisterInfo *TRI = nullptr; + MachineFunction *MF = nullptr; /// Virt2PhysMap - This is a virtual to physical register /// mapping. 
Each virtual register is required to have an entry in @@ -72,8 +72,7 @@ class TargetInstrInfo; static char ID; VirtRegMap() - : MachineFunctionPass(ID), MRI(nullptr), TII(nullptr), TRI(nullptr), - MF(nullptr), Virt2PhysMap(NO_PHYS_REG), + : MachineFunctionPass(ID), Virt2PhysMap(NO_PHYS_REG), Virt2StackSlotMap(NO_STACK_SLOT), Virt2SplitMap(0) {} VirtRegMap(const VirtRegMap &) = delete; VirtRegMap &operator=(const VirtRegMap &) = delete; diff --git a/llvm/include/llvm/DWARFLinker/DWARFStreamer.h b/llvm/include/llvm/DWARFLinker/DWARFStreamer.h index 9a5c6bcaf83f..fc8c59904cfb 100644 --- a/llvm/include/llvm/DWARFLinker/DWARFStreamer.h +++ b/llvm/include/llvm/DWARFLinker/DWARFStreamer.h @@ -9,6 +9,7 @@ #ifndef LLVM_DWARFLINKER_DWARFSTREAMER_H #define LLVM_DWARFLINKER_DWARFSTREAMER_H +#include "llvm/BinaryFormat/Swift.h" #include "llvm/CodeGen/AccelTable.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/DWARFLinker/DWARFLinker.h" @@ -48,7 +49,7 @@ public: : OutFile(OutFile), OutFileType(OutFileType), Translator(Translator), ErrorHandler(Error), WarningHandler(Warning) {} - bool init(Triple TheTriple); + bool init(Triple TheTriple, StringRef Swift5ReflectionSegmentName); /// Dump the file to the disk. void finish(); @@ -85,6 +86,11 @@ public: /// Emit the swift_ast section stored in \p Buffer. void emitSwiftAST(StringRef Buffer); + /// Emit the swift reflection section stored in \p Buffer. + void emitSwiftReflectionSection( + llvm::binaryformat::Swift5ReflectionSectionKind ReflSectionKind, + StringRef Buffer, uint32_t Alignment, uint32_t Size); + /// Emit debug_ranges for \p FuncRange by translating the /// original \p Entries. void emitRangesEntries( diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h index 536583e20640..8167aaaeffb5 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h @@ -535,7 +535,7 @@ public: : Kind(K), IsDWARF64(IsDWARF64), Offset(Offset), Length(Length), CFIs(CodeAlign, DataAlign, Arch) {} - virtual ~FrameEntry() {} + virtual ~FrameEntry() = default; FrameKind getKind() const { return Kind; } uint64_t getOffset() const { return Offset; } diff --git a/llvm/include/llvm/DebugInfo/GSYM/StringTable.h b/llvm/include/llvm/DebugInfo/GSYM/StringTable.h index 6dd90499c203..d920335d373e 100644 --- a/llvm/include/llvm/DebugInfo/GSYM/StringTable.h +++ b/llvm/include/llvm/DebugInfo/GSYM/StringTable.h @@ -20,7 +20,7 @@ namespace gsym { /// string at offset zero. Strings must be UTF8 NULL terminated strings. 
struct StringTable { StringRef Data; - StringTable() {} + StringTable() = default; StringTable(StringRef D) : Data(D) {} StringRef operator[](size_t Offset) const { return getString(Offset); } StringRef getString(uint32_t Offset) const { diff --git a/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h b/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h index 779dc885372d..91748e15ba65 100644 --- a/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h +++ b/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h @@ -39,8 +39,8 @@ struct Request { class DIPrinter { public: - DIPrinter() {} - virtual ~DIPrinter() {} + DIPrinter() = default; + virtual ~DIPrinter() = default; virtual void print(const Request &Request, const DILineInfo &Info) = 0; virtual void print(const Request &Request, const DIInliningInfo &Info) = 0; diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h index 28545ed06836..760319544a02 100644 --- a/llvm/include/llvm/Demangle/ItaniumDemangle.h +++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h @@ -1,15 +1,15 @@ -// Do not edit! -*- read-only -*- -// See README.txt for instructions -//===------------------------- ItaniumDemangle.h ----------------*- C++ -*-===// -// +//===--- ItaniumDemangle.h -----------*- mode:c++;eval:(read-only-mode) -*-===// +// Do not edit! See README.txt. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // -// Generic itanium demangler library. This file has two byte-per-byte identical -// copies in the source tree, one in libcxxabi, and the other in llvm. +// Generic itanium demangler library. +// There are two copies of this file in the source tree. The one under +// libcxxabi is the original and the one under llvm is the copy. Use +// cp-to-llvm.sh to update the copy. See README.txt for more details. // //===----------------------------------------------------------------------===// diff --git a/llvm/include/llvm/Demangle/README.txt b/llvm/include/llvm/Demangle/README.txt index 514ff6dd16f2..76470f61f959 100644 --- a/llvm/include/llvm/Demangle/README.txt +++ b/llvm/include/llvm/Demangle/README.txt @@ -4,41 +4,50 @@ Itanium Name Demangler Library Introduction ------------ -This directory contains the generic itanium name demangler library. The main -purpose of the library is to demangle C++ symbols, i.e. convert the string -"_Z1fv" into "f()". You can also use the CRTP base ManglingParser to perform -some simple analysis on the mangled name, or (in LLVM) use the opaque -ItaniumPartialDemangler to query the demangled AST. +This directory contains the generic itanium name demangler +library. The main purpose of the library is to demangle C++ symbols, +i.e. convert the string "_Z1fv" into "f()". You can also use the CRTP +base ManglingParser to perform some simple analysis on the mangled +name, or (in LLVM) use the opaque ItaniumPartialDemangler to query the +demangled AST. Why are there multiple copies of this library in the source tree? --------------------------------------------------------------------- -This directory is mirrored between libcxxabi/demangle and -llvm/include/llvm/Demangle. The simple reason for this is that both projects -need to demangle symbols, but neither can depend on each other.
libcxxabi needs -the demangler to implement __cxa_demangle, which is part of the itanium ABI -spec. LLVM needs a copy for a bunch of places, but doesn't want to use the -system's __cxa_demangle because it a) might not be available (i.e., on Windows), -and b) probably isn't that up-to-date on the latest language features. - -The copy of the demangler in LLVM has some extra stuff that aren't needed in -libcxxabi (ie, the MSVC demangler, ItaniumPartialDemangler), which depend on the -shared generic components. Despite these differences, we want to keep the "core" -generic demangling library identical between both copies to simplify development -and testing. - -If you're working on the generic library, then do the work first in libcxxabi, -then run the cp-to-llvm.sh script in src/demangle. This script takes as an -argument the path to llvm, and re-copies the changes you made to libcxxabi over. -Note that this script just blindly overwrites all changes to the generic library -in llvm, so be careful. - -Because the core demangler needs to work in libcxxabi, everything needs to be -declared in an anonymous namespace (see DEMANGLE_NAMESPACE_BEGIN), and you can't -introduce any code that depends on the libcxx dylib. - -Hopefully, when LLVM becomes a monorepo, we can de-duplicate this code, and have -both LLVM and libcxxabi depend on a shared demangler library. +The canonical sources are in libcxxabi/src/demangle and some of the +files are copied to llvm/include/llvm/Demangle. The split dates from +before the monorepo: both [sub]projects need to demangle symbols, but +neither can depend on the other. + +* libcxxabi needs the demangler to implement __cxa_demangle, which is + part of the itanium ABI spec. + +* LLVM needs a copy for a bunch of places, and cannot rely on the + system's __cxa_demangle because it a) might not be available (i.e., + on Windows), and b) may not be up-to-date on the latest language + features. + +The copy of the demangler in LLVM has some extra pieces that aren't +needed in libcxxabi (i.e., the MSVC demangler, ItaniumPartialDemangler), +which depend on the shared generic components. Despite these +differences, we want to keep the "core" generic demangling library +identical between both copies to simplify development and testing. + +If you're working on the generic library, then do the work first in +libcxxabi, then run the cp-to-llvm.sh script in src/demangle. This +script takes as an optional argument the path to llvm, and copies the +changes you made to libcxxabi over. Note that this script just +blindly overwrites all changes to the generic library in llvm, so be +careful. + +Because the core demangler needs to work in libcxxabi, everything +needs to be declared in an anonymous namespace (see +DEMANGLE_NAMESPACE_BEGIN), and you can't introduce any code that +depends on the libcxx dylib. + +FIXME: Now that LLVM is a monorepo, it should be possible to +de-duplicate this code, and have both LLVM and libcxxabi depend on a +shared demangler library. Testing ------- diff --git a/llvm/include/llvm/Demangle/StringView.h b/llvm/include/llvm/Demangle/StringView.h index 323282f69c26..6bbb8837fed1 100644 --- a/llvm/include/llvm/Demangle/StringView.h +++ b/llvm/include/llvm/Demangle/StringView.h @@ -1,7 +1,5 @@ -// Do not edit! -*- read-only -*- -// See README.txt for instructions -//===--- StringView.h -------------------------------------------*- C++ -*-===// -// +//===--- StringView.h ----------------*- mode:c++;eval:(read-only-mode) -*-===// +// Do not edit!
See README.txt. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -9,6 +7,9 @@ //===----------------------------------------------------------------------===// // // FIXME: Use std::string_view instead when we support C++17. +// There are two copies of this file in the source tree. The one under +// libcxxabi is the original and the one under llvm is the copy. Use +// cp-to-llvm.sh to update the copy. See README.txt for more details. // //===----------------------------------------------------------------------===// diff --git a/llvm/include/llvm/Demangle/Utility.h b/llvm/include/llvm/Demangle/Utility.h index bec019da8680..1cf7e8f1df45 100644 --- a/llvm/include/llvm/Demangle/Utility.h +++ b/llvm/include/llvm/Demangle/Utility.h @@ -1,14 +1,15 @@ -// Do not edit! -*- read-only -*- -// See README.txt for instructions -//===--- Utility.h ----------------------------------------------*- C++ -*-===// -// +//===--- Utility.h -------------------*- mode:c++;eval:(read-only-mode) -*-===// +// Do not edit! See README.txt. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // -// Provide some utility classes for use in the demangler(s). +// Provide some utility classes for use in the demangler. +// There are two copies of this file in the source tree. The one in libcxxabi +// is the original and the one in llvm is the copy. Use cp-to-llvm.sh to update +// the copy. See README.txt for more details. // //===----------------------------------------------------------------------===// diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h index ddbb3e76f145..25f1349f15f2 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h @@ -1636,7 +1636,7 @@ using AsyncLookupResult = DenseMap<StringRef, JITEvaluatedSymbol>; /// or an error if resolution failed. class JITLinkAsyncLookupContinuation { public: - virtual ~JITLinkAsyncLookupContinuation() {} + virtual ~JITLinkAsyncLookupContinuation() = default; virtual void run(Expected<AsyncLookupResult> LR) = 0; private: diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h index d0168f79e3d8..c4647148f287 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h @@ -686,7 +686,7 @@ public: MaterializationUnit(Interface I) : SymbolFlags(std::move(I.SymbolFlags)), InitSymbol(std::move(I.InitSymbol)) {} - virtual ~MaterializationUnit() {} + virtual ~MaterializationUnit() = default; /// Return the name of this materialization unit. Useful for debugging /// output. 
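As the README hunk above says, the library's core job is turning "_Z1fv" into "f()". A quick usage sketch against the llvm-14-era C-style entry point (itaniumDemangle changed signature in later releases, so treat this as a snapshot of that API):

    #include "llvm/Demangle/Demangle.h"
    #include <cstdio>
    #include <cstdlib>

    int main() {
      int Status = 0;
      // A null buffer asks the demangler to malloc() the result; the
      // caller owns and frees it. Status 0 means success.
      char *Demangled = llvm::itaniumDemangle("_Z1fv", nullptr, nullptr, &Status);
      if (Status == 0 && Demangled)
        std::printf("%s\n", Demangled); // prints: f()
      std::free(Demangled);
      return 0;
    }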
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h b/llvm/include/llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h index d2bf8330695f..253b1c876782 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h @@ -29,7 +29,7 @@ class GDBJITDebugInfoRegistrationPlugin : public ObjectLinkingLayer::Plugin { public: class DebugSectionSynthesizer { public: - virtual ~DebugSectionSynthesizer() {} + virtual ~DebugSectionSynthesizer() = default; virtual Error startSynthesis() = 0; virtual Error completeSynthesisAndRegister() = 0; }; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h index 940d0d28ae83..ac7051b5b75c 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h @@ -34,7 +34,7 @@ class ExecutionSession; class DebugObjectRegistrar { public: virtual Error registerDebugObject(ExecutorAddrRange TargetMem) = 0; - virtual ~DebugObjectRegistrar() {} + virtual ~DebugObjectRegistrar() = default; }; /// Use ExecutorProcessControl to register debug objects locally or in a remote diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TaskDispatch.h b/llvm/include/llvm/ExecutionEngine/Orc/TaskDispatch.h index c57264e59655..8c287f9fec0e 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/TaskDispatch.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/TaskDispatch.h @@ -35,7 +35,7 @@ class Task : public RTTIExtends<Task, RTTIRoot> { public: static char ID; - virtual ~Task() {} + virtual ~Task() = default; /// Description of the task to be performed. Used for logging. virtual void printDescription(raw_ostream &OS) = 0; diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h index 2178acc90e2c..bee90281e086 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h @@ -113,6 +113,9 @@ enum class AddressSpace : unsigned { Local = 5, }; +/// \note This needs to be kept in sync with interop.h enum kmp_interop_type_t. +enum class OMPInteropType { Unknown, Target, TargetSync }; + } // end namespace omp } // end namespace llvm diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 85dd28ec3159..f60debe8411c 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -1003,6 +1003,55 @@ public: llvm::ConstantInt *Size, const llvm::Twine &Name = Twine("")); + /// Create a runtime call for __tgt_interop_init + /// + /// \param Loc The insert and source location description. + /// \param InteropVar variable to be allocated + /// \param InteropType type of interop operation + /// \param Device device to which offloading will occur + /// \param NumDependences number of dependence variables + /// \param DependenceAddress pointer to dependence variables + /// \param HaveNowaitClause whether the nowait clause exists + /// + /// \returns CallInst to the __tgt_interop_init call + CallInst *createOMPInteropInit(const LocationDescription &Loc, + Value *InteropVar, + omp::OMPInteropType InteropType, Value *Device, + Value *NumDependences, + Value *DependenceAddress, + bool HaveNowaitClause); + + /// Create a runtime call for __tgt_interop_destroy + /// + /// \param Loc The insert and source location description.
+ /// \param InteropVar variable to be allocated + /// \param Device device to which offloading will occur + /// \param NumDependences number of dependence variables + /// \param DependenceAddress pointer to dependence variables + /// \param HaveNowaitClause whether the nowait clause exists + /// + /// \returns CallInst to the __tgt_interop_destroy call + CallInst *createOMPInteropDestroy(const LocationDescription &Loc, + Value *InteropVar, Value *Device, + Value *NumDependences, + Value *DependenceAddress, + bool HaveNowaitClause); + + /// Create a runtime call for __tgt_interop_use + /// + /// \param Loc The insert and source location description. + /// \param InteropVar variable to be allocated + /// \param Device device to which offloading will occur + /// \param NumDependences number of dependence variables + /// \param DependenceAddress pointer to dependence variables + /// \param HaveNowaitClause whether the nowait clause exists + /// + /// \returns CallInst to the __tgt_interop_use call + CallInst *createOMPInteropUse(const LocationDescription &Loc, + Value *InteropVar, Value *Device, + Value *NumDependences, Value *DependenceAddress, + bool HaveNowaitClause); + /// The `omp target` interface /// /// For more information about the usage of this interface, @@ -1167,6 +1216,7 @@ private: /// /// \param AllocIP Instruction to create AllocaInst before. /// \param X The target atomic pointer to be updated + /// \param XElemTy The element type of the atomic pointer. /// \param Expr The value to update X with. /// \param AO Atomic ordering of the generated atomic /// instructions. @@ -1183,12 +1233,11 @@ private: /// /// \returns A pair of the old value of X before the update, and the value /// used for the update. - std::pair<Value *, Value *> emitAtomicUpdate(Instruction *AllocIP, Value *X, - Value *Expr, AtomicOrdering AO, - AtomicRMWInst::BinOp RMWOp, - AtomicUpdateCallbackTy &UpdateOp, - bool VolatileX, - bool IsXBinopExpr); + std::pair<Value *, Value *> + emitAtomicUpdate(Instruction *AllocIP, Value *X, Type *XElemTy, Value *Expr, + AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, + AtomicUpdateCallbackTy &UpdateOp, bool VolatileX, + bool IsXBinopExpr); /// Emit the binary op described by \p RMWOp, using \p Src1 and \p Src2.
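Putting the three new interop entry points together: a frontend lowering `#pragma omp interop` might drive them roughly as below. InteropVar and Device are produced by the caller, and the zero/null dependence values stand in for an absent depend clause; all of that is illustrative, only the parameter order comes from the declarations above.

    #include "llvm/Frontend/OpenMP/OMPConstants.h"
    #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
    #include "llvm/IR/Constants.h"
    using namespace llvm;

    static void emitInteropPair(OpenMPIRBuilder &OMPB,
                                const OpenMPIRBuilder::LocationDescription &Loc,
                                Value *InteropVar, Value *Device) {
      Value *NumDeps = OMPB.Builder.getInt32(0); // no depend clauses
      Value *DepAddr = Constant::getNullValue(OMPB.Builder.getInt8PtrTy());
      // __tgt_interop_init with a targetsync interop object, no nowait.
      OMPB.createOMPInteropInit(Loc, InteropVar, omp::OMPInteropType::TargetSync,
                                Device, NumDeps, DepAddr,
                                /*HaveNowaitClause=*/false);
      // ...work with the interop object... then tear it down.
      OMPB.createOMPInteropDestroy(Loc, InteropVar, Device, NumDeps, DepAddr,
                                   /*HaveNowaitClause=*/false);
    }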
/// @@ -1200,6 +1249,7 @@ public: /// a struct to pack relevant information while generating atomic Ops struct AtomicOpValue { Value *Var = nullptr; + Type *ElemTy = nullptr; bool IsSigned = false; bool IsVolatile = false; }; diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index d2b70edd4d87..0c3cb3f43105 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -386,6 +386,13 @@ __OMP_RTL(__kmpc_aligned_alloc, false, VoidPtr, /* Int */ Int32, SizeTy, SizeTy, VoidPtr) __OMP_RTL(__kmpc_free, false, Void, /* Int */ Int32, VoidPtr, VoidPtr) +__OMP_RTL(__tgt_interop_init, false, Void, IdentPtr, Int32, VoidPtrPtr, Int64, + Int32, Int32, VoidPtr, Int32) +__OMP_RTL(__tgt_interop_destroy, false, Void, IdentPtr, Int32, VoidPtrPtr, + Int32, Int32, VoidPtr, Int32) +__OMP_RTL(__tgt_interop_use, false, Void, IdentPtr, Int32, VoidPtrPtr, Int32, + Int32, VoidPtr, Int32) + __OMP_RTL(__kmpc_init_allocator, false, /* omp_allocator_handle_t */ VoidPtr, /* Int */ Int32, /* omp_memespace_handle_t */ VoidPtr, /* Int */ Int32, /* omp_alloctrait_t */ VoidPtr) diff --git a/llvm/include/llvm/IR/AbstractCallSite.h b/llvm/include/llvm/IR/AbstractCallSite.h index 31df4c75b6e7..69048554a05c 100644 --- a/llvm/include/llvm/IR/AbstractCallSite.h +++ b/llvm/include/llvm/IR/AbstractCallSite.h @@ -14,11 +14,11 @@ #ifndef LLVM_IR_ABSTRACTCALLSITE_H #define LLVM_IR_ABSTRACTCALLSITE_H +#include "llvm/IR/Argument.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instruction.h" #include "llvm/IR/Use.h" -#include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" #include <cassert> diff --git a/llvm/include/llvm/IR/Attributes.h b/llvm/include/llvm/IR/Attributes.h index 5e2cfe6d81ac..74b60f1e3d05 100644 --- a/llvm/include/llvm/IR/Attributes.h +++ b/llvm/include/llvm/IR/Attributes.h @@ -20,7 +20,6 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/Config/llvm-config.h" #include "llvm/Support/Alignment.h" #include "llvm/Support/PointerLikeTypeTraits.h" diff --git a/llvm/include/llvm/IR/CFG.h b/llvm/include/llvm/IR/CFG.h index b872e2626981..0ee584f8af7e 100644 --- a/llvm/include/llvm/IR/CFG.h +++ b/llvm/include/llvm/IR/CFG.h @@ -22,6 +22,7 @@ #include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/Function.h" #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" @@ -31,7 +32,6 @@ namespace llvm { -class BasicBlock; class Instruction; class Use; diff --git a/llvm/include/llvm/IR/DIBuilder.h b/llvm/include/llvm/IR/DIBuilder.h index f36c9e620d43..fc461fc3f49f 100644 --- a/llvm/include/llvm/IR/DIBuilder.h +++ b/llvm/include/llvm/IR/DIBuilder.h @@ -21,7 +21,6 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/TrackingMDRef.h" #include "llvm/Support/Casting.h" diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h index ba2568042c41..96569179060f 100644 --- a/llvm/include/llvm/IR/DebugInfoMetadata.h +++ b/llvm/include/llvm/IR/DebugInfoMetadata.h @@ -33,7 +33,6 @@ #include <cstddef> #include <cstdint> #include <iterator> -#include <type_traits> #include 
<vector> // Helper macros for defining get() overrides. diff --git a/llvm/include/llvm/IR/DiagnosticInfo.h b/llvm/include/llvm/IR/DiagnosticInfo.h index 73b0be43e136..1ea1d9787d61 100644 --- a/llvm/include/llvm/IR/DiagnosticInfo.h +++ b/llvm/include/llvm/IR/DiagnosticInfo.h @@ -15,14 +15,16 @@ #define LLVM_IR_DIAGNOSTICINFO_H #include "llvm-c/Types.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/IR/DebugLoc.h" #include "llvm/Support/CBindingWrapping.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/SourceMgr.h" #include "llvm/Support/TypeSize.h" -#include "llvm/Support/YAMLTraits.h" #include <algorithm> #include <cstdint> #include <functional> @@ -33,13 +35,15 @@ namespace llvm { // Forward declarations. class DiagnosticPrinter; +class DIFile; +class DISubprogram; class CallInst; class Function; class Instruction; class InstructionCost; -class LLVMContext; class Module; -class SMDiagnostic; +class Type; +class Value; /// Defines the different supported severity of a diagnostic. enum DiagnosticSeverity : char { @@ -1049,18 +1053,20 @@ static DiagnosticSeverity getDiagnosticSeverity(SourceMgr::DiagKind DK) { /// Diagnostic information for SMDiagnostic reporting. class DiagnosticInfoSrcMgr : public DiagnosticInfo { const SMDiagnostic &Diagnostic; + StringRef ModName; // For inlineasm !srcloc translation. bool InlineAsmDiag; unsigned LocCookie; public: - DiagnosticInfoSrcMgr(const SMDiagnostic &Diagnostic, + DiagnosticInfoSrcMgr(const SMDiagnostic &Diagnostic, StringRef ModName, bool InlineAsmDiag = true, unsigned LocCookie = 0) : DiagnosticInfo(DK_SrcMgr, getDiagnosticSeverity(Diagnostic.getKind())), - Diagnostic(Diagnostic), InlineAsmDiag(InlineAsmDiag), + Diagnostic(Diagnostic), ModName(ModName), InlineAsmDiag(InlineAsmDiag), LocCookie(LocCookie) {} + StringRef getModuleName() const { return ModName; } bool isInlineAsmDiag() const { return InlineAsmDiag; } const SMDiagnostic &getSMDiag() const { return Diagnostic; } unsigned getLocCookie() const { return LocCookie; } diff --git a/llvm/include/llvm/IR/Dominators.h b/llvm/include/llvm/IR/Dominators.h index 475355af5647..d13a5856df3b 100644 --- a/llvm/include/llvm/IR/Dominators.h +++ b/llvm/include/llvm/IR/Dominators.h @@ -14,23 +14,34 @@ #ifndef LLVM_IR_DOMINATORS_H #define LLVM_IR_DOMINATORS_H +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Twine.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/PassManager.h" +#include "llvm/IR/Use.h" #include "llvm/Pass.h" +#include "llvm/Support/CFGDiff.h" +#include "llvm/Support/CFGUpdate.h" #include "llvm/Support/GenericDomTree.h" +#include "llvm/Support/GenericDomTreeConstruction.h" #include <utility> +#include <vector> namespace llvm { class Function; class Instruction; class Module; +class Value; class raw_ostream; +template <class GraphType> struct GraphTraits; extern template class DomTreeNodeBase<BasicBlock>; extern template class DominatorTreeBase<BasicBlock, false>; // DomTree diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index 53f517480ca1..a1789759960d 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ 
-28,12 +28,13 @@ #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/FPEnv.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" @@ -44,7 +45,6 @@ #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/Casting.h" #include <cassert> -#include <cstddef> #include <cstdint> #include <functional> #include <utility> @@ -52,7 +52,6 @@ namespace llvm { class APInt; -class MDNode; class Use; /// This provides the default implementation of the IRBuilder diff --git a/llvm/include/llvm/IR/IRPrintingPasses.h b/llvm/include/llvm/IR/IRPrintingPasses.h index 2e62be7cd1ec..3fba5b81e37a 100644 --- a/llvm/include/llvm/IR/IRPrintingPasses.h +++ b/llvm/include/llvm/IR/IRPrintingPasses.h @@ -24,6 +24,11 @@ namespace llvm { class raw_ostream; class StringRef; +class Function; +class FunctionPass; +class Module; +class ModulePass; +class Pass; /// Create and return a pass that writes the module to the specified /// \c raw_ostream. diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h index b3d2a2c8ed9d..589926c0faf1 100644 --- a/llvm/include/llvm/IR/InstrTypes.h +++ b/llvm/include/llvm/IR/InstrTypes.h @@ -1393,10 +1393,13 @@ public: const Use &getCalledOperandUse() const { return Op<CalledOperandOpEndIdx>(); } Use &getCalledOperandUse() { return Op<CalledOperandOpEndIdx>(); } - /// Returns the function called, or null if this is an - /// indirect function invocation. + /// Returns the function called, or null if this is an indirect function + /// invocation or the function signature does not match the call signature. Function *getCalledFunction() const { - return dyn_cast_or_null<Function>(getCalledOperand()); + if (auto *F = dyn_cast_or_null<Function>(getCalledOperand())) + if (F->getValueType() == getFunctionType()) + return F; + return nullptr; } /// Return true if the callsite is an indirect call. 
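The getCalledFunction() change above tightens a long-standing gotcha: a call whose callee is a Function but whose type disagrees with the call's own function type used to be reported as a direct call. A small sketch of what the new contract means for callers:

    #include "llvm/IR/InstrTypes.h"
    using namespace llvm;

    static bool hasWellTypedDirectCallee(const CallBase &CB) {
      // Non-null only when the callee is a Function *and* its value type
      // equals CB.getFunctionType(); mismatched calls now look indirect.
      return CB.getCalledFunction() != nullptr;
    }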
diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h index 9878082ffffa..1937ffd36f7b 100644 --- a/llvm/include/llvm/IR/Instruction.h +++ b/llvm/include/llvm/IR/Instruction.h @@ -25,8 +25,6 @@ #include "llvm/IR/Value.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" -#include <algorithm> -#include <cassert> #include <cstdint> #include <utility> diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h index 84ebb461ebef..5929cff3b4fb 100644 --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -27,11 +27,9 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CallingConv.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constant.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/OperandTraits.h" @@ -52,7 +50,6 @@ namespace llvm { class APInt; class ConstantInt; class DataLayout; -class LLVMContext; //===----------------------------------------------------------------------===// // AllocaInst Class diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h index f4e571e86493..01dada25a285 100644 --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -1194,6 +1194,17 @@ public: ConstantInt *getIndex() const; }; +/// This represents the llvm.instrprof.cover intrinsic. +class InstrProfCoverInst : public InstrProfInstBase { +public: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::instrprof_cover; + } + static bool classof(const Value *V) { + return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); + } +}; + /// This represents the llvm.instrprof.increment intrinsic. class InstrProfIncrementInst : public InstrProfInstBase { public: diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 3e40bbf39dd4..f5248e82ad21 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -582,6 +582,10 @@ def int_experimental_noalias_scope_decl def int_stackprotector : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_ptrptr_ty], []>; def int_stackguard : DefaultAttrsIntrinsic<[llvm_ptr_ty], [], []>; +// A cover for instrumentation based profiling. +def int_instrprof_cover : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty, + llvm_i32_ty, llvm_i32_ty]>; + // A counter increment for instrumentation based profiling. 
def int_instrprof_increment : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty, diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index e610c28a5923..a65ddff07a29 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -897,6 +897,14 @@ def int_aarch64_stgp : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_i64_ty, llv [IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>]>; } +//===----------------------------------------------------------------------===// +// Memory Operations (MOPS) Intrinsics +let TargetPrefix = "aarch64" in { + // Sizes are chosen to correspond to the llvm.memset intrinsic: ptr, i8, i64 + def int_aarch64_mops_memset_tag : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i8_ty, llvm_i64_ty], + [IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>]>; +} + // Transactional Memory Extension (TME) Intrinsics let TargetPrefix = "aarch64" in { def int_aarch64_tstart : GCCBuiltin<"__builtin_arm_tstart">, diff --git a/llvm/include/llvm/IR/LLVMContext.h b/llvm/include/llvm/IR/LLVMContext.h index d165a405ce22..446bcecf1c64 100644 --- a/llvm/include/llvm/IR/LLVMContext.h +++ b/llvm/include/llvm/IR/LLVMContext.h @@ -36,7 +36,6 @@ template <typename T> class StringMapEntry; class StringRef; class Twine; class LLVMRemarkStreamer; -class raw_ostream; namespace remarks { class RemarkStreamer; diff --git a/llvm/include/llvm/IR/LLVMRemarkStreamer.h b/llvm/include/llvm/IR/LLVMRemarkStreamer.h index e7627e993370..094ead273eed 100644 --- a/llvm/include/llvm/IR/LLVMRemarkStreamer.h +++ b/llvm/include/llvm/IR/LLVMRemarkStreamer.h @@ -14,14 +14,20 @@ #ifndef LLVM_IR_LLVMREMARKSTREAMER_H #define LLVM_IR_LLVMREMARKSTREAMER_H -#include "llvm/IR/DiagnosticInfo.h" -#include "llvm/Remarks/RemarkStreamer.h" +#include "llvm/Remarks/Remark.h" #include "llvm/Support/Error.h" -#include "llvm/Support/ToolOutputFile.h" #include <memory> #include <string> namespace llvm { + +class DiagnosticInfoOptimizationBase; +class LLVMContext; +class ToolOutputFile; +namespace remarks { +class RemarkStreamer; +} + /// Streamer for LLVM remarks which has logic for dealing with DiagnosticInfo /// objects. 
class LLVMRemarkStreamer { diff --git a/llvm/include/llvm/IR/LegacyPassManager.h b/llvm/include/llvm/IR/LegacyPassManager.h index 2459f0a5450a..b3a4820ba0e4 100644 --- a/llvm/include/llvm/IR/LegacyPassManager.h +++ b/llvm/include/llvm/IR/LegacyPassManager.h @@ -16,11 +16,11 @@ #ifndef LLVM_IR_LEGACYPASSMANAGER_H #define LLVM_IR_LEGACYPASSMANAGER_H -#include "llvm/Pass.h" #include "llvm/Support/CBindingWrapping.h" namespace llvm { +class Function; class Pass; class Module; diff --git a/llvm/include/llvm/IR/MDBuilder.h b/llvm/include/llvm/IR/MDBuilder.h index 51be8667f1c1..42829388b79a 100644 --- a/llvm/include/llvm/IR/MDBuilder.h +++ b/llvm/include/llvm/IR/MDBuilder.h @@ -16,7 +16,6 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringRef.h" -#include "llvm/IR/Constants.h" #include "llvm/IR/GlobalValue.h" #include "llvm/Support/DataTypes.h" #include <utility> @@ -28,6 +27,7 @@ template <typename T> class ArrayRef; class LLVMContext; class Constant; class ConstantAsMetadata; +class Function; class MDNode; class MDString; class Metadata; diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h index 26d70b4db2d5..7965884990e5 100644 --- a/llvm/include/llvm/IR/Metadata.h +++ b/llvm/include/llvm/IR/Metadata.h @@ -20,9 +20,7 @@ #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/None.h" #include "llvm/ADT/PointerUnion.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/ilist_node.h" #include "llvm/ADT/iterator_range.h" @@ -46,6 +44,8 @@ namespace llvm { class Module; class ModuleSlotTracker; class raw_ostream; +template <typename T> class StringMapEntry; +template <typename ValueTy> class StringMapEntryStorage; class Type; enum LLVMConstants : uint32_t { @@ -682,6 +682,10 @@ struct AAMDNodes { // Shift tbaa.struct Metadata node to start off bytes later static MDNode *shiftTBAAStruct(MDNode *M, size_t off); + // Extend tbaa Metadata node to apply to a series of bytes of length len. + // A size of -1 denotes an unknown size. + static MDNode *extendToTBAA(MDNode *TBAA, ssize_t len); + /// Given two sets of AAMDNodes that apply to the same pointer, /// give the best AAMDNodes that are compatible with both (i.e. a set of /// nodes whose allowable aliasing conclusions are a subset of those @@ -708,6 +712,21 @@ struct AAMDNodes { return Result; } + /// Create a new AAMDNode that describes this AAMDNode after extending it to + /// apply to a series of bytes of length Len. A size of -1 denotes an unknown + /// size. + AAMDNodes extendTo(ssize_t Len) const { + AAMDNodes Result; + Result.TBAA = TBAA ? extendToTBAA(TBAA, Len) : nullptr; + // tbaa.struct contains (offset, size, type) triples. Extending the length + // of the tbaa.struct doesn't require changing this (though more information + // could be provided by adding more triples at subsequent lengths). + Result.TBAAStruct = TBAAStruct; + Result.Scope = Scope; + Result.NoAlias = NoAlias; + return Result; + } + /// Given two sets of AAMDNodes applying to potentially different locations, /// determine the best AAMDNodes that apply to both. 
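The new AAMDNodes::extendTo above complements the existing shift helpers: when a transform widens a memory access, the alias metadata can be stretched over the longer range instead of being dropped. A hedged sketch of the intended call shape, with -1 for an unknown length:

    #include "llvm/IR/Metadata.h"
    using namespace llvm;

    static AAMDNodes aaForWidenedAccess(const AAMDNodes &Orig, ssize_t Len) {
      // TBAA is extended to cover Len bytes; tbaa.struct, scope and
      // noalias carry over unchanged, as documented above.
      return Orig.extendTo(Len);
    }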
AAMDNodes merge(const AAMDNodes &Other) const; diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h index ec1d5ef79eed..b76bc879fb45 100644 --- a/llvm/include/llvm/IR/ModuleSummaryIndex.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h @@ -22,7 +22,6 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/TinyPtrVector.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Module.h" diff --git a/llvm/include/llvm/IR/PassInstrumentation.h b/llvm/include/llvm/IR/PassInstrumentation.h index 8e81f30b2289..27dd075bbdb2 100644 --- a/llvm/include/llvm/IR/PassInstrumentation.h +++ b/llvm/include/llvm/IR/PassInstrumentation.h @@ -86,7 +86,7 @@ public: using AnalysesClearedFunc = void(StringRef); public: - PassInstrumentationCallbacks() {} + PassInstrumentationCallbacks() = default; /// Copying PassInstrumentationCallbacks is not intended. PassInstrumentationCallbacks(const PassInstrumentationCallbacks &) = delete; diff --git a/llvm/include/llvm/IR/PassManager.h b/llvm/include/llvm/IR/PassManager.h index e88d2233daba..12f9052a9edd 100644 --- a/llvm/include/llvm/IR/PassManager.h +++ b/llvm/include/llvm/IR/PassManager.h @@ -46,11 +46,8 @@ #include "llvm/IR/Module.h" #include "llvm/IR/PassInstrumentation.h" #include "llvm/IR/PassManagerInternal.h" -#include "llvm/Pass.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/TimeProfiler.h" #include "llvm/Support/TypeName.h" -#include <algorithm> #include <cassert> #include <cstring> #include <iterator> @@ -473,7 +470,7 @@ class PassManager : public PassInfoMixin< PassManager<IRUnitT, AnalysisManagerT, ExtraArgTs...>> { public: /// Construct a pass manager. - explicit PassManager() {} + explicit PassManager() = default; // FIXME: These are equivalent to the default move constructor/move // assignment. However, using = default triggers linker errors due to the diff --git a/llvm/include/llvm/IR/PassManagerImpl.h b/llvm/include/llvm/IR/PassManagerImpl.h index bb4fbe98b082..3c94cf2811f6 100644 --- a/llvm/include/llvm/IR/PassManagerImpl.h +++ b/llvm/include/llvm/IR/PassManagerImpl.h @@ -20,7 +20,7 @@ namespace llvm { template <typename IRUnitT, typename... ExtraArgTs> -inline AnalysisManager<IRUnitT, ExtraArgTs...>::AnalysisManager() {} +inline AnalysisManager<IRUnitT, ExtraArgTs...>::AnalysisManager() = default; template <typename IRUnitT, typename... 
ExtraArgTs> inline AnalysisManager<IRUnitT, ExtraArgTs...>::AnalysisManager( diff --git a/llvm/include/llvm/IR/PassTimingInfo.h b/llvm/include/llvm/IR/PassTimingInfo.h index e44321b4af66..49a83605c47a 100644 --- a/llvm/include/llvm/IR/PassTimingInfo.h +++ b/llvm/include/llvm/IR/PassTimingInfo.h @@ -15,8 +15,6 @@ #ifndef LLVM_IR_PASSTIMINGINFO_H #define LLVM_IR_PASSTIMINGINFO_H -#include "llvm/ADT/Any.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" diff --git a/llvm/include/llvm/IR/ReplaceConstant.h b/llvm/include/llvm/IR/ReplaceConstant.h index 5ad1d0a6f920..1d6b10d9a78b 100644 --- a/llvm/include/llvm/IR/ReplaceConstant.h +++ b/llvm/include/llvm/IR/ReplaceConstant.h @@ -14,13 +14,16 @@ #ifndef LLVM_IR_REPLACECONSTANT_H #define LLVM_IR_REPLACECONSTANT_H -#include "llvm/IR/Constants.h" -#include "llvm/IR/Instruction.h" #include <map> #include <vector> namespace llvm { +class ConstantExpr; +class Instruction; +class Use; +template <typename PtrType> class SmallPtrSetImpl; + /// The given instruction \p I contains given constant expression \p CE as one /// of its operands, possibly nested within constant expression trees. Convert /// all reachable paths from contant expression operands of \p I to \p CE into diff --git a/llvm/include/llvm/IR/SSAContext.h b/llvm/include/llvm/IR/SSAContext.h index 8879512610c2..8ca23e3ee077 100644 --- a/llvm/include/llvm/IR/SSAContext.h +++ b/llvm/include/llvm/IR/SSAContext.h @@ -15,18 +15,15 @@ #ifndef LLVM_IR_SSACONTEXT_H #define LLVM_IR_SSACONTEXT_H -#include "llvm/ADT/GenericSSAContext.h" -#include "llvm/IR/ModuleSlotTracker.h" #include "llvm/Support/Printable.h" -#include <memory> - namespace llvm { class BasicBlock; class Function; class Instruction; class Value; template <typename, bool> class DominatorTreeBase; +template <typename _FunctionT> class GenericSSAContext; template <> class GenericSSAContext<Function> { Function *F; diff --git a/llvm/include/llvm/IR/SafepointIRVerifier.h b/llvm/include/llvm/IR/SafepointIRVerifier.h index 76b147e690be..246d236adb38 100644 --- a/llvm/include/llvm/IR/SafepointIRVerifier.h +++ b/llvm/include/llvm/IR/SafepointIRVerifier.h @@ -37,7 +37,7 @@ FunctionPass *createSafepointIRVerifierPass(); class SafepointIRVerifierPass : public PassInfoMixin<SafepointIRVerifierPass> { public: - explicit SafepointIRVerifierPass() {} + explicit SafepointIRVerifierPass() = default; PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; diff --git a/llvm/include/llvm/IR/Statepoint.h b/llvm/include/llvm/IR/Statepoint.h index a254a67e6b1f..da9c732ad818 100644 --- a/llvm/include/llvm/IR/Statepoint.h +++ b/llvm/include/llvm/IR/Statepoint.h @@ -19,10 +19,9 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/iterator_range.h" #include "llvm/IR/Attributes.h" -#include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Instruction.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" @@ -204,11 +203,6 @@ public: /// For example this could happen due to relocations on unwinding /// path of invoke. inline std::vector<const GCRelocateInst *> getGCRelocates() const; - - /// Returns pair of boolean flags. The first one is true is there is - /// a gc.result intrinsic in the same block as statepoint. The second flag - /// is true if there is an intrinsic outside of the block with statepoint. 
- inline std::pair<bool, bool> getGCResultLocality() const; }; std::vector<const GCRelocateInst *> GCStatepointInst::getGCRelocates() const { @@ -236,18 +230,6 @@ std::vector<const GCRelocateInst *> GCStatepointInst::getGCRelocates() const { return Result; } -std::pair<bool, bool> GCStatepointInst::getGCResultLocality() const { - std::pair<bool, bool> Res(false, false); - for (auto *U : users()) - if (auto *GRI = dyn_cast<GCResultInst>(U)) { - if (GRI->getParent() == this->getParent()) - Res.first = true; - else - Res.second = true; - } - return Res; -} - /// Call sites that get wrapped by a gc.statepoint (currently only in /// RewriteStatepointsForGC and potentially in other passes in the future) can /// have attributes that describe properties of gc.statepoint call they will be diff --git a/llvm/include/llvm/IR/Type.h b/llvm/include/llvm/IR/Type.h index 98c97375ad7b..e4e8a5529c87 100644 --- a/llvm/include/llvm/IR/Type.h +++ b/llvm/include/llvm/IR/Type.h @@ -15,7 +15,6 @@ #define LLVM_IR_TYPE_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" @@ -33,6 +32,7 @@ class LLVMContext; class PointerType; class raw_ostream; class StringRef; +template <typename PtrType> class SmallPtrSetImpl; /// The instances of the Type class are immutable: once they are created, /// they are never changed. Also note that only one instance of a particular diff --git a/llvm/include/llvm/IR/Use.h b/llvm/include/llvm/IR/Use.h index 917db2679c55..64b86f3a4396 100644 --- a/llvm/include/llvm/IR/Use.h +++ b/llvm/include/llvm/IR/Use.h @@ -25,7 +25,6 @@ #define LLVM_IR_USE_H #include "llvm-c/Types.h" -#include "llvm/ADT/PointerIntPair.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/Compiler.h" diff --git a/llvm/include/llvm/InterfaceStub/IFSStub.h b/llvm/include/llvm/InterfaceStub/IFSStub.h index 5b16b8304692..8c3cd171b1a2 100644 --- a/llvm/include/llvm/InterfaceStub/IFSStub.h +++ b/llvm/include/llvm/InterfaceStub/IFSStub.h @@ -95,7 +95,7 @@ struct IFSStub { std::vector<std::string> NeededLibs; std::vector<IFSSymbol> Symbols; - IFSStub() {} + IFSStub() = default; IFSStub(const IFSStub &Stub); IFSStub(IFSStub &&Stub); }; @@ -106,7 +106,7 @@ struct IFSStub { // This class makes it possible to map a second traits so the same data // structure can be used for 2 different yaml schema. struct IFSStubTriple : IFSStub { - IFSStubTriple() {} + IFSStubTriple() = default; IFSStubTriple(const IFSStub &Stub); IFSStubTriple(const IFSStubTriple &Stub); IFSStubTriple(IFSStubTriple &&Stub); diff --git a/llvm/include/llvm/LineEditor/LineEditor.h b/llvm/include/llvm/LineEditor/LineEditor.h index 0beaf1bb23a9..9f4ea5bee139 100644 --- a/llvm/include/llvm/LineEditor/LineEditor.h +++ b/llvm/include/llvm/LineEditor/LineEditor.h @@ -64,7 +64,7 @@ public: /// A possible completion at a given cursor position. 
struct Completion { - Completion() {} + Completion() = default; Completion(const std::string &TypedText, const std::string &DisplayText) : TypedText(TypedText), DisplayText(DisplayText) {} diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h index 88d86d5b675a..d2307d692278 100644 --- a/llvm/include/llvm/MC/MCContext.h +++ b/llvm/include/llvm/MC/MCContext.h @@ -80,6 +80,10 @@ namespace llvm { private: Environment Env; + /// The name of the Segment where Swift5 Reflection Section data will be + /// output + StringRef Swift5ReflectionSegmentName; + /// The triple for this object. Triple TT; @@ -399,13 +403,17 @@ namespace llvm { const MCRegisterInfo *MRI, const MCSubtargetInfo *MSTI, const SourceMgr *Mgr = nullptr, MCTargetOptions const *TargetOpts = nullptr, - bool DoAutoReset = true); + bool DoAutoReset = true, + StringRef Swift5ReflSegmentName = {}); MCContext(const MCContext &) = delete; MCContext &operator=(const MCContext &) = delete; ~MCContext(); Environment getObjectFileType() const { return Env; } + const StringRef &getSwift5ReflectionSegmentName() const { + return Swift5ReflectionSegmentName; + } const Triple &getTargetTriple() const { return TT; } const SourceMgr *getSourceManager() const { return SrcMgr; } diff --git a/llvm/include/llvm/MC/MCObjectFileInfo.h b/llvm/include/llvm/MC/MCObjectFileInfo.h index 5e0cccaba77f..3c1d10c4e62f 100644 --- a/llvm/include/llvm/MC/MCObjectFileInfo.h +++ b/llvm/include/llvm/MC/MCObjectFileInfo.h @@ -15,6 +15,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/Swift.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/VersionTuple.h" @@ -228,6 +229,10 @@ protected: MCSection *ReadOnly8Section = nullptr; MCSection *ReadOnly16Section = nullptr; + // Swift5 Reflection Data Sections + std::array<MCSection *, binaryformat::Swift5ReflectionSectionKind::last> + Swift5ReflectionSections = {}; + public: void initMCObjectFileInfo(MCContext &MCCtx, bool PIC, bool LargeCodeModel = false); @@ -423,6 +428,15 @@ public: bool isPositionIndependent() const { return PositionIndependent; } + // Swift5 Reflection Data Sections + MCSection *getSwift5ReflectionSection( + llvm::binaryformat::Swift5ReflectionSectionKind ReflSectionKind) { + return ReflSectionKind != + llvm::binaryformat::Swift5ReflectionSectionKind::unknown + ?
Swift5ReflectionSections[ReflSectionKind] + : nullptr; + } + private: bool PositionIndependent = false; MCContext *Ctx = nullptr; diff --git a/llvm/include/llvm/MC/MCPseudoProbe.h b/llvm/include/llvm/MC/MCPseudoProbe.h index 17b7446baae8..9ff68f4236ca 100644 --- a/llvm/include/llvm/MC/MCPseudoProbe.h +++ b/llvm/include/llvm/MC/MCPseudoProbe.h @@ -268,7 +268,7 @@ public: // Used for decoding uint32_t ChildrenToProcess = 0; - MCDecodedPseudoProbeInlineTree(){}; + MCDecodedPseudoProbeInlineTree() = default; MCDecodedPseudoProbeInlineTree(const InlineSite &Site) : ISite(Site){}; // Return false if it's a dummy inline site diff --git a/llvm/include/llvm/MCA/CustomBehaviour.h b/llvm/include/llvm/MCA/CustomBehaviour.h index 5b993c6a5345..c4be5312ea19 100644 --- a/llvm/include/llvm/MCA/CustomBehaviour.h +++ b/llvm/include/llvm/MCA/CustomBehaviour.h @@ -41,7 +41,7 @@ public: InstrPostProcess(const MCSubtargetInfo &STI, const MCInstrInfo &MCII) : STI(STI), MCII(MCII) {} - virtual ~InstrPostProcess() {} + virtual ~InstrPostProcess() = default; /// This method can be overriden by targets to modify the mca::Instruction /// object after it has been lowered from the MCInst. diff --git a/llvm/include/llvm/MCA/HWEventListener.h b/llvm/include/llvm/MCA/HWEventListener.h index 5b5b83cccd9c..8298e0705d33 100644 --- a/llvm/include/llvm/MCA/HWEventListener.h +++ b/llvm/include/llvm/MCA/HWEventListener.h @@ -176,7 +176,7 @@ public: virtual void onReleasedBuffers(const InstRef &Inst, ArrayRef<unsigned> Buffers) {} - virtual ~HWEventListener() {} + virtual ~HWEventListener() = default; private: virtual void anchor(); diff --git a/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h b/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h index 7467fd6754f0..1c909b01a390 100644 --- a/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h +++ b/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h @@ -49,7 +49,7 @@ class ResourceStrategy { ResourceStrategy &operator=(const ResourceStrategy &) = delete; public: - ResourceStrategy() {} + ResourceStrategy() = default; virtual ~ResourceStrategy(); /// Selects a processor resource unit from a ReadyMask. diff --git a/llvm/include/llvm/Object/Archive.h b/llvm/include/llvm/Object/Archive.h index 5a5fc90f18bd..b792cbc3d9ac 100644 --- a/llvm/include/llvm/Object/Archive.h +++ b/llvm/include/llvm/Object/Archive.h @@ -45,7 +45,7 @@ protected: public: friend class Archive; virtual std::unique_ptr<AbstractArchiveMemberHeader> clone() const = 0; - virtual ~AbstractArchiveMemberHeader(){}; + virtual ~AbstractArchiveMemberHeader() = default; /// Get the name without looking up long names. 
virtual Expected<StringRef> getRawName() const = 0; diff --git a/llvm/include/llvm/Object/ELFTypes.h b/llvm/include/llvm/Object/ELFTypes.h index e59a63d93989..c674b80c814d 100644 --- a/llvm/include/llvm/Object/ELFTypes.h +++ b/llvm/include/llvm/Object/ELFTypes.h @@ -699,7 +699,7 @@ private: } } - Elf_Note_Iterator_Impl() {} + Elf_Note_Iterator_Impl() = default; explicit Elf_Note_Iterator_Impl(Error &Err) : Err(&Err) {} Elf_Note_Iterator_Impl(const uint8_t *Start, size_t Size, Error &Err) : RemainingSize(Size), Err(&Err) { diff --git a/llvm/include/llvm/Object/MachO.h b/llvm/include/llvm/Object/MachO.h index ede742c47f97..49a0706b84be 100644 --- a/llvm/include/llvm/Object/MachO.h +++ b/llvm/include/llvm/Object/MachO.h @@ -22,6 +22,7 @@ #include "llvm/ADT/Triple.h" #include "llvm/ADT/iterator_range.h" #include "llvm/BinaryFormat/MachO.h" +#include "llvm/BinaryFormat/Swift.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Object/Binary.h" #include "llvm/Object/ObjectFile.h" @@ -583,6 +584,9 @@ public: StringRef mapDebugSectionName(StringRef Name) const override; + llvm::binaryformat::Swift5ReflectionSectionKind + mapReflectionSectionNameToEnumValue(StringRef SectionName) const override; + bool hasPageZeroSegment() const { return HasPageZeroSegment; } static bool classof(const Binary *v) { diff --git a/llvm/include/llvm/Object/ObjectFile.h b/llvm/include/llvm/Object/ObjectFile.h index 12704b1fc88e..950c38a599d5 100644 --- a/llvm/include/llvm/Object/ObjectFile.h +++ b/llvm/include/llvm/Object/ObjectFile.h @@ -18,6 +18,7 @@ #include "llvm/ADT/Triple.h" #include "llvm/ADT/iterator_range.h" #include "llvm/BinaryFormat/Magic.h" +#include "llvm/BinaryFormat/Swift.h" #include "llvm/Object/Binary.h" #include "llvm/Object/Error.h" #include "llvm/Object/SymbolicFile.h" @@ -290,6 +291,11 @@ protected: virtual void getRelocationTypeName(DataRefImpl Rel, SmallVectorImpl<char> &Result) const = 0; + virtual llvm::binaryformat::Swift5ReflectionSectionKind + mapReflectionSectionNameToEnumValue(StringRef SectionName) const { + return llvm::binaryformat::Swift5ReflectionSectionKind::unknown; + }; + Expected<uint64_t> getSymbolValue(DataRefImpl Symb) const; public: diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h index 9eb754a4d824..561cd54fa998 100644 --- a/llvm/include/llvm/Passes/StandardInstrumentations.h +++ b/llvm/include/llvm/Passes/StandardInstrumentations.h @@ -75,7 +75,7 @@ private: class OptBisectInstrumentation { public: - OptBisectInstrumentation() {} + OptBisectInstrumentation() = default; void registerCallbacks(PassInstrumentationCallbacks &PIC); }; diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index 4d3bb0e8ff10..a416eb28906e 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -16,6 +16,7 @@ #define LLVM_PROFILEDATA_INSTRPROF_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitmaskEnum.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" @@ -277,6 +278,18 @@ void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName); /// the duplicated profile variables for Comdat functions. bool needsComdatForCounter(const Function &F, const Module &M); +/// An enum describing the attributes of an instrumented profile. +enum class InstrProfKind { + Unknown = 0x0, + FE = 0x1, // A frontend clang profile, incompatible with other attrs. 
+ IR = 0x2, // An IR-level profile (default when -fprofile-generate is used). + BB = 0x4, // A profile with entry basic block instrumentation. + CS = 0x8, // A context sensitive IR-level profile. + SingleByteCoverage = 0x10, // Use single byte probes for coverage. + FunctionEntryOnly = 0x20, // Only instrument the function entry basic block. + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/FunctionEntryOnly) +}; + const std::error_category &instrprof_category(); enum class instrprof_error { @@ -1155,12 +1168,6 @@ struct Header { void getMemOPSizeRangeFromOption(StringRef Str, int64_t &RangeStart, int64_t &RangeLast); -// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime -// aware this is an ir_level profile so it can set the version flag. -GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS, - bool InstrEntryBBEnabled, - bool DebugInfoCorrelate); - // Create the variable for the profile file name. void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput); diff --git a/llvm/include/llvm/ProfileData/InstrProfCorrelator.h b/llvm/include/llvm/ProfileData/InstrProfCorrelator.h index 135936b99f24..3d0076fd9035 100644 --- a/llvm/include/llvm/ProfileData/InstrProfCorrelator.h +++ b/llvm/include/llvm/ProfileData/InstrProfCorrelator.h @@ -55,7 +55,7 @@ public: enum InstrProfCorrelatorKind { CK_32Bit, CK_64Bit }; InstrProfCorrelatorKind getKind() const { return Kind; } - virtual ~InstrProfCorrelator() {} + virtual ~InstrProfCorrelator() = default; protected: struct Context { diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc index 0544b6b2ef71..62054a6a3df5 100644 --- a/llvm/include/llvm/ProfileData/InstrProfData.inc +++ b/llvm/include/llvm/ProfileData/InstrProfData.inc @@ -660,6 +660,8 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, * generated profile, and 0 if this is a Clang FE generated profile. * 1 in bit 57 indicates there are context-sensitive records in the profile. * The 59th bit indicates whether to use debug info to correlate profiles. + * The 60th bit indicates single byte coverage instrumentation. + * The 61st bit indicates function entry instrumentation only. */ #define VARIANT_MASKS_ALL 0xff00000000000000ULL #define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL) @@ -667,6 +669,8 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, #define VARIANT_MASK_CSIR_PROF (0x1ULL << 57) #define VARIANT_MASK_INSTR_ENTRY (0x1ULL << 58) #define VARIANT_MASK_DBG_CORRELATE (0x1ULL << 59) +#define VARIANT_MASK_BYTE_COVERAGE (0x1ULL << 60) +#define VARIANT_MASK_FUNCTION_ENTRY_ONLY (0x1ULL << 61) #define INSTR_PROF_RAW_VERSION_VAR __llvm_profile_raw_version #define INSTR_PROF_PROFILE_RUNTIME_VAR __llvm_profile_runtime #define INSTR_PROF_PROFILE_COUNTER_BIAS_VAR __llvm_profile_counter_bias diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h index 1326cbf0e1ce..e9dd19a69792 100644 --- a/llvm/include/llvm/ProfileData/InstrProfReader.h +++ b/llvm/include/llvm/ProfileData/InstrProfReader.h @@ -100,6 +100,16 @@ public: /// Return true if we must provide debug info to create PGO profiles. virtual bool useDebugInfoCorrelate() const { return false; } + /// Return true if the profile has single byte counters representing coverage. + virtual bool hasSingleByteCoverage() const = 0; + + /// Return true if the profile only instruments function entries. 
+ virtual bool functionEntryOnly() const = 0; + + /// Returns a BitsetEnum describing the attributes of the profile. To check + /// individual attributes prefer using the helpers above. + virtual InstrProfKind getProfileKind() const = 0; + /// Return the PGO symtab. There are three different readers: /// Raw, Text, and Indexed profile readers. The first two types /// of readers are used only by llvm-profdata tool, while the indexed @@ -176,9 +186,8 @@ private: std::unique_ptr<MemoryBuffer> DataBuffer; /// Iterator over the profile data. line_iterator Line; - bool IsIRLevelProfile = false; - bool HasCSIRLevelProfile = false; - bool InstrEntryBBEnabled = false; + /// The attributes of the current profile. + InstrProfKind ProfileKind = InstrProfKind::Unknown; Error readValueProfileData(InstrProfRecord &Record); @@ -191,11 +200,27 @@ public: /// Return true if the given buffer is in text instrprof format. static bool hasFormat(const MemoryBuffer &Buffer); - bool isIRLevelProfile() const override { return IsIRLevelProfile; } + bool isIRLevelProfile() const override { + return static_cast<bool>(ProfileKind & InstrProfKind::IR); + } - bool hasCSIRLevelProfile() const override { return HasCSIRLevelProfile; } + bool hasCSIRLevelProfile() const override { + return static_cast<bool>(ProfileKind & InstrProfKind::CS); + } - bool instrEntryBBEnabled() const override { return InstrEntryBBEnabled; } + bool instrEntryBBEnabled() const override { + return static_cast<bool>(ProfileKind & InstrProfKind::BB); + } + + bool hasSingleByteCoverage() const override { + return static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage); + } + + bool functionEntryOnly() const override { + return static_cast<bool>(ProfileKind & InstrProfKind::FunctionEntryOnly); + } + + InstrProfKind getProfileKind() const override { return ProfileKind; } /// Read the header. Error readHeader() override; @@ -276,6 +301,17 @@ public: return (Version & VARIANT_MASK_DBG_CORRELATE) != 0; } + bool hasSingleByteCoverage() const override { + return (Version & VARIANT_MASK_BYTE_COVERAGE) != 0; + } + + bool functionEntryOnly() const override { + return (Version & VARIANT_MASK_FUNCTION_ENTRY_ONLY) != 0; + } + + /// Returns a BitsetEnum describing the attributes of the raw instr profile. + InstrProfKind getProfileKind() const override; + InstrProfSymtab &getSymtab() override { assert(Symtab.get()); return *Symtab.get(); @@ -333,7 +369,9 @@ private: return Symtab->getFuncName(swap(NameRef)); } - int getCounterTypeSize() const { return sizeof(uint64_t); } + int getCounterTypeSize() const { + return hasSingleByteCoverage() ? 
sizeof(uint8_t) : sizeof(uint64_t); + } }; using RawInstrProfReader32 = RawInstrProfReader<uint32_t>; @@ -413,6 +451,9 @@ struct InstrProfReaderIndexBase { virtual bool isIRLevelProfile() const = 0; virtual bool hasCSIRLevelProfile() const = 0; virtual bool instrEntryBBEnabled() const = 0; + virtual bool hasSingleByteCoverage() const = 0; + virtual bool functionEntryOnly() const = 0; + virtual InstrProfKind getProfileKind() const = 0; virtual Error populateSymtab(InstrProfSymtab &) = 0; }; @@ -465,6 +506,16 @@ public: return (FormatVersion & VARIANT_MASK_INSTR_ENTRY) != 0; } + bool hasSingleByteCoverage() const override { + return (FormatVersion & VARIANT_MASK_BYTE_COVERAGE) != 0; + } + + bool functionEntryOnly() const override { + return (FormatVersion & VARIANT_MASK_FUNCTION_ENTRY_ONLY) != 0; + } + + InstrProfKind getProfileKind() const override; + Error populateSymtab(InstrProfSymtab &Symtab) override { return Symtab.create(HashTable->keys()); } @@ -473,7 +524,7 @@ public: /// Name matcher supporting fuzzy matching of symbol names to names in profiles. class InstrProfReaderRemapper { public: - virtual ~InstrProfReaderRemapper() {} + virtual ~InstrProfReaderRemapper() = default; virtual Error populateRemappings() { return Error::success(); } virtual Error getRecords(StringRef FuncName, ArrayRef<NamedInstrProfRecord> &Data) = 0; @@ -523,6 +574,18 @@ public: return Index->instrEntryBBEnabled(); } + bool hasSingleByteCoverage() const override { + return Index->hasSingleByteCoverage(); + } + + bool functionEntryOnly() const override { return Index->functionEntryOnly(); } + + /// Returns a BitsetEnum describing the attributes of the indexed instr + /// profile. + InstrProfKind getProfileKind() const override { + return Index->getProfileKind(); + } + /// Return true if the given buffer is in an indexed instrprof format. static bool hasFormat(const MemoryBuffer &DataBuffer); diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h index 97c80de6aa23..af1e46cf4fc2 100644 --- a/llvm/include/llvm/ProfileData/InstrProfWriter.h +++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h @@ -33,19 +33,17 @@ class raw_fd_ostream; class InstrProfWriter { public: using ProfilingData = SmallDenseMap<uint64_t, InstrProfRecord>; - // PF_IRLevelWithCS is the profile from context sensitive IR instrumentation. - enum ProfKind { PF_Unknown = 0, PF_FE, PF_IRLevel, PF_IRLevelWithCS }; private: bool Sparse; StringMap<ProfilingData> FunctionData; - ProfKind ProfileKind = PF_Unknown; - bool InstrEntryBBEnabled; + // An enum describing the attributes of the profile. + InstrProfKind ProfileKind = InstrProfKind::Unknown; // Use raw pointer here for the incomplete type object. InstrProfRecordWriterTrait *InfoObj; public: - InstrProfWriter(bool Sparse = false, bool InstrEntryBBEnabled = false); + InstrProfWriter(bool Sparse = false); ~InstrProfWriter(); StringMap<ProfilingData> &getProfileData() { return FunctionData; } @@ -79,30 +77,41 @@ public: /// Write the profile, returning the raw data. For testing. std::unique_ptr<MemoryBuffer> writeBuffer(); - /// Set the ProfileKind. Report error if mixing FE and IR level profiles. - /// \c WithCS indicates if this is for contenxt sensitive instrumentation. - Error setIsIRLevelProfile(bool IsIRLevel, bool WithCS) { - if (ProfileKind == PF_Unknown) { - if (IsIRLevel) - ProfileKind = WithCS ? 
PF_IRLevelWithCS : PF_IRLevel; - else - ProfileKind = PF_FE; + /// Update the attributes of the current profile from the attributes + /// specified. An error is returned if IR and FE profiles are mixed. + Error mergeProfileKind(const InstrProfKind Other) { + // If the kind is unset, this is the first profile we are merging so just + // set it to the given type. + if (ProfileKind == InstrProfKind::Unknown) { + ProfileKind = Other; return Error::success(); } - if (((ProfileKind != PF_FE) && !IsIRLevel) || - ((ProfileKind == PF_FE) && IsIRLevel)) + // Returns true if merging should fail, assuming A and B are incompatible. + auto testIncompatible = [&](InstrProfKind A, InstrProfKind B) { + return (static_cast<bool>(ProfileKind & A) && + static_cast<bool>(Other & B)) || + (static_cast<bool>(ProfileKind & B) && + static_cast<bool>(Other & A)); + }; + + // Check if the profiles are incompatible. Clang frontend profiles can't be + // merged with other profile types. + if (static_cast<bool>((ProfileKind & InstrProfKind::FE) ^ + (Other & InstrProfKind::FE))) { return make_error<InstrProfError>(instrprof_error::unsupported_version); + } + if (testIncompatible(InstrProfKind::FunctionEntryOnly, InstrProfKind::BB)) { + return make_error<InstrProfError>( + instrprof_error::unsupported_version, + "cannot merge FunctionEntryOnly profiles and BB profiles together"); + } - // When merging a context-sensitive profile (WithCS == true) with an IRLevel - // profile, set the kind to PF_IRLevelWithCS. - if (ProfileKind == PF_IRLevel && WithCS) - ProfileKind = PF_IRLevelWithCS; - + // Now we update the profile type with the bits that are set. + ProfileKind |= Other; return Error::success(); } - void setInstrEntryBBEnabled(bool Enabled) { InstrEntryBBEnabled = Enabled; } // Internal interface for testing purpose only. void setValueProfDataEndianness(support::endianness Endianness); void setOutputSparse(bool Sparse); diff --git a/llvm/include/llvm/ProfileData/MemProfData.inc b/llvm/include/llvm/ProfileData/MemProfData.inc index f2cb3738f053..ff22a697965c 100644 --- a/llvm/include/llvm/ProfileData/MemProfData.inc +++ b/llvm/include/llvm/ProfileData/MemProfData.inc @@ -20,11 +20,10 @@ * \*===----------------------------------------------------------------------===*/ - #ifdef _MSC_VER -#define PACKED(__decl__) __pragma(pack(push,1)) __decl__ __pragma(pack(pop)) +#define PACKED(...) __pragma(pack(push,1)) __VA_ARGS__ __pragma(pack(pop)) #else -#define PACKED(__decl__) __decl__ __attribute__((__packed__)) +#define PACKED(...) __VA_ARGS__ __attribute__((__packed__)) #endif // A 64-bit magic number to uniquely identify the raw binary memprof profile file. @@ -47,14 +46,106 @@ PACKED(struct Header { uint64_t StackOffset; }); + // A struct describing the information necessary to describe a /proc/maps // segment entry for a particular binary/library identified by its build id. PACKED(struct SegmentEntry { uint64_t Start; uint64_t End; uint64_t Offset; - uint8_t BuildId[32]; + // This field is unused until sanitizer procmaps support for build ids for + // Linux-Elf is implemented.
+ uint8_t BuildId[32] = {0}; + + SegmentEntry(uint64_t S, uint64_t E, uint64_t O) : + Start(S), End(E), Offset(O) {} + + SegmentEntry(const SegmentEntry& S) { + Start = S.Start; + End = S.End; + Offset = S.Offset; + } + + SegmentEntry& operator=(const SegmentEntry& S) { + Start = S.Start; + End = S.End; + Offset = S.Offset; + return *this; + } + + bool operator==(const SegmentEntry& S) const { + return Start == S.Start && + End == S.End && + Offset == S.Offset; + } }); + +// A struct representing the heap allocation characteristics of a particular +// runtime context. This struct is shared between the compiler-rt runtime and +// the raw profile reader. The indexed format uses a separate, self-describing +// backwards compatible format. +PACKED(struct MemInfoBlock { + uint32_t alloc_count; + uint64_t total_access_count, min_access_count, max_access_count; + uint64_t total_size; + uint32_t min_size, max_size; + uint32_t alloc_timestamp, dealloc_timestamp; + uint64_t total_lifetime; + uint32_t min_lifetime, max_lifetime; + uint32_t alloc_cpu_id, dealloc_cpu_id; + uint32_t num_migrated_cpu; + + // Only compared to prior deallocated object currently. + uint32_t num_lifetime_overlaps; + uint32_t num_same_alloc_cpu; + uint32_t num_same_dealloc_cpu; + + uint64_t data_type_id; // TODO: hash of type name + + MemInfoBlock() : alloc_count(0) {} + + MemInfoBlock(uint32_t size, uint64_t access_count, uint32_t alloc_timestamp, + uint32_t dealloc_timestamp, uint32_t alloc_cpu, uint32_t dealloc_cpu) + : alloc_count(1), total_access_count(access_count), + min_access_count(access_count), max_access_count(access_count), + total_size(size), min_size(size), max_size(size), + alloc_timestamp(alloc_timestamp), dealloc_timestamp(dealloc_timestamp), + total_lifetime(dealloc_timestamp - alloc_timestamp), + min_lifetime(total_lifetime), max_lifetime(total_lifetime), + alloc_cpu_id(alloc_cpu), dealloc_cpu_id(dealloc_cpu), + num_lifetime_overlaps(0), num_same_alloc_cpu(0), + num_same_dealloc_cpu(0) { + num_migrated_cpu = alloc_cpu_id != dealloc_cpu_id; + } + + void Merge(const MemInfoBlock &newMIB) { + alloc_count += newMIB.alloc_count; + + total_access_count += newMIB.total_access_count; + min_access_count = newMIB.min_access_count < min_access_count ? newMIB.min_access_count : min_access_count; + max_access_count = newMIB.max_access_count > max_access_count ? newMIB.max_access_count : max_access_count; + + total_size += newMIB.total_size; + min_size = newMIB.min_size < min_size ? newMIB.min_size : min_size; + max_size = newMIB.max_size > max_size ? newMIB.max_size : max_size; + + total_lifetime += newMIB.total_lifetime; + min_lifetime = newMIB.min_lifetime < min_lifetime ? newMIB.min_lifetime : min_lifetime; + max_lifetime = newMIB.max_lifetime > max_lifetime ? newMIB.max_lifetime : max_lifetime; + + // We know newMIB was deallocated later, so just need to check if it was + // allocated before last one deallocated.
+ num_lifetime_overlaps += newMIB.alloc_timestamp < dealloc_timestamp; + alloc_timestamp = newMIB.alloc_timestamp; + dealloc_timestamp = newMIB.dealloc_timestamp; + + num_same_alloc_cpu += alloc_cpu_id == newMIB.alloc_cpu_id; + num_same_dealloc_cpu += dealloc_cpu_id == newMIB.dealloc_cpu_id; + alloc_cpu_id = newMIB.alloc_cpu_id; + dealloc_cpu_id = newMIB.dealloc_cpu_id; + } +}); + } // namespace memprof } // namespace llvm diff --git a/llvm/include/llvm/Remarks/BitstreamRemarkSerializer.h b/llvm/include/llvm/Remarks/BitstreamRemarkSerializer.h index 645a8b3c0b17..19080c0132e3 100644 --- a/llvm/include/llvm/Remarks/BitstreamRemarkSerializer.h +++ b/llvm/include/llvm/Remarks/BitstreamRemarkSerializer.h @@ -17,11 +17,12 @@ #include "llvm/Bitstream/BitstreamWriter.h" #include "llvm/Remarks/BitstreamRemarkContainer.h" #include "llvm/Remarks/RemarkSerializer.h" -#include "llvm/Support/raw_ostream.h" namespace llvm { namespace remarks { +struct Remarks; + /// Serialize the remarks to LLVM bitstream. /// This class provides ways to emit remarks in the LLVM bitstream format and /// its associated metadata. diff --git a/llvm/include/llvm/Remarks/RemarkLinker.h b/llvm/include/llvm/Remarks/RemarkLinker.h index 49fd880be8ba..79d74e39deee 100644 --- a/llvm/include/llvm/Remarks/RemarkLinker.h +++ b/llvm/include/llvm/Remarks/RemarkLinker.h @@ -13,7 +13,6 @@ #ifndef LLVM_REMARKS_REMARKLINKER_H #define LLVM_REMARKS_REMARKLINKER_H -#include "llvm/Object/ObjectFile.h" #include "llvm/Remarks/Remark.h" #include "llvm/Remarks/RemarkFormat.h" #include "llvm/Remarks/RemarkStringTable.h" @@ -22,6 +21,11 @@ #include <set> namespace llvm { + +namespace object { +class ObjectFile; +} + namespace remarks { struct RemarkLinker { diff --git a/llvm/include/llvm/Remarks/RemarkParser.h b/llvm/include/llvm/Remarks/RemarkParser.h index b838f75e530f..61dfdbf3c17c 100644 --- a/llvm/include/llvm/Remarks/RemarkParser.h +++ b/llvm/include/llvm/Remarks/RemarkParser.h @@ -13,9 +13,7 @@ #ifndef LLVM_REMARKS_REMARKPARSER_H #define LLVM_REMARKS_REMARKPARSER_H -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Remarks/Remark.h" #include "llvm/Remarks/RemarkFormat.h" #include "llvm/Support/Error.h" #include <memory> @@ -23,11 +21,13 @@ namespace llvm { namespace remarks { +struct Remark; + class EndOfFileError : public ErrorInfo<EndOfFileError> { public: static char ID; - EndOfFileError() {} + EndOfFileError() = default; void log(raw_ostream &OS) const override { OS << "End of file reached."; } std::error_code convertToErrorCode() const override { diff --git a/llvm/include/llvm/Remarks/RemarkSerializer.h b/llvm/include/llvm/Remarks/RemarkSerializer.h index 90e556df87e7..6217bd98d1a5 100644 --- a/llvm/include/llvm/Remarks/RemarkSerializer.h +++ b/llvm/include/llvm/Remarks/RemarkSerializer.h @@ -16,11 +16,15 @@ #include "llvm/Remarks/Remark.h" #include "llvm/Remarks/RemarkFormat.h" #include "llvm/Remarks/RemarkStringTable.h" -#include "llvm/Support/raw_ostream.h" namespace llvm { + +class raw_ostream; + namespace remarks { +struct Remark; + enum class SerializerMode { Separate, // A mode where the metadata is serialized separately from the // remarks. 
Typically, this is used when the remarks need to be diff --git a/llvm/include/llvm/Remarks/RemarkStreamer.h b/llvm/include/llvm/Remarks/RemarkStreamer.h index 7741cb45b72c..b25cb0c331a4 100644 --- a/llvm/include/llvm/Remarks/RemarkStreamer.h +++ b/llvm/include/llvm/Remarks/RemarkStreamer.h @@ -34,10 +34,12 @@ #include "llvm/Remarks/RemarkSerializer.h" #include "llvm/Support/Error.h" #include "llvm/Support/Regex.h" -#include "llvm/Support/raw_ostream.h" #include <memory> namespace llvm { + +class raw_ostream; + namespace remarks { class RemarkStreamer final { /// The regex used to filter remarks based on the passes that emit them. diff --git a/llvm/include/llvm/Support/AArch64TargetParser.def b/llvm/include/llvm/Support/AArch64TargetParser.def index 26f4bae53119..a953e9439db4 100644 --- a/llvm/include/llvm/Support/AArch64TargetParser.def +++ b/llvm/include/llvm/Support/AArch64TargetParser.def @@ -204,6 +204,9 @@ AARCH64_CPU_NAME("cortex-r82", ARMV8R, FK_CRYPTO_NEON_FP_ARMV8, false, AARCH64_CPU_NAME("cortex-x1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC | AArch64::AEK_SSBS)) +AARCH64_CPU_NAME("cortex-x1c", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, + (AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC | + AArch64::AEK_SSBS | AArch64::AEK_PAUTH)) AARCH64_CPU_NAME("cortex-x2", ARMV9A, FK_NEON_FP_ARMV8, false, (AArch64::AEK_MTE | AArch64::AEK_BF16 | AArch64::AEK_I8MM | AArch64::AEK_PAUTH | AArch64::AEK_SSBS | AArch64::AEK_SB | diff --git a/llvm/include/llvm/Support/AMDGPUMetadata.h b/llvm/include/llvm/Support/AMDGPUMetadata.h index 784a980fee24..e0838a1f425e 100644 --- a/llvm/include/llvm/Support/AMDGPUMetadata.h +++ b/llvm/include/llvm/Support/AMDGPUMetadata.h @@ -44,6 +44,11 @@ constexpr uint32_t VersionMajorV4 = 1; /// HSA metadata minor version for code object V4. constexpr uint32_t VersionMinorV4 = 1; +/// HSA metadata major version for code object V5. +constexpr uint32_t VersionMajorV5 = 1; +/// HSA metadata minor version for code object V5. +constexpr uint32_t VersionMinorV5 = 2; + /// HSA metadata beginning assembler directive. constexpr char AssemblerDirectiveBegin[] = ".amd_amdgpu_hsa_metadata"; /// HSA metadata ending assembler directive. 
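The AARCH64_CPU_NAME rows added above (such as cortex-x1c) are X-macro entries: the .def file carries only the table, and each includer defines the macro to expand it as needed before including the file. The following is a minimal self-contained sketch of that expansion pattern, not code from this import; the helper name defaultExtensionsFor is hypothetical, while the five-parameter macro signature matches the entries shown in the hunk.

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/AArch64TargetParser.h" // AArch64::AEK_* extension flags

using namespace llvm;

// Expand every AARCH64_CPU_NAME row into a name check; rows added to the
// .def file, such as cortex-x1c, are picked up automatically. By LLVM's
// .def convention the file supplies no-op defaults for macros the includer
// did not define and #undefs everything at its end.
static uint64_t defaultExtensionsFor(StringRef CPU) {
#define AARCH64_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT)       \
  if (CPU == NAME)                                                             \
    return DEFAULT_EXT;
#include "llvm/Support/AArch64TargetParser.def"
  return AArch64::AEK_INVALID; // unknown CPU name
}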
diff --git a/llvm/include/llvm/Support/ARMTargetParser.def b/llvm/include/llvm/Support/ARMTargetParser.def index 433d7fdc2c3b..80deeb2a6e9d 100644 --- a/llvm/include/llvm/Support/ARMTargetParser.def +++ b/llvm/include/llvm/Support/ARMTargetParser.def @@ -328,6 +328,8 @@ ARM_CPU_NAME("cortex-a710", ARMV9A, FK_NEON_FP_ARMV8, false, ARM::AEK_I8MM)) ARM_CPU_NAME("cortex-x1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (ARM::AEK_FP16 | ARM::AEK_DOTPROD)) +ARM_CPU_NAME("cortex-x1c", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, + (ARM::AEK_FP16 | ARM::AEK_DOTPROD)) ARM_CPU_NAME("neoverse-n1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (ARM::AEK_FP16 | ARM::AEK_DOTPROD)) ARM_CPU_NAME("neoverse-n2", ARMV8_5A, FK_CRYPTO_NEON_FP_ARMV8, false, diff --git a/llvm/include/llvm/Support/BinaryStreamReader.h b/llvm/include/llvm/Support/BinaryStreamReader.h index c664ac48daad..6853df3ccab1 100644 --- a/llvm/include/llvm/Support/BinaryStreamReader.h +++ b/llvm/include/llvm/Support/BinaryStreamReader.h @@ -35,16 +35,11 @@ public: llvm::support::endianness Endian); explicit BinaryStreamReader(StringRef Data, llvm::support::endianness Endian); - BinaryStreamReader(const BinaryStreamReader &Other) - : Stream(Other.Stream), Offset(Other.Offset) {} + BinaryStreamReader(const BinaryStreamReader &Other) = default; - BinaryStreamReader &operator=(const BinaryStreamReader &Other) { - Stream = Other.Stream; - Offset = Other.Offset; - return *this; - } + BinaryStreamReader &operator=(const BinaryStreamReader &Other) = default; - virtual ~BinaryStreamReader() {} + virtual ~BinaryStreamReader() = default; /// Read as much as possible from the underlying string at the current offset /// without invoking a copy, and set \p Buffer to the resulting data slice. diff --git a/llvm/include/llvm/Support/BinaryStreamWriter.h b/llvm/include/llvm/Support/BinaryStreamWriter.h index c05b0420aaa3..ce7af3650f52 100644 --- a/llvm/include/llvm/Support/BinaryStreamWriter.h +++ b/llvm/include/llvm/Support/BinaryStreamWriter.h @@ -35,16 +35,11 @@ public: explicit BinaryStreamWriter(MutableArrayRef<uint8_t> Data, llvm::support::endianness Endian); - BinaryStreamWriter(const BinaryStreamWriter &Other) - : Stream(Other.Stream), Offset(Other.Offset) {} + BinaryStreamWriter(const BinaryStreamWriter &Other) = default; - BinaryStreamWriter &operator=(const BinaryStreamWriter &Other) { - Stream = Other.Stream; - Offset = Other.Offset; - return *this; - } + BinaryStreamWriter &operator=(const BinaryStreamWriter &Other) = default; - virtual ~BinaryStreamWriter() {} + virtual ~BinaryStreamWriter() = default; /// Write the bytes specified in \p Buffer to the underlying stream. 
/// On success, updates the offset so that subsequent writes will occur diff --git a/llvm/include/llvm/Support/CommandLine.h b/llvm/include/llvm/Support/CommandLine.h index 120ab1840915..c8e29ac42559 100644 --- a/llvm/include/llvm/Support/CommandLine.h +++ b/llvm/include/llvm/Support/CommandLine.h @@ -877,7 +877,7 @@ class basic_parser_impl { // non-template implementation of basic_parser<t> public: basic_parser_impl(Option &) {} - virtual ~basic_parser_impl() {} + virtual ~basic_parser_impl() = default; enum ValueExpected getValueExpectedFlagDefault() const { return ValueRequired; diff --git a/llvm/include/llvm/Support/Compiler.h b/llvm/include/llvm/Support/Compiler.h index f4c277fae7cc..f3317049524f 100644 --- a/llvm/include/llvm/Support/Compiler.h +++ b/llvm/include/llvm/Support/Compiler.h @@ -77,12 +77,21 @@ /// * 1916: VS2017, version 15.9 /// * 1920: VS2019, version 16.0 /// * 1921: VS2019, version 16.1 +/// * 1922: VS2019, version 16.2 +/// * 1923: VS2019, version 16.3 +/// * 1924: VS2019, version 16.4 +/// * 1925: VS2019, version 16.5 +/// * 1926: VS2019, version 16.6 +/// * 1927: VS2019, version 16.7 +/// * 1928: VS2019, version 16.8 + 16.9 +/// * 1929: VS2019, version 16.10 + 16.11 +/// * 1930: VS2022, version 17.0 #ifdef _MSC_VER #define LLVM_MSC_PREREQ(version) (_MSC_VER >= (version)) -// We require at least MSVC 2017. -#if !LLVM_MSC_PREREQ(1910) -#error LLVM requires at least MSVC 2017. +// We require at least VS 2019. +#if !LLVM_MSC_PREREQ(1920) +#error LLVM requires at least VS 2019. #endif #else @@ -94,12 +103,8 @@ /// Sadly, this is separate from just rvalue reference support because GCC /// and MSVC implemented this later than everything else. This appears to be /// corrected in MSVC 2019 but not MSVC 2017. -#if __has_feature(cxx_rvalue_references) || defined(__GNUC__) || \ - LLVM_MSC_PREREQ(1920) +/// FIXME: Remove LLVM_HAS_RVALUE_REFERENCE_THIS macro #define LLVM_HAS_RVALUE_REFERENCE_THIS 1 -#else -#define LLVM_HAS_RVALUE_REFERENCE_THIS 0 -#endif /// Expands to '&' if ref-qualifiers for *this are supported. /// diff --git a/llvm/include/llvm/Support/FileOutputBuffer.h b/llvm/include/llvm/Support/FileOutputBuffer.h index 17b44380e9cd..d4b73522115d 100644 --- a/llvm/include/llvm/Support/FileOutputBuffer.h +++ b/llvm/include/llvm/Support/FileOutputBuffer.h @@ -70,7 +70,7 @@ public: /// If this object was previously committed, the destructor just deletes /// this object. If this object was not committed, the destructor /// deallocates the buffer and the target file is never written. - virtual ~FileOutputBuffer() {} + virtual ~FileOutputBuffer() = default; /// This removes the temporary file (unless it already was committed) /// but keeps the memory mapping alive. 
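The swap from empty braces to = default that runs through these headers (BinaryStreamReader, BinaryStreamWriter, FileOutputBuffer, and many others) is not purely cosmetic: a constructor defaulted on its first declaration can remain trivial, whereas an empty user-provided body never is, and the two also behave differently under value-initialization. A short standalone illustration with invented type names:

#include <type_traits>

struct UserProvided {
  UserProvided() {} // user-provided, even though the body is empty
  int X;
};

struct Defaulted {
  Defaulted() = default; // trivial: all members are trivially constructible
  int X;
};

static_assert(!std::is_trivially_default_constructible<UserProvided>::value,
              "an empty body still disables triviality");
static_assert(std::is_trivially_default_constructible<Defaulted>::value,
              "= default preserves triviality");

int main() {
  UserProvided A{}; // runs the empty body; A.X is left indeterminate
  Defaulted B{};    // value-initialization: B.X is zero-initialized
  (void)A;
  return B.X; // always 0
}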
diff --git a/llvm/include/llvm/Support/FormatVariadicDetails.h b/llvm/include/llvm/Support/FormatVariadicDetails.h index 2cafc120c1d7..2204cff13a64 100644 --- a/llvm/include/llvm/Support/FormatVariadicDetails.h +++ b/llvm/include/llvm/Support/FormatVariadicDetails.h @@ -24,7 +24,7 @@ class format_adapter { virtual void anchor(); protected: - virtual ~format_adapter() {} + virtual ~format_adapter() = default; public: virtual void format(raw_ostream &S, StringRef Options) = 0; diff --git a/llvm/include/llvm/Support/GenericDomTree.h b/llvm/include/llvm/Support/GenericDomTree.h index f39400c26eab..d7c64bf62c7a 100644 --- a/llvm/include/llvm/Support/GenericDomTree.h +++ b/llvm/include/llvm/Support/GenericDomTree.h @@ -260,7 +260,7 @@ protected: friend struct DomTreeBuilder::SemiNCAInfo<DominatorTreeBase>; public: - DominatorTreeBase() {} + DominatorTreeBase() = default; DominatorTreeBase(DominatorTreeBase &&Arg) : Roots(std::move(Arg.Roots)), diff --git a/llvm/include/llvm/Support/GenericIteratedDominanceFrontier.h b/llvm/include/llvm/Support/GenericIteratedDominanceFrontier.h index 3bafeb48f64a..96105d6b4684 100644 --- a/llvm/include/llvm/Support/GenericIteratedDominanceFrontier.h +++ b/llvm/include/llvm/Support/GenericIteratedDominanceFrontier.h @@ -37,7 +37,7 @@ namespace IDFCalculatorDetail { /// May be specialized if, for example, one wouldn't like to return nullpointer /// successors. template <class NodeTy, bool IsPostDom> struct ChildrenGetterTy { - using NodeRef = typename GraphTraits<NodeTy>::NodeRef; + using NodeRef = typename GraphTraits<NodeTy *>::NodeRef; using ChildrenTy = SmallVector<NodeRef, 8>; ChildrenTy get(const NodeRef &N); diff --git a/llvm/include/llvm/Support/KnownBits.h b/llvm/include/llvm/Support/KnownBits.h index 5ef0ba31f785..96b7753e9b20 100644 --- a/llvm/include/llvm/Support/KnownBits.h +++ b/llvm/include/llvm/Support/KnownBits.h @@ -31,7 +31,7 @@ private: public: // Default construct Zero and One. - KnownBits() {} + KnownBits() = default; /// Create a known bits object of BitWidth bits initialized to unknown. 
KnownBits(unsigned BitWidth) : Zero(BitWidth, 0), One(BitWidth, 0) {} diff --git a/llvm/include/llvm/Support/RISCVISAInfo.h b/llvm/include/llvm/Support/RISCVISAInfo.h index b450c1df3558..7fa0e6ee3acf 100644 --- a/llvm/include/llvm/Support/RISCVISAInfo.h +++ b/llvm/include/llvm/Support/RISCVISAInfo.h @@ -92,6 +92,9 @@ private: void updateFLen(); void updateMinVLen(); void updateMaxELen(); + + static llvm::Expected<std::unique_ptr<RISCVISAInfo>> + postProcessAndChecking(std::unique_ptr<RISCVISAInfo> &&ISAInfo); }; } // namespace llvm diff --git a/llvm/include/llvm/Support/ScopedPrinter.h b/llvm/include/llvm/Support/ScopedPrinter.h index 9bde4f455a2d..6b5daf710c9f 100644 --- a/llvm/include/llvm/Support/ScopedPrinter.h +++ b/llvm/include/llvm/Support/ScopedPrinter.h @@ -115,7 +115,7 @@ public: return SP->getKind() == ScopedPrinterKind::Base; } - virtual ~ScopedPrinter() {} + virtual ~ScopedPrinter() = default; void flush() { OS.flush(); } @@ -792,13 +792,13 @@ private: struct DelimitedScope { DelimitedScope(ScopedPrinter &W) : W(&W) {} DelimitedScope() : W(nullptr) {} - virtual ~DelimitedScope(){}; + virtual ~DelimitedScope() = default; virtual void setPrinter(ScopedPrinter &W) = 0; ScopedPrinter *W; }; struct DictScope : DelimitedScope { - explicit DictScope() {} + explicit DictScope() = default; explicit DictScope(ScopedPrinter &W) : DelimitedScope(W) { W.objectBegin(); } DictScope(ScopedPrinter &W, StringRef N) : DelimitedScope(W) { @@ -817,7 +817,7 @@ struct DictScope : DelimitedScope { }; struct ListScope : DelimitedScope { - explicit ListScope() {} + explicit ListScope() = default; explicit ListScope(ScopedPrinter &W) : DelimitedScope(W) { W.arrayBegin(); } ListScope(ScopedPrinter &W, StringRef N) : DelimitedScope(W) { diff --git a/llvm/include/llvm/Support/SuffixTree.h b/llvm/include/llvm/Support/SuffixTree.h index 352fba511937..162a1de72f1a 100644 --- a/llvm/include/llvm/Support/SuffixTree.h +++ b/llvm/include/llvm/Support/SuffixTree.h @@ -109,7 +109,7 @@ struct SuffixTreeNode { SuffixTreeNode(unsigned StartIdx, unsigned *EndIdx, SuffixTreeNode *Link) : StartIdx(StartIdx), EndIdx(EndIdx), Link(Link) {} - SuffixTreeNode() {} + SuffixTreeNode() = default; }; /// A data structure for fast substring queries. diff --git a/llvm/include/llvm/Support/Timer.h b/llvm/include/llvm/Support/Timer.h index eb49e805b40d..742d20ce51dd 100644 --- a/llvm/include/llvm/Support/Timer.h +++ b/llvm/include/llvm/Support/Timer.h @@ -106,7 +106,7 @@ public: ~Timer(); /// Create an uninitialized timer, client must use 'init'. 
- explicit Timer() {} + explicit Timer() = default; void init(StringRef TimerName, StringRef TimerDescription); void init(StringRef TimerName, StringRef TimerDescription, TimerGroup &tg); diff --git a/llvm/include/llvm/TableGen/Record.h b/llvm/include/llvm/TableGen/Record.h index add05bd078d6..1157487eced3 100644 --- a/llvm/include/llvm/TableGen/Record.h +++ b/llvm/include/llvm/TableGen/Record.h @@ -2015,7 +2015,7 @@ class Resolver { public: explicit Resolver(Record *CurRec) : CurRec(CurRec) {} - virtual ~Resolver() {} + virtual ~Resolver() = default; Record *getCurrentRecord() const { return CurRec; } diff --git a/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h b/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h index 03ead4bc0714..072ccf7320e8 100644 --- a/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h +++ b/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h @@ -18,6 +18,7 @@ #define LLVM_TRANSFORMS_AGGRESSIVEINSTCOMBINE_AGGRESSIVEINSTCOMBINE_H #include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" namespace llvm { diff --git a/llvm/include/llvm/Transforms/IPO/AlwaysInliner.h b/llvm/include/llvm/Transforms/IPO/AlwaysInliner.h index 6a208dfa6a25..78b2f909f1c9 100644 --- a/llvm/include/llvm/Transforms/IPO/AlwaysInliner.h +++ b/llvm/include/llvm/Transforms/IPO/AlwaysInliner.h @@ -15,6 +15,7 @@ #define LLVM_TRANSFORMS_IPO_ALWAYSINLINER_H #include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" namespace llvm { diff --git a/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h b/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h index 6d6cb58abdbb..225def99678a 100644 --- a/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h +++ b/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h @@ -27,14 +27,6 @@ class ArgumentPromotionPass : public PassInfoMixin<ArgumentPromotionPass> { public: ArgumentPromotionPass(unsigned MaxElements = 3u) : MaxElements(MaxElements) {} - /// Check if callers and the callee \p F agree how promoted arguments would be - /// passed. The ones that they do not agree on are eliminated from the sets but - /// the return value has to be observed as well. - static bool areFunctionArgsABICompatible( - const Function &F, const TargetTransformInfo &TTI, - SmallPtrSetImpl<Argument *> &ArgsToPromote, - SmallPtrSetImpl<Argument *> &ByValArgsToTransform); - /// Checks if a type could have padding bytes. static bool isDenselyPacked(Type *type, const DataLayout &DL); diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index d56a43ec7961..7eee16f71d64 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -132,6 +132,7 @@ struct AbstractAttribute; struct InformationCache; struct AAIsDead; struct AttributorCallGraph; +struct IRPosition; class AAResults; class Function; @@ -139,6 +140,11 @@ class Function; /// Abstract Attribute helper functions. namespace AA { +/// Return true if \p I is a `nosync` instruction. Use generic reasoning and +/// potentially the corresponding AANoSync. +bool isNoSyncInst(Attributor &A, const Instruction &I, + const AbstractAttribute &QueryingAA); + /// Return true if \p V is dynamically unique, that is, there are no two /// "instances" of \p V at runtime with different values. 
bool isDynamicallyUnique(Attributor &A, const AbstractAttribute &QueryingAA, @@ -185,7 +191,8 @@ Constant *getInitialValueForObj(Value &Obj, Type &Ty, bool getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr, SmallVectorImpl<Value *> &Objects, const AbstractAttribute &QueryingAA, - const Instruction *CtxI); + const Instruction *CtxI, + bool Intraprocedural = false); /// Collect all potential values of the one stored by \p SI into /// \p PotentialCopies. That is, the only copies that were made via the @@ -200,6 +207,34 @@ bool getPotentialCopiesOfStoredValue( Attributor &A, StoreInst &SI, SmallSetVector<Value *, 4> &PotentialCopies, const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation); +/// Return true if \p IRP is readonly. This will query respective AAs that +/// deduce the information and introduce dependences for \p QueryingAA. +bool isAssumedReadOnly(Attributor &A, const IRPosition &IRP, + const AbstractAttribute &QueryingAA, bool &IsKnown); + +/// Return true if \p IRP is readnone. This will query respective AAs that +/// deduce the information and introduce dependences for \p QueryingAA. +bool isAssumedReadNone(Attributor &A, const IRPosition &IRP, + const AbstractAttribute &QueryingAA, bool &IsKnown); + +/// Return true if \p ToI is potentially reachable from \p FromI. The two +/// instructions do not need to be in the same function. \p GoBackwardsCB +/// can be provided to convey domain knowledge about the "lifespan" the user is +/// interested in. By default, the callers of \p FromI are checked as well to +/// determine if \p ToI can be reached. If the query is not interested in +/// callers beyond a certain point, e.g., a GPU kernel entry or the function +/// containing an alloca, the \p GoBackwardsCB should return false. +bool isPotentiallyReachable( + Attributor &A, const Instruction &FromI, const Instruction &ToI, + const AbstractAttribute &QueryingAA, + std::function<bool(const Function &F)> GoBackwardsCB = nullptr); + +/// Same as above but it is sufficient to reach any instruction in \p ToFn. +bool isPotentiallyReachable( + Attributor &A, const Instruction &FromI, const Function &ToFn, + const AbstractAttribute &QueryingAA, + std::function<bool(const Function &F)> GoBackwardsCB); + } // namespace AA /// The value passed to the line option that defines the maximal initialization @@ -227,7 +262,7 @@ enum class DepClassTy { /// The data structure for the nodes of a dependency graph struct AADepGraphNode { public: - virtual ~AADepGraphNode(){}; + virtual ~AADepGraphNode() = default; using DepTy = PointerIntPair<AADepGraphNode *, 1>; protected: @@ -266,8 +301,8 @@ public: /// then it means that B depends on A, and when the state of A is /// updated, node B should also be updated struct AADepGraph { - AADepGraph() {} - ~AADepGraph() {} + AADepGraph() = default; + ~AADepGraph() = default; using DepTy = AADepGraphNode::DepTy; static AADepGraphNode *DepGetVal(DepTy &DT) { return DT.getPointer(); } @@ -334,6 +369,14 @@ struct IRPosition { return IRPosition(const_cast<Value &>(V), IRP_FLOAT, CBContext); } + /// Create a position describing the instruction \p I. This is different from + /// the value version because call sites are treated as instructions rather + /// than their return value in this function. + static const IRPosition inst(const Instruction &I, + const CallBaseContext *CBContext = nullptr) { + return IRPosition(const_cast<Instruction &>(I), IRP_FLOAT, CBContext); + } + /// Create a position describing the function scope of \p F.
/// \p CBContext is used for call base specific analysis. static const IRPosition function(const Function &F, @@ -662,7 +705,7 @@ private: break; case IRPosition::IRP_FLOAT: // Special case for floating functions. - if (isa<Function>(AnchorVal)) + if (isa<Function>(AnchorVal) || isa<CallBase>(AnchorVal)) Enc = {&AnchorVal, ENC_FLOATING_FUNCTION}; else Enc = {&AnchorVal, ENC_VALUE}; @@ -844,7 +887,7 @@ struct AnalysisGetter { } AnalysisGetter(FunctionAnalysisManager &FAM) : FAM(&FAM) {} - AnalysisGetter() {} + AnalysisGetter() = default; private: FunctionAnalysisManager *FAM = nullptr; @@ -879,7 +922,7 @@ struct InformationCache { [&](const Function &F) { return AG.getAnalysis<PostDominatorTreeAnalysis>(F); }), - AG(AG), CGSCC(CGSCC), TargetTriple(M.getTargetTriple()) { + AG(AG), TargetTriple(M.getTargetTriple()) { if (CGSCC) initializeModuleSlice(*CGSCC); } @@ -996,13 +1039,6 @@ struct InformationCache { return AG.getAnalysis<AP>(F); } - /// Return SCC size on call graph for function \p F or 0 if unknown. - unsigned getSccSize(const Function &F) { - if (CGSCC && CGSCC->count(const_cast<Function *>(&F))) - return CGSCC->size(); - return 0; - } - /// Return datalayout used in the module. const DataLayout &getDL() { return DL; } @@ -1092,9 +1128,6 @@ private: /// Getters for analysis. AnalysisGetter &AG; - /// The underlying CGSCC, or null if not available. - SetVector<Function *> *CGSCC; - /// Set of inlineable functions SmallPtrSet<const Function *, 8> InlineableFunctions; @@ -1362,6 +1395,9 @@ struct Attributor { return AA; } + /// Allows a query AA to request an update if a new query was received. + void registerForUpdate(AbstractAttribute &AA); + /// Explicitly record a dependence from \p FromAA to \p ToAA, that is if /// \p FromAA changes \p ToAA should be updated as well. /// @@ -1794,6 +1830,18 @@ public: const AbstractAttribute &QueryingAA, bool RequireAllCallSites, bool &AllCallSitesKnown); + /// Check \p Pred on all call sites of \p Fn. + /// + /// This method will evaluate \p Pred on call sites and return + /// true if \p Pred holds in every call site. However, this is only possible if + /// all call sites are known, hence the function has internal linkage. + /// If true is returned, \p AllCallSitesKnown is set if all possible call + /// sites of the function have been visited. + bool checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred, + const Function &Fn, bool RequireAllCallSites, + const AbstractAttribute *QueryingAA, + bool &AllCallSitesKnown); + /// Check \p Pred on all values potentially returned by \p F. /// /// This method will evaluate \p Pred on all values potentially returned by @@ -1932,18 +1980,6 @@ private: /// may trigger further updates. (\see DependenceStack) void rememberDependences(); - /// Check \p Pred on all call sites of \p Fn. - /// - /// This method will evaluate \p Pred on call sites and return - /// true if \p Pred holds in every call sites. However, this is only possible - /// all call sites are known, hence the function has internal linkage. - /// If true is returned, \p AllCallSitesKnown is set if all possible call - /// sites of the function have been visited. - bool checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred, - const Function &Fn, bool RequireAllCallSites, - const AbstractAttribute *QueryingAA, - bool &AllCallSitesKnown); - /// Determine if CallBase context in \p IRP should be propagated.
bool shouldPropagateCallBaseContext(const IRPosition &IRP); @@ -2056,6 +2092,10 @@ private: /// Callback to get an OptimizationRemarkEmitter from a Function *. Optional<OptimizationRemarkGetter> OREGetter; + /// Container with all the query AAs that requested an update via + /// registerForUpdate. + SmallSetVector<AbstractAttribute *, 16> QueryAAsAwaitingUpdate; + /// The name of the pass to emit remarks for. const char *PassName = ""; @@ -2081,7 +2121,7 @@ private: /// additional methods to directly modify the state based if needed. See the /// class comments for help. struct AbstractState { - virtual ~AbstractState() {} + virtual ~AbstractState() = default; /// Return if this abstract state is in a valid state. If false, no /// information provided should be used. @@ -2122,7 +2162,7 @@ template <typename base_ty, base_ty BestState, base_ty WorstState> struct IntegerStateBase : public AbstractState { using base_t = base_ty; - IntegerStateBase() {} + IntegerStateBase() = default; IntegerStateBase(base_t Assumed) : Assumed(Assumed) {} /// Return the best possible representable state. @@ -2365,7 +2405,7 @@ struct BooleanState : public IntegerStateBase<bool, true, false> { using super = IntegerStateBase<bool, true, false>; using base_t = IntegerStateBase::base_t; - BooleanState() {} + BooleanState() = default; BooleanState(base_t Assumed) : super(Assumed) {} /// Set the assumed value to \p Value but never below the known one. @@ -2773,7 +2813,7 @@ struct AbstractAttribute : public IRPosition, public AADepGraphNode { AbstractAttribute(const IRPosition &IRP) : IRPosition(IRP) {} /// Virtual destructor. - virtual ~AbstractAttribute() {} + virtual ~AbstractAttribute() = default; /// This function is used to identify if an \p DGN is of type /// AbstractAttribute so that the dyn_cast and cast can use such information @@ -2793,6 +2833,14 @@ struct AbstractAttribute : public IRPosition, public AADepGraphNode { /// in the `updateImpl` method. virtual void initialize(Attributor &A) {} + /// A query AA is always scheduled as long as we do updates because it does + /// lazy computation that cannot be determined to be done from the outside. + /// However, while query AAs will not be fixed if they do not have outstanding + /// dependences, we will only schedule them like other AAs. If a query AA + /// receives a new query, it needs to request an update via + /// `Attributor::registerForUpdate`. + virtual bool isQueryAA() const { return false; } + /// Return the internal abstract state for inspection. virtual StateType &getState() = 0; virtual const StateType &getState() const = 0; @@ -2989,6 +3037,14 @@ struct AANoSync /// Returns true if "nosync" is known. bool isKnownNoSync() const { return getKnown(); } + /// Helper function used to determine whether an instruction is non-relaxed + /// atomic. In other words, whether an atomic instruction does not have + /// unordered or monotonic ordering. + static bool isNonRelaxedAtomic(const Instruction *I); + + /// Helper function specific for intrinsics which are potentially volatile. + static bool isNoSyncIntrinsic(const Instruction *I); + /// Create an abstract attribute view for the position \p IRP.
static AANoSync &createForPosition(const IRPosition &IRP, Attributor &A); @@ -4419,7 +4475,7 @@ private: struct AACallGraphNode { AACallGraphNode(Attributor &A) : A(A) {} - virtual ~AACallGraphNode() {} + virtual ~AACallGraphNode() = default; virtual AACallEdgeIterator optimisticEdgesBegin() const = 0; virtual AACallEdgeIterator optimisticEdgesEnd() const = 0; @@ -4485,7 +4541,7 @@ struct AACallEdges : public StateWrapper<BooleanState, AbstractAttribute>, // Synthetic root node for the Attributor's internal call graph. struct AttributorCallGraph : public AACallGraphNode { AttributorCallGraph(Attributor &A) : AACallGraphNode(A) {} - virtual ~AttributorCallGraph() {} + virtual ~AttributorCallGraph() = default; AACallEdgeIterator optimisticEdgesBegin() const override { return AACallEdgeIterator(A, A.Functions.begin()); @@ -4592,18 +4648,30 @@ struct AAFunctionReachability AAFunctionReachability(const IRPosition &IRP, Attributor &A) : Base(IRP) {} + /// See AbstractAttribute::isQueryAA. + bool isQueryAA() const override { return true; } + /// If the function represented by this position can reach \p Fn. - virtual bool canReach(Attributor &A, Function *Fn) const = 0; + virtual bool canReach(Attributor &A, const Function &Fn) const = 0; + + /// Can \p CB reach \p Fn. + virtual bool canReach(Attributor &A, CallBase &CB, + const Function &Fn) const = 0; - /// Can \p CB reach \p Fn - virtual bool canReach(Attributor &A, CallBase &CB, Function *Fn) const = 0; + /// Can \p Inst reach \p Fn. + /// See also AA::isPotentiallyReachable. + virtual bool instructionCanReach(Attributor &A, const Instruction &Inst, + const Function &Fn, + bool UseBackwards = true) const = 0; /// Create an abstract attribute view for the position \p IRP. static AAFunctionReachability &createForPosition(const IRPosition &IRP, Attributor &A); /// See AbstractAttribute::getName() - const std::string getName() const override { return "AAFuncitonReacability"; } + const std::string getName() const override { + return "AAFunctionReachability"; + } /// See AbstractAttribute::getIdAddr() const char *getIdAddr() const override { return &ID; } @@ -4639,21 +4707,12 @@ struct AAPointerInfo : public AbstractAttribute { AccessKind Kind, Type *Ty) : LocalI(LocalI), RemoteI(RemoteI), Content(Content), Kind(Kind), Ty(Ty) {} - Access(const Access &Other) - : LocalI(Other.LocalI), RemoteI(Other.RemoteI), Content(Other.Content), - Kind(Other.Kind), Ty(Other.Ty) {} + Access(const Access &Other) = default; Access(const Access &&Other) : LocalI(Other.LocalI), RemoteI(Other.RemoteI), Content(Other.Content), Kind(Other.Kind), Ty(Other.Ty) {} - Access &operator=(const Access &Other) { - LocalI = Other.LocalI; - RemoteI = Other.RemoteI; - Content = Other.Content; - Kind = Other.Kind; - Ty = Other.Ty; - return *this; - } + Access &operator=(const Access &Other) = default; bool operator==(const Access &R) const { return LocalI == R.LocalI && RemoteI == R.RemoteI && Content == R.Content && Kind == R.Kind; @@ -4741,6 +4800,15 @@ struct AAPointerInfo : public AbstractAttribute { virtual bool forallInterferingAccesses( StoreInst &SI, function_ref<bool(const Access &, bool)> CB) const = 0; + /// Call \p CB on all write accesses that might interfere with \p LI and + /// return true if all such accesses were known and the callback returned true + /// for all of them, false otherwise.
In contrast to forallInterferingAccesses, + /// this function will perform reasoning to exclude write accesses that cannot + /// affect the load even if, on the surface, they look as if they would. + virtual bool forallInterferingWrites( + Attributor &A, const AbstractAttribute &QueryingAA, LoadInst &LI, + function_ref<bool(const Access &, bool)> CB) const = 0; + /// This function should return true if the type of the \p AA is AAPointerInfo static bool classof(const AbstractAttribute *AA) { return (AA->getIdAddr() == &ID); diff --git a/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h b/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h index fd99843d0449..a2b93f8aa30d 100644 --- a/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h +++ b/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h @@ -14,6 +14,7 @@ #define LLVM_TRANSFORMS_IPO_FORCEFUNCTIONATTRS_H #include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" namespace llvm { diff --git a/llvm/include/llvm/Transforms/IPO/IROutliner.h b/llvm/include/llvm/Transforms/IPO/IROutliner.h index ed74c8ed0e96..e4807a1c9c65 100644 --- a/llvm/include/llvm/Transforms/IPO/IROutliner.h +++ b/llvm/include/llvm/Transforms/IPO/IROutliner.h @@ -337,11 +337,9 @@ private: /// be analyzed for similarity. This is needed as there may be instructions we /// can identify as having similarity but that are more complicated to outline. struct InstructionAllowed : public InstVisitor<InstructionAllowed, bool> { - InstructionAllowed() {} + InstructionAllowed() = default; - bool visitBranchInst(BranchInst &BI) { - return EnableBranches; - } + bool visitBranchInst(BranchInst &BI) { return EnableBranches; } bool visitPHINode(PHINode &PN) { return EnableBranches; } // TODO: Handle allocas. bool visitAllocaInst(AllocaInst &AI) { return false; } @@ -359,7 +357,7 @@ private: bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return true; } // TODO: Handle specific intrinsics individually from those that can be // handled. - bool IntrinsicInst(IntrinsicInst &II) { return false; } + bool IntrinsicInst(IntrinsicInst &II) { return EnableIntrinsics; } // We only handle CallInsts that are not indirect, since we cannot guarantee // that they have a name in these cases. bool visitCallInst(CallInst &CI) { @@ -395,6 +393,10 @@ private: // The flag variable that marks whether we should allow indirect calls // to be outlined. bool EnableIndirectCalls = true; + + // The flag variable that marks whether we should allow intrinsic + // instructions to be outlined. + bool EnableIntrinsics = false; }; /// An InstVisitor used to exclude certain instructions from being outlined.
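For orientation, the InstructionAllowed changes above follow LLVM's InstVisitor pattern: each visit overload returns whether that instruction kind may be outlined, and the new EnableIntrinsics flag gates intrinsic calls just as EnableBranches gates branches and PHIs. A minimal standalone sketch of the same pattern (hypothetical OutlineFilter type, not code from this commit):

#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"

namespace {
// The bool return value of each visit overload says whether the visited
// instruction kind is acceptable; feature flags gate the riskier kinds.
struct OutlineFilter : public llvm::InstVisitor<OutlineFilter, bool> {
  bool EnableBranches = false;
  bool EnableIntrinsics = false;

  // Fallback: instruction kinds without a dedicated overload are allowed.
  bool visitInstruction(llvm::Instruction &I) { return true; }
  // Control flow is only allowed when branch outlining is enabled.
  bool visitBranchInst(llvm::BranchInst &BI) { return EnableBranches; }
  bool visitPHINode(llvm::PHINode &PN) { return EnableBranches; }
  // Intrinsic calls get their own flag, mirroring EnableIntrinsics above.
  bool visitIntrinsicInst(llvm::IntrinsicInst &II) { return EnableIntrinsics; }
};
} // namespace

// Usage: OutlineFilter F; F.EnableIntrinsics = true; bool Allowed = F.visit(I);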
diff --git a/llvm/include/llvm/Transforms/IPO/InferFunctionAttrs.h b/llvm/include/llvm/Transforms/IPO/InferFunctionAttrs.h index bb7907fb8ac8..302695d96355 100644 --- a/llvm/include/llvm/Transforms/IPO/InferFunctionAttrs.h +++ b/llvm/include/llvm/Transforms/IPO/InferFunctionAttrs.h @@ -17,6 +17,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" namespace llvm { diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfile.h b/llvm/include/llvm/Transforms/IPO/SampleProfile.h index 2b05aaf320cf..704b793ab3ea 100644 --- a/llvm/include/llvm/Transforms/IPO/SampleProfile.h +++ b/llvm/include/llvm/Transforms/IPO/SampleProfile.h @@ -15,6 +15,7 @@ #define LLVM_TRANSFORMS_IPO_SAMPLEPROFILE_H #include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" #include <string> namespace llvm { diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h index 43f4bc78140f..e73c36043cb2 100644 --- a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h +++ b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h @@ -154,7 +154,7 @@ class PseudoProbeUpdatePass : public PassInfoMixin<PseudoProbeUpdatePass> { void runOnFunction(Function &F, FunctionAnalysisManager &FAM); public: - PseudoProbeUpdatePass() {} + PseudoProbeUpdatePass() = default; PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); }; diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h index f8cb6dc73a6f..ae19fbfb49a7 100644 --- a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h +++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h @@ -93,7 +93,7 @@ public: MinimizeSize(MinimizeSize), AA(AA), AC(AC), TLI(TLI), DT(DT), DL(DL), SQ(DL, &TLI, &DT, &AC), ORE(ORE), BFI(BFI), PSI(PSI), LI(LI) {} - virtual ~InstCombiner() {} + virtual ~InstCombiner() = default; /// Return the source operand of a potentially bitcasted value while /// optionally checking if it has one use. 
If there is no bitcast or the one diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h index 6002f0270083..a0d8118c23f7 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h +++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h @@ -16,6 +16,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" #include "llvm/Transforms/Instrumentation/AddressSanitizerOptions.h" namespace llvm { diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h index 5a0fb835606a..0a5456c5956f 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h +++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h @@ -66,7 +66,7 @@ bool forAllReachableExits(const DominatorTree &DT, const PostDominatorTree &PDT, } SmallVector<Instruction *, 8> ReachableRetVec; unsigned NumCoveredExits = 0; - for (auto &RI : RetVec) { + for (auto *RI : RetVec) { if (!isPotentiallyReachable(Start, RI, nullptr, &DT)) continue; ReachableRetVec.push_back(RI); @@ -83,7 +83,7 @@ bool forAllReachableExits(const DominatorTree &DT, const PostDominatorTree &PDT, for (auto *End : Ends) Callback(End); } else { - for (auto &RI : ReachableRetVec) + for (auto *RI : ReachableRetVec) Callback(RI); // We may have inserted untag outside of the lifetime interval. // Signal the caller to remove the lifetime end call for this alloca. diff --git a/llvm/include/llvm/Transforms/Instrumentation/BoundsChecking.h b/llvm/include/llvm/Transforms/Instrumentation/BoundsChecking.h index 8d70f1429b99..76d586252743 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/BoundsChecking.h +++ b/llvm/include/llvm/Transforms/Instrumentation/BoundsChecking.h @@ -10,6 +10,7 @@ #define LLVM_TRANSFORMS_INSTRUMENTATION_BOUNDSCHECKING_H #include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" namespace llvm { diff --git a/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h index 3118a3762935..70949026a892 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h +++ b/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h @@ -15,6 +15,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" namespace llvm { diff --git a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h index 64523d7d073c..5873db22a5d1 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h +++ b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h @@ -87,21 +87,32 @@ private: /// Count the number of instrumented value sites for the function. void computeNumValueSiteCounts(InstrProfValueProfileInst *Ins); - /// Replace instrprof_value_profile with a call to runtime library. + /// Replace instrprof.value.profile with a call to runtime library. void lowerValueProfileInst(InstrProfValueProfileInst *Ins); - /// Replace instrprof_increment with an increment of the appropriate value. + /// Replace instrprof.cover with a store instruction to the coverage byte. + void lowerCover(InstrProfCoverInst *Inc); + + /// Replace instrprof.increment with an increment of the appropriate value. 
void lowerIncrement(InstrProfIncrementInst *Inc); /// Force emitting of name vars for unused functions. void lowerCoverageData(GlobalVariable *CoverageNamesVar); + /// Compute the address of the counter value that this profiling instruction + /// acts on. + Value *getCounterAddress(InstrProfInstBase *I); + /// Get the region counters for an increment, creating them if necessary. /// /// If the counter array doesn't yet exist, the profile data variables /// referring to them will also be created. GlobalVariable *getOrCreateRegionCounters(InstrProfInstBase *Inc); + /// Create the region counters. + GlobalVariable *createRegionCounters(InstrProfInstBase *Inc, StringRef Name, + GlobalValue::LinkageTypes Linkage); + /// Emit the section with compressed function names. void emitNameData(); diff --git a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h index f4d1b1d90e6f..b9ad56ba7509 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h +++ b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h @@ -15,6 +15,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" namespace llvm { diff --git a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h index 7ba9d65cae55..e83cc2b9bef0 100644 --- a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h @@ -73,7 +73,7 @@ class PassManager<Loop, LoopAnalysisManager, LoopStandardAnalysisResults &, PassManager<Loop, LoopAnalysisManager, LoopStandardAnalysisResults &, LPMUpdater &>> { public: - explicit PassManager() {} + explicit PassManager() = default; // FIXME: These are equivalent to the default move constructor/move // assignment. However, using = default triggers linker errors due to the diff --git a/llvm/include/llvm/Transforms/Scalar/LowerConstantIntrinsics.h b/llvm/include/llvm/Transforms/Scalar/LowerConstantIntrinsics.h index a5ad4a2192a0..61c7bf0454e1 100644 --- a/llvm/include/llvm/Transforms/Scalar/LowerConstantIntrinsics.h +++ b/llvm/include/llvm/Transforms/Scalar/LowerConstantIntrinsics.h @@ -23,7 +23,7 @@ namespace llvm { struct LowerConstantIntrinsicsPass : PassInfoMixin<LowerConstantIntrinsicsPass> { public: - explicit LowerConstantIntrinsicsPass() {} + explicit LowerConstantIntrinsicsPass() = default; /// Run the pass over the function. 
/// diff --git a/llvm/include/llvm/Transforms/Scalar/Scalarizer.h b/llvm/include/llvm/Transforms/Scalar/Scalarizer.h index 81363130e2e3..f4472e699295 100644 --- a/llvm/include/llvm/Transforms/Scalar/Scalarizer.h +++ b/llvm/include/llvm/Transforms/Scalar/Scalarizer.h @@ -18,6 +18,7 @@ #define LLVM_TRANSFORMS_SCALAR_SCALARIZER_H #include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" namespace llvm { diff --git a/llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h b/llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h index 04a5f7e6ff38..64691d68b1c4 100644 --- a/llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h +++ b/llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h @@ -14,6 +14,7 @@ #define LLVM_TRANSFORMS_SCALAR_WARNMISSEDTRANSFORMS_H #include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" namespace llvm { class Function; @@ -22,7 +23,7 @@ class Function; class WarnMissedTransformationsPass : public PassInfoMixin<WarnMissedTransformationsPass> { public: - explicit WarnMissedTransformationsPass() {} + explicit WarnMissedTransformationsPass() = default; PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; diff --git a/llvm/include/llvm/Transforms/Utils/AssumeBundleBuilder.h b/llvm/include/llvm/Transforms/Utils/AssumeBundleBuilder.h index a497722eece6..d679bca69510 100644 --- a/llvm/include/llvm/Transforms/Utils/AssumeBundleBuilder.h +++ b/llvm/include/llvm/Transforms/Utils/AssumeBundleBuilder.h @@ -20,6 +20,7 @@ #include "llvm/IR/Attributes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" namespace llvm { class AssumptionCache; diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h index 8970afb3aeaa..d99b2a56559d 100644 --- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h +++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h @@ -46,9 +46,9 @@ class Value; /// instruction. If \p Updates is specified, collect all necessary DT updates /// into this vector. If \p KeepOneInputPHIs is true, one-input Phis in /// successors of blocks being deleted will be preserved. -void DetatchDeadBlocks(ArrayRef <BasicBlock *> BBs, - SmallVectorImpl<DominatorTree::UpdateType> *Updates, - bool KeepOneInputPHIs = false); +void detachDeadBlocks(ArrayRef <BasicBlock *> BBs, + SmallVectorImpl<DominatorTree::UpdateType> *Updates, + bool KeepOneInputPHIs = false); /// Delete the specified block, which must have no predecessors. 
void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU = nullptr, diff --git a/llvm/include/llvm/Transforms/Utils/CallGraphUpdater.h b/llvm/include/llvm/Transforms/Utils/CallGraphUpdater.h index f8211d60938e..e12d7e09aad6 100644 --- a/llvm/include/llvm/Transforms/Utils/CallGraphUpdater.h +++ b/llvm/include/llvm/Transforms/Utils/CallGraphUpdater.h @@ -53,7 +53,7 @@ class CallGraphUpdater { ///} public: - CallGraphUpdater() {} + CallGraphUpdater() = default; ~CallGraphUpdater() { finalize(); } /// Initializers for usage outside of a CGSCC pass, inside a CGSCC pass in diff --git a/llvm/include/llvm/Transforms/Utils/Debugify.h b/llvm/include/llvm/Transforms/Utils/Debugify.h index 0f1c7ec724df..892e354cd9ed 100644 --- a/llvm/include/llvm/Transforms/Utils/Debugify.h +++ b/llvm/include/llvm/Transforms/Utils/Debugify.h @@ -21,6 +21,7 @@ #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/Pass.h" using DebugFnMap = llvm::MapVector<llvm::StringRef, const llvm::DISubprogram *>; using DebugInstMap = llvm::MapVector<const llvm::Instruction *, bool>; diff --git a/llvm/include/llvm/Transforms/Utils/InjectTLIMappings.h b/llvm/include/llvm/Transforms/Utils/InjectTLIMappings.h index 84e4fee51c26..af9cdb9fd619 100644 --- a/llvm/include/llvm/Transforms/Utils/InjectTLIMappings.h +++ b/llvm/include/llvm/Transforms/Utils/InjectTLIMappings.h @@ -15,6 +15,7 @@ #include "llvm/IR/PassManager.h" #include "llvm/InitializePasses.h" +#include "llvm/Pass.h" namespace llvm { class InjectTLIMappings : public PassInfoMixin<InjectTLIMappings> { diff --git a/llvm/include/llvm/Transforms/Utils/LoopPeel.h b/llvm/include/llvm/Transforms/Utils/LoopPeel.h index 7b6595c192de..07dabaeaa907 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopPeel.h +++ b/llvm/include/llvm/Transforms/Utils/LoopPeel.h @@ -21,7 +21,7 @@ namespace llvm { bool canPeel(Loop *L); bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, - DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA); + DominatorTree &DT, AssumptionCache *AC, bool PreserveLCSSA); TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, diff --git a/llvm/include/llvm/Transforms/Utils/ModuleUtils.h b/llvm/include/llvm/Transforms/Utils/ModuleUtils.h index 9bbe8ea7e1e8..8d459972336b 100644 --- a/llvm/include/llvm/Transforms/Utils/ModuleUtils.h +++ b/llvm/include/llvm/Transforms/Utils/ModuleUtils.h @@ -15,6 +15,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/MemoryBuffer.h" #include <utility> // for std::pair namespace llvm { @@ -106,6 +107,10 @@ void filterDeadComdatFunctions( /// unique identifier for this module, so we return the empty string. std::string getUniqueModuleId(Module *M); +/// Embed the memory buffer \p Buf into the module \p M as a global using the +/// specified section name. 
+void embedBufferInModule(Module &M, MemoryBufferRef Buf, StringRef SectionName); + class CallInst; namespace VFABI { /// Overwrite the Vector Function ABI variants attribute with the names provide diff --git a/llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h b/llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h index 5de575aed059..ad24cb454d5e 100644 --- a/llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h +++ b/llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h @@ -42,7 +42,7 @@ class SSAUpdaterBulk { SmallVector<Use *, 4> Uses; StringRef Name; Type *Ty; - RewriteInfo(){}; + RewriteInfo() = default; RewriteInfo(StringRef &N, Type *T) : Name(N), Ty(T){}; }; SmallVector<RewriteInfo, 4> Rewrites; @@ -52,10 +52,10 @@ class SSAUpdaterBulk { Value *computeValueAt(BasicBlock *BB, RewriteInfo &R, DominatorTree *DT); public: - explicit SSAUpdaterBulk(){}; + explicit SSAUpdaterBulk() = default; SSAUpdaterBulk(const SSAUpdaterBulk &) = delete; SSAUpdaterBulk &operator=(const SSAUpdaterBulk &) = delete; - ~SSAUpdaterBulk(){}; + ~SSAUpdaterBulk() = default; /// Add a new variable to the SSA rewriter. This needs to be called before /// AddAvailableValue or AddUse calls. The return value is the variable ID, diff --git a/llvm/include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h b/llvm/include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h index f72c76c6f0f2..3636285e38f5 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h @@ -10,6 +10,7 @@ #define LLVM_TRANSFORMS_VECTORIZE_LOADSTOREVECTORIZER_H #include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" namespace llvm { diff --git a/llvm/include/llvm/module.modulemap b/llvm/include/llvm/module.modulemap index 25c7aeee148e..d0693ccfd8f6 100644 --- a/llvm/include/llvm/module.modulemap +++ b/llvm/include/llvm/module.modulemap @@ -60,6 +60,7 @@ module LLVM_BinaryFormat { textual header "BinaryFormat/DynamicTags.def" textual header "BinaryFormat/MachO.def" textual header "BinaryFormat/MinidumpConstants.def" + textual header "BinaryFormat/Swift.def" textual header "BinaryFormat/ELFRelocs/AArch64.def" textual header "BinaryFormat/ELFRelocs/AMDGPU.def" textual header "BinaryFormat/ELFRelocs/ARM.def" diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp index b4c985962837..0a0b53796add 100644 --- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -1010,10 +1010,13 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call, return ModRefInfo::NoModRef; } - // The semantics of memcpy intrinsics either exactly overlap or do not - // overlap, i.e., source and destination of any given memcpy are either - // no-alias or must-alias. - if (auto *Inst = dyn_cast<AnyMemCpyInst>(Call)) { + // Ideally, there should be no need to special-case memcpy/memmove + // intrinsics here, since the general machinery (based on memory attributes) + // should already handle them just fine. Unfortunately, it doesn't, due to a + // deficiency in operand bundle support. At the moment it's not clear if the + // complexity of enhancing the general mechanism is worth it. + // TODO: Consider improving operand bundle support in the general mechanism.
+ if (auto *Inst = dyn_cast<AnyMemTransferInst>(Call)) { AliasResult SrcAA = getBestAAResults().alias(MemoryLocation::getForSource(Inst), Loc, AAQI); AliasResult DestAA = diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp index d2f0c57f6dab..01681c47418a 100644 --- a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp +++ b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp @@ -39,6 +39,10 @@ cl::opt<bool> MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden, cl::desc("only allow matching call instructions if the " "name and type signature match.")); + +cl::opt<bool> + DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden, + cl::desc("Don't match or outline intrinsics")); } // namespace llvm IRInstructionData::IRInstructionData(Instruction &I, bool Legality, @@ -109,6 +113,24 @@ void IRInstructionData::setCalleeName(bool MatchByName) { assert(CI && "Instruction must be call"); CalleeName = ""; + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { + // To hash intrinsics, we use the opcode and types like the other + // instructions, but also the intrinsic ID and the name of the + // intrinsic. + Intrinsic::ID IntrinsicID = II->getIntrinsicID(); + FunctionType *FT = II->getFunctionType(); + // If there is an overloaded name, we have to use the complex version + // of getName to get the entire string. + if (Intrinsic::isOverloaded(IntrinsicID)) + CalleeName = + Intrinsic::getName(IntrinsicID, FT->params(), II->getModule(), FT); + // If there is not an overloaded name, we only need to use this version. + else + CalleeName = Intrinsic::getName(IntrinsicID).str(); + + return; + } + if (!CI->isIndirectCall() && MatchByName) CalleeName = CI->getCalledFunction()->getName().str(); } @@ -232,7 +254,7 @@ bool IRSimilarity::isClose(const IRInstructionData &A, // name is the same. We already know that the types are, since // isSameOperationAs is true.
if (isa<CallInst>(A.Inst) && isa<CallInst>(B.Inst)) { - if (A.getCalleeName().str().compare(B.getCalleeName().str()) != 0) + if (A.getCalleeName().str() != B.getCalleeName().str()) return false; } @@ -1139,6 +1161,7 @@ SimilarityGroupList &IRSimilarityIdentifier::findSimilarity( Mapper.InstClassifier.EnableBranches = this->EnableBranches; Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls; Mapper.EnableMatchCallsByName = EnableMatchingCallsByName; + Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics; populateMapper(Modules, InstrList, IntegerMapping); findCandidates(InstrList, IntegerMapping); @@ -1151,6 +1174,7 @@ SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) { Mapper.InstClassifier.EnableBranches = this->EnableBranches; Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls; Mapper.EnableMatchCallsByName = EnableMatchingCallsByName; + Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics; std::vector<IRInstructionData *> InstrList; std::vector<unsigned> IntegerMapping; @@ -1172,7 +1196,7 @@ IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass() bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) { IRSI.reset(new IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls, - MatchCallsByName)); + MatchCallsByName, !DisableIntrinsics)); return false; } @@ -1189,9 +1213,8 @@ bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) { AnalysisKey IRSimilarityAnalysis::Key; IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M, ModuleAnalysisManager &) { - auto IRSI = IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls, - MatchCallsByName); + MatchCallsByName, !DisableIntrinsics); IRSI.findSimilarity(M); return IRSI; } diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index b71b39334ace..4775340b3438 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -951,7 +951,7 @@ static Value *simplifyDivRem(Instruction::BinaryOps Opcode, Value *Op0, // X / undef -> poison // X % undef -> poison - if (Q.isUndefValue(Op1)) + if (Q.isUndefValue(Op1) || isa<PoisonValue>(Op1)) return PoisonValue::get(Ty); // X / 0 -> poison @@ -2418,6 +2418,10 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, if (Constant *C = foldOrCommuteConstant(Instruction::Xor, Op0, Op1, Q)) return C; + // X ^ poison -> poison + if (isa<PoisonValue>(Op1)) + return Op1; + // A ^ undef -> undef if (Q.isUndefValue(Op1)) return Op1; diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp index 0fbf1db0685d..cd0d4d6b9ca8 100644 --- a/llvm/lib/Analysis/Loads.cpp +++ b/llvm/lib/Analysis/Loads.cpp @@ -208,7 +208,7 @@ bool llvm::isDereferenceableAndAlignedPointer(const Value *V, Align Alignment, } bool llvm::isDereferenceableAndAlignedPointer(const Value *V, Type *Ty, - MaybeAlign MA, + Align Alignment, const DataLayout &DL, const Instruction *CtxI, const DominatorTree *DT, @@ -223,8 +223,6 @@ bool llvm::isDereferenceableAndAlignedPointer(const Value *V, Type *Ty, // determine the exact offset to the attributed variable, we can use that // information here. 
- // Require ABI alignment for loads without alignment specification - const Align Alignment = DL.getValueOrABITypeAlignment(MA, Ty); APInt AccessSize(DL.getPointerTypeSizeInBits(V->getType()), DL.getTypeStoreSize(Ty)); return isDereferenceableAndAlignedPointer(V, Alignment, AccessSize, DL, CtxI, diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp index dd6958716127..b161c490a6bc 100644 --- a/llvm/lib/Analysis/LoopInfo.cpp +++ b/llvm/lib/Analysis/LoopInfo.cpp @@ -1107,6 +1107,10 @@ int llvm::getIntLoopAttribute(const Loop *TheLoop, StringRef Name, return getOptionalIntLoopAttribute(TheLoop, Name).getValueOr(Default); } +bool llvm::isFinite(const Loop *L) { + return L->getHeader()->getParent()->willReturn(); +} + static const char *LLVMLoopMustProgress = "llvm.loop.mustprogress"; bool llvm::hasMustProgress(const Loop *L) { diff --git a/llvm/lib/Analysis/MemDerefPrinter.cpp b/llvm/lib/Analysis/MemDerefPrinter.cpp index 30937a2e4931..82617c7256a5 100644 --- a/llvm/lib/Analysis/MemDerefPrinter.cpp +++ b/llvm/lib/Analysis/MemDerefPrinter.cpp @@ -59,8 +59,8 @@ bool MemDerefPrinter::runOnFunction(Function &F) { Value *PO = LI->getPointerOperand(); if (isDereferenceablePointer(PO, LI->getType(), DL)) Deref.push_back(PO); - if (isDereferenceableAndAlignedPointer(PO, LI->getType(), - MaybeAlign(LI->getAlign()), DL)) + if (isDereferenceableAndAlignedPointer(PO, LI->getType(), LI->getAlign(), + DL)) DerefAndAligned.insert(PO); } } @@ -94,8 +94,8 @@ PreservedAnalyses MemDerefPrinterPass::run(Function &F, Value *PO = LI->getPointerOperand(); if (isDereferenceablePointer(PO, LI->getType(), DL)) Deref.push_back(PO); - if (isDereferenceableAndAlignedPointer(PO, LI->getType(), - MaybeAlign(LI->getAlign()), DL)) + if (isDereferenceableAndAlignedPointer(PO, LI->getType(), LI->getAlign(), + DL)) DerefAndAligned.insert(PO); } } diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 07aac1523b47..977fc0911355 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -3486,7 +3486,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, return S; } -const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) { +APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) { APInt A = C1->getAPInt().abs(); APInt B = C2->getAPInt().abs(); uint32_t ABW = A.getBitWidth(); @@ -7017,7 +7017,7 @@ bool ScalarEvolution::loopIsFiniteByAssumption(const Loop *L) { // A mustprogress loop without side effects must be finite. // TODO: The check used here is very conservative. It's only *specific* // side effects which are well defined in infinite loops. - return isMustProgress(L) && loopHasNoSideEffects(L); + return isFinite(L) || (isMustProgress(L) && loopHasNoSideEffects(L)); } const SCEV *ScalarEvolution::createSCEV(Value *V) { @@ -8466,8 +8466,11 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L, Pred = ICmpInst::getSwappedPredicate(Pred); } + bool ControllingFiniteLoop = + ControlsExit && loopHasNoAbnormalExits(L) && loopIsFiniteByAssumption(L); // Simplify the operands before analyzing them. - (void)SimplifyICmpOperands(Pred, LHS, RHS); + (void)SimplifyICmpOperands(Pred, LHS, RHS, /*Depth=*/0, + ControllingFiniteLoop); // If we have a comparison of a chrec against a constant, try to use value // ranges to answer this query. 
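The ControllingFiniteLoop flag threaded into SimplifyICmpOperands above licenses the GE/LE to GT/LT canonicalization in the hunks below. A small worked example of why finiteness justifies it (illustrative C++, not code from the patch):

// The exit test is "i <= n" (ICMP_ULE). Rewriting it as "i < n + 1" is only
// sound when n + 1 cannot wrap, i.e. n != UINT_MAX. SCEV usually proves that
// from the range of n; the new flag supplies a second justification: if this
// compare controls the exit of a loop already known to be finite, then
// n == UINT_MAX is impossible, because "i <= UINT_MAX" holds for every
// unsigned i and the loop could never terminate.
unsigned sumPrefix(const unsigned *A, unsigned n) {
  unsigned S = 0;
  for (unsigned i = 0; i <= n; ++i) // assumed finite for the example
    S += A[i];
  return S;
}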
@@ -8487,9 +8490,7 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L, // the same values on self-wrap of the IV, then we can infer that IV // doesn't self wrap because if it did, we'd have an infinite (undefined) // loop. - if (ControlsExit && isLoopInvariant(RHS, L) && loopHasNoAbnormalExits(L) && - loopIsFiniteByAssumption(L)) { - + if (ControllingFiniteLoop && isLoopInvariant(RHS, L)) { // TODO: We can peel off any functions which are invertible *in L*. Loop // invariant terms are effectively constants for our purposes here. auto *InnerLHS = LHS; @@ -9940,7 +9941,8 @@ static bool HasSameValue(const SCEV *A, const SCEV *B) { bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, const SCEV *&LHS, const SCEV *&RHS, - unsigned Depth) { + unsigned Depth, + bool ControllingFiniteLoop) { bool Changed = false; // Simplifies ICMP to trivial true or false by turning it into '0 == 0' or // '0 != 0'. @@ -10069,10 +10071,15 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, } // If possible, canonicalize GE/LE comparisons to GT/LT comparisons, by - // adding or subtracting 1 from one of the operands. + // adding or subtracting 1 from one of the operands. This can be done for + // one of two reasons: + // 1) The range of the RHS does not include the (signed/unsigned) boundaries + // 2) The loop is finite, with this comparison controlling the exit. Since the + // loop is finite, the bound cannot include the corresponding boundary + // (otherwise it would loop forever). switch (Pred) { case ICmpInst::ICMP_SLE: - if (!getSignedRangeMax(RHS).isMaxSignedValue()) { + if (ControllingFiniteLoop || !getSignedRangeMax(RHS).isMaxSignedValue()) { RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS, SCEV::FlagNSW); Pred = ICmpInst::ICMP_SLT; @@ -10085,7 +10092,7 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, } break; case ICmpInst::ICMP_SGE: - if (!getSignedRangeMin(RHS).isMinSignedValue()) { + if (ControllingFiniteLoop || !getSignedRangeMin(RHS).isMinSignedValue()) { RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS, SCEV::FlagNSW); Pred = ICmpInst::ICMP_SGT; @@ -10098,7 +10105,7 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, } break; case ICmpInst::ICMP_ULE: - if (!getUnsignedRangeMax(RHS).isMaxValue()) { + if (ControllingFiniteLoop || !getUnsignedRangeMax(RHS).isMaxValue()) { RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS, SCEV::FlagNUW); Pred = ICmpInst::ICMP_ULT; @@ -10110,7 +10117,7 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, } break; case ICmpInst::ICMP_UGE: - if (!getUnsignedRangeMin(RHS).isMinValue()) { + if (ControllingFiniteLoop || !getUnsignedRangeMin(RHS).isMinValue()) { RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS); Pred = ICmpInst::ICMP_UGT; Changed = true; @@ -10130,7 +10137,8 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, // Recursively simplify until we either hit a recursion limit or nothing // changes. if (Changed) - return SimplifyICmpOperands(Pred, LHS, RHS, Depth+1); + return SimplifyICmpOperands(Pred, LHS, RHS, Depth + 1, + ControllingFiniteLoop); return Changed; } @@ -10911,7 +10919,8 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, // For unsigned and equality predicates, try to prove that both found // operands fit into narrow unsigned range. If so, try to prove facts in // narrow types. 
- if (!CmpInst::isSigned(FoundPred) && !FoundLHS->getType()->isPointerTy()) { + if (!CmpInst::isSigned(FoundPred) && !FoundLHS->getType()->isPointerTy() && + !FoundRHS->getType()->isPointerTy()) { auto *NarrowType = LHS->getType(); auto *WideType = FoundLHS->getType(); auto BitWidth = getTypeSizeInBits(NarrowType); @@ -10929,7 +10938,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, } } - if (LHS->getType()->isPointerTy()) + if (LHS->getType()->isPointerTy() || RHS->getType()->isPointerTy()) return false; if (CmpInst::isSigned(Pred)) { LHS = getSignExtendExpr(LHS, FoundLHS->getType()); @@ -10940,7 +10949,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, } } else if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(FoundLHS->getType())) { - if (FoundLHS->getType()->isPointerTy()) + if (FoundLHS->getType()->isPointerTy() || FoundRHS->getType()->isPointerTy()) return false; if (CmpInst::isSigned(FoundPred)) { FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType()); diff --git a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp index 23dbb32f38de..627a78a2a2fd 100644 --- a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -786,3 +786,36 @@ MDNode *AAMDNodes::shiftTBAAStruct(MDNode *MD, size_t Offset) { } return MDNode::get(MD->getContext(), Sub); } + +MDNode *AAMDNodes::extendToTBAA(MDNode *MD, ssize_t Len) { + // Fast path if 0-length + if (Len == 0) + return nullptr; + + // Regular TBAA is invariant of length, so we only need to consider + // struct-path TBAA. + if (!isStructPathTBAA(MD)) + return MD; + + TBAAStructTagNode Tag(MD); + + // Only new format TBAA has a size + if (!Tag.isNewFormat()) + return MD; + + // If unknown size, drop the TBAA. + if (Len == -1) + return nullptr; + + // Otherwise, create TBAA with the new Len + SmallVector<Metadata *, 4> NextNodes(MD->operands()); + ConstantInt *PreviousSize = mdconst::extract<ConstantInt>(NextNodes[3]); + + // Don't create a new MDNode if it is the same length. 
+ if (PreviousSize->equalsInt(Len)) + return MD; + + NextNodes[3] = + ConstantAsMetadata::get(ConstantInt::get(PreviousSize->getType(), Len)); + return MDNode::get(MD->getContext(), NextNodes); +} diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 34358739f9a8..c14bdb8bc262 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -4559,8 +4559,8 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, return false; const DataLayout &DL = LI->getModule()->getDataLayout(); return isDereferenceableAndAlignedPointer( - LI->getPointerOperand(), LI->getType(), MaybeAlign(LI->getAlign()), DL, - CtxI, DT, TLI); + LI->getPointerOperand(), LI->getType(), LI->getAlign(), DL, CtxI, DT, + TLI); } case Instruction::Call: { auto *CI = cast<const CallInst>(Inst); diff --git a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp index 99d2c8221281..0d28d93c93c0 100644 --- a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp +++ b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp @@ -117,15 +117,28 @@ bool MetadataVerifier::verifyKernelArgs(msgpack::DocNode &Node) { .Case("image", true) .Case("pipe", true) .Case("queue", true) + .Case("hidden_block_count_x", true) + .Case("hidden_block_count_y", true) + .Case("hidden_block_count_z", true) + .Case("hidden_group_size_x", true) + .Case("hidden_group_size_y", true) + .Case("hidden_group_size_z", true) + .Case("hidden_remainder_x", true) + .Case("hidden_remainder_y", true) + .Case("hidden_remainder_z", true) .Case("hidden_global_offset_x", true) .Case("hidden_global_offset_y", true) .Case("hidden_global_offset_z", true) + .Case("hidden_grid_dims", true) .Case("hidden_none", true) .Case("hidden_printf_buffer", true) .Case("hidden_hostcall_buffer", true) .Case("hidden_default_queue", true) .Case("hidden_completion_action", true) .Case("hidden_multigrid_sync_arg", true) + .Case("hidden_private_base", true) + .Case("hidden_shared_base", true) + .Case("hidden_queue_ptr", true) .Default(false); })) return false; diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index eb4e09ea3a26..4bba0b356675 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -4669,7 +4669,7 @@ void IndexBitcodeWriter::write() { // where it will be written in a new bitcode block. This is used when // writing the combined index file for ThinLTO. When writing a subset of the // index for a distributed backend, provide a \p ModuleToSummariesForIndex map. -void llvm::WriteIndexToFile( +void llvm::writeIndexToFile( const ModuleSummaryIndex &Index, raw_ostream &Out, const std::map<std::string, GVSummaryMapTy> *ModuleToSummariesForIndex) { SmallVector<char, 0> Buffer; @@ -4829,7 +4829,7 @@ void BitcodeWriter::writeThinLinkBitcode(const Module &M, // Write the specified thin link bitcode file to the given raw output stream, // where it will be written in a new bitcode block. This is used when // writing the per-module index file for ThinLTO. 
-void llvm::WriteThinLinkBitcodeToFile(const Module &M, raw_ostream &Out, +void llvm::writeThinLinkBitcodeToFile(const Module &M, raw_ostream &Out, const ModuleSummaryIndex &Index, const ModuleHash &ModHash) { SmallVector<char, 0> Buffer; @@ -4881,7 +4881,7 @@ static const char *getSectionNameForCommandline(const Triple &T) { llvm_unreachable("Unimplemented ObjectFormatType"); } -void llvm::EmbedBitcodeInModule(llvm::Module &M, llvm::MemoryBufferRef Buf, +void llvm::embedBitcodeInModule(llvm::Module &M, llvm::MemoryBufferRef Buf, bool EmbedBitcode, bool EmbedCmdline, const std::vector<uint8_t> &CmdArgs) { // Save llvm.compiler.used and remove it. diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp index e8fef505e43d..cdf5586766da 100644 --- a/llvm/lib/CodeGen/Analysis.cpp +++ b/llvm/lib/CodeGen/Analysis.cpp @@ -585,7 +585,7 @@ bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I, // goes, they shouldn't affect whether the call is a tail call. for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable, Attribute::DereferenceableOrNull, Attribute::NoAlias, - Attribute::NonNull}) { + Attribute::NonNull, Attribute::NoUndef}) { CallerAttrs.removeAttribute(Attr); CalleeAttrs.removeAttribute(Attr); } diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 4f3f798fe6f8..3e8e190eecc3 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1647,8 +1647,18 @@ void AsmPrinter::emitGlobalAlias(Module &M, const GlobalAlias &GA) { // Set the symbol type to function if the alias has a function type. // This affects codegen when the aliasee is not a function. - if (IsFunction) + if (IsFunction) { OutStreamer->emitSymbolAttribute(Name, MCSA_ELF_TypeFunction); + if (TM.getTargetTriple().isOSBinFormatCOFF()) { + OutStreamer->BeginCOFFSymbolDef(Name); + OutStreamer->EmitCOFFSymbolStorageClass( + GA.hasLocalLinkage() ? 
COFF::IMAGE_SYM_CLASS_STATIC + : COFF::IMAGE_SYM_CLASS_EXTERNAL); + OutStreamer->EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION + << COFF::SCT_COMPLEX_TYPE_SHIFT); + OutStreamer->EndCOFFSymbolDef(); + } + } emitVisibility(Name, GA.getVisibility()); diff --git a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index 1a0256f30d41..396322c4979d 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -314,8 +314,7 @@ unsigned DIE::computeOffsetsAndAbbrevs(const dwarf::FormParams &FormParams, //===----------------------------------------------------------------------===// // DIEUnit Implementation //===----------------------------------------------------------------------===// -DIEUnit::DIEUnit(dwarf::Tag UnitTag) - : Die(UnitTag), Section(nullptr), Offset(0) { +DIEUnit::DIEUnit(dwarf::Tag UnitTag) : Die(UnitTag) { Die.Owner = this; assert((UnitTag == dwarf::DW_TAG_compile_unit || UnitTag == dwarf::DW_TAG_skeleton_unit || diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index e36b7e2ae885..63343d2519f9 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -33,8 +33,7 @@ #include "llvm/Target/TargetOptions.h" using namespace llvm; -DwarfCFIExceptionBase::DwarfCFIExceptionBase(AsmPrinter *A) - : EHStreamer(A), shouldEmitCFI(false), hasEmittedCFISections(false) {} +DwarfCFIExceptionBase::DwarfCFIExceptionBase(AsmPrinter *A) : EHStreamer(A) {} void DwarfCFIExceptionBase::markFunctionEnd() { endFragment(); @@ -52,8 +51,7 @@ void DwarfCFIExceptionBase::endFragment() { } DwarfCFIException::DwarfCFIException(AsmPrinter *A) - : DwarfCFIExceptionBase(A), shouldEmitPersonality(false), - forceEmitPersonality(false), shouldEmitLSDA(false) {} + : DwarfCFIExceptionBase(A) {} DwarfCFIException::~DwarfCFIException() {} diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 680b9586228f..609b568f28be 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -3367,8 +3367,7 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, // Fast path if we're building some type units and one has already used the // address pool we know we're going to throw away all this work anyway, so // don't bother building dependent types. - if (!TypeUnitsUnderConstruction.empty() && - (AddrPool.hasBeenUsed() || SeenLocalType)) + if (!TypeUnitsUnderConstruction.empty() && AddrPool.hasBeenUsed()) return; auto Ins = TypeSignatures.insert(std::make_pair(CTy, 0)); @@ -3379,7 +3378,6 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, bool TopLevelType = TypeUnitsUnderConstruction.empty(); AddrPool.resetUsedFlag(); - SeenLocalType = false; auto OwnedUnit = std::make_unique<DwarfTypeUnit>(CU, Asm, this, &InfoHolder, getDwoLineTable(CU)); @@ -3423,7 +3421,7 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, // Types referencing entries in the address table cannot be placed in type // units. - if (AddrPool.hasBeenUsed() || SeenLocalType) { + if (AddrPool.hasBeenUsed()) { // Remove all the types built while building this type. 
// This is pessimistic as some of these types might not be dependent on @@ -3451,18 +3449,14 @@ DwarfDebug::NonTypeUnitContext::NonTypeUnitContext(DwarfDebug *DD) : DD(DD), - TypeUnitsUnderConstruction(std::move(DD->TypeUnitsUnderConstruction)), - AddrPoolUsed(DD->AddrPool.hasBeenUsed()), - SeenLocalType(DD->SeenLocalType) { + TypeUnitsUnderConstruction(std::move(DD->TypeUnitsUnderConstruction)), AddrPoolUsed(DD->AddrPool.hasBeenUsed()) { DD->TypeUnitsUnderConstruction.clear(); DD->AddrPool.resetUsedFlag(); - DD->SeenLocalType = false; } DwarfDebug::NonTypeUnitContext::~NonTypeUnitContext() { DD->TypeUnitsUnderConstruction = std::move(TypeUnitsUnderConstruction); DD->AddrPool.resetUsedFlag(AddrPoolUsed); - DD->SeenLocalType = SeenLocalType; } DwarfDebug::NonTypeUnitContext DwarfDebug::enterNonTypeUnitContext() { diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index 0043000652e8..4e1a1b1e068d 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -433,7 +433,6 @@ private: DenseMap<const DIStringType *, unsigned> StringTypeLocMap; AddressPool AddrPool; - bool SeenLocalType = false; /// Accelerator tables. AccelTable<DWARF5AccelTableData> AccelDebugNames; @@ -672,7 +671,6 @@ public: DwarfDebug *DD; decltype(DwarfDebug::TypeUnitsUnderConstruction) TypeUnitsUnderConstruction; bool AddrPoolUsed; - bool SeenLocalType; friend class DwarfDebug; NonTypeUnitContext(DwarfDebug *DD); public: @@ -681,7 +679,6 @@ public: }; NonTypeUnitContext enterNonTypeUnitContext(); - void seenLocalType() { SeenLocalType = true; } /// Add a label so that arange data can be generated for it. void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/llvm/lib/CodeGen/AsmPrinter/DwarfException.h index 4defa8a30855..e5cda4739fde 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfException.h @@ -26,9 +26,9 @@ protected: DwarfCFIExceptionBase(AsmPrinter *A); /// Per-function flag to indicate if frame CFI info should be emitted. - bool shouldEmitCFI; + bool shouldEmitCFI = false; /// Per-module flag to indicate if .cfi_section has been emitted. - bool hasEmittedCFISections; + bool hasEmittedCFISections = false; void markFunctionEnd() override; void endFragment() override; @@ -36,13 +36,13 @@ protected: class LLVM_LIBRARY_VISIBILITY DwarfCFIException : public DwarfCFIExceptionBase { /// Per-function flag to indicate if .cfi_personality should be emitted. - bool shouldEmitPersonality; + bool shouldEmitPersonality = false; /// Per-function flag to indicate if .cfi_personality must be emitted. - bool forceEmitPersonality; + bool forceEmitPersonality = false; /// Per-function flag to indicate if .cfi_lsda should be emitted. - bool shouldEmitLSDA; + bool shouldEmitLSDA = false; public: //===--------------------------------------------------------------------===// diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index ee932d105107..fe438102ee98 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -287,9 +287,17 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, // expression representing a value, rather than a location.
if ((!isParameterValue() && !isMemoryLocation() && !HasComplexExpression) || isEntryValue()) { + auto FragmentInfo = ExprCursor.getFragmentInfo(); + unsigned RegSize = 0; for (auto &Reg : DwarfRegs) { + RegSize += Reg.SubRegSize; if (Reg.DwarfRegNo >= 0) addReg(Reg.DwarfRegNo, Reg.Comment); + if (FragmentInfo) + if (RegSize > FragmentInfo->SizeInBits) + // If the register is larger than the current fragment stop + // once the fragment is covered. + break; addOpPiece(Reg.SubRegSize); } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 15d90c54adfc..5a2bd479f277 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -89,8 +89,7 @@ bool DIEDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI, DwarfUnit::DwarfUnit(dwarf::Tag UnitTag, const DICompileUnit *Node, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU) - : DIEUnit(UnitTag), CUNode(Node), Asm(A), DD(DW), DU(DWU), - IndexTyDie(nullptr) {} + : DIEUnit(UnitTag), CUNode(Node), Asm(A), DD(DW), DU(DWU) {} DwarfTypeUnit::DwarfTypeUnit(DwarfCompileUnit &CU, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU, @@ -597,8 +596,10 @@ DIE *DwarfUnit::createTypeDIE(const DIScope *Context, DIE &ContextDIE, // Skip updating the accelerator tables since this is not the full type. if (MDString *TypeId = CTy->getRawIdentifier()) DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy); - else + else { + auto X = DD->enterNonTypeUnitContext(); finishNonUnitTypeDIE(TyDIE, CTy); + } return &TyDIE; } constructTypeDIE(TyDIE, CTy); @@ -1852,23 +1853,5 @@ void DwarfTypeUnit::finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) { addString(D, dwarf::DW_AT_name, Name); if (Name.startswith("_STN") || !Name.contains('<')) addTemplateParams(D, CTy->getTemplateParams()); - // If the type is in an anonymous namespace, we can't reference it from a TU - // (since the type would be CU local and the TU doesn't specify which TU has - // the appropriate type definition) - so flag this emission as such and skip - // the rest of the emission now since we're going to throw out all this work - // and put the outer/referencing type in the CU instead. - // FIXME: Probably good to generalize this to a DICompositeType flag populated - // by the frontend, then we could use that to have types that can have - // decl+def merged by LTO but where the definition still doesn't go in a type - // unit because the type has only one definition. - for (DIScope *S = CTy->getScope(); S; S = S->getScope()) { - if (auto *NS = dyn_cast<DINamespace>(S)) { - if (NS->getName().empty()) { - DD->seenLocalType(); - break; - } - } - } - auto X = DD->enterNonTypeUnitContext(); getCU().createTypeDIE(CTy); } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h index 330f3bacca43..48d63d126701 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -51,7 +51,7 @@ protected: DwarfFile *DU; /// An anonymous type for index type. Owned by DIEUnit. - DIE *IndexTyDie; + DIE *IndexTyDie = nullptr; /// Tracks the mapping of unit level debug information variables to debug /// information entries. 
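Many hunks in this commit are the same mechanical modernization seen here for IndexTyDie: constant entries move out of constructor initializer lists into in-class default member initializers, and empty special members become = default. A before/after sketch of the idiom (illustrative types, not from the patch):

class DIE; // stand-in pointee type for the sketch

// Before: every constructor must repeat the defaults by hand.
struct UnitBefore {
  DIE *IndexTyDie;
  bool Emitted;
  UnitBefore() : IndexTyDie(nullptr), Emitted(false) {}
  virtual ~UnitBefore() {}
};

// After: defaults sit next to the declarations, so any constructor added
// later picks them up automatically, and = default states explicitly that
// the destructor has no user-provided behavior.
struct UnitAfter {
  DIE *IndexTyDie = nullptr;
  bool Emitted = false;
  UnitAfter() = default;
  virtual ~UnitAfter() = default;
};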
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 28f24e5ea908..c888adeafca5 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -3446,7 +3446,7 @@ private: bool AllAddrModesTrivial = true; /// Common Type for all different fields in addressing modes. - Type *CommonType; + Type *CommonType = nullptr; /// SimplifyQuery for simplifyInstruction utility. const SimplifyQuery &SQ; @@ -3456,7 +3456,7 @@ private: public: AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue) - : CommonType(nullptr), SQ(_SQ), Original(OriginalValue) {} + : SQ(_SQ), Original(OriginalValue) {} /// Get the combined AddrMode const ExtAddrMode &getAddrMode() const { diff --git a/llvm/lib/CodeGen/EarlyIfConversion.cpp b/llvm/lib/CodeGen/EarlyIfConversion.cpp index 0b5469b02637..6a0da4dad3c1 100644 --- a/llvm/lib/CodeGen/EarlyIfConversion.cpp +++ b/llvm/lib/CodeGen/EarlyIfConversion.cpp @@ -111,12 +111,11 @@ public: /// Information about each phi in the Tail block. struct PHIInfo { MachineInstr *PHI; - unsigned TReg, FReg; + unsigned TReg = 0, FReg = 0; // Latencies from Cond+Branch, TReg, and FReg to DstReg. - int CondCycles, TCycles, FCycles; + int CondCycles = 0, TCycles = 0, FCycles = 0; - PHIInfo(MachineInstr *phi) - : PHI(phi), TReg(0), FReg(0), CondCycles(0), TCycles(0), FCycles(0) {} + PHIInfo(MachineInstr *phi) : PHI(phi) {} }; SmallVector<PHIInfo, 8> PHIs; diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp index d0c2b8c267ff..60ee1812ee2c 100644 --- a/llvm/lib/CodeGen/ExpandMemCmp.cpp +++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp @@ -70,8 +70,8 @@ class MemCmpExpansion { CallInst *const CI; ResultBlock ResBlock; const uint64_t Size; - unsigned MaxLoadSize; - uint64_t NumLoadsNonOneByte; + unsigned MaxLoadSize = 0; + uint64_t NumLoadsNonOneByte = 0; const uint64_t NumLoadsPerBlockForZeroCmp; std::vector<BasicBlock *> LoadCmpBlocks; BasicBlock *EndBlock; @@ -219,8 +219,7 @@ MemCmpExpansion::MemCmpExpansion( const TargetTransformInfo::MemCmpExpansionOptions &Options, const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout, DomTreeUpdater *DTU) - : CI(CI), Size(Size), MaxLoadSize(0), NumLoadsNonOneByte(0), - NumLoadsPerBlockForZeroCmp(Options.NumLoadsPerBlock), + : CI(CI), Size(Size), NumLoadsPerBlockForZeroCmp(Options.NumLoadsPerBlock), IsUsedForZeroCmp(IsUsedForZeroCmp), DL(TheDataLayout), DTU(DTU), Builder(CI) { assert(Size > 0 && "zero blocks"); diff --git a/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp b/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp index 727d33fe4a40..6271a4514c27 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp @@ -64,7 +64,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, LegacyLegalizeAction Action) { return OS; } -LegacyLegalizerInfo::LegacyLegalizerInfo() : TablesInitialized(false) { +LegacyLegalizerInfo::LegacyLegalizerInfo() { // Set defaults. // FIXME: these two (G_ANYEXT and G_TRUNC?) can be legalized to the // fundamental load/store Jakob proposed. Once loads & stores are supported. 
diff --git a/llvm/lib/CodeGen/IfConversion.cpp b/llvm/lib/CodeGen/IfConversion.cpp index 681e2f3dc848..1b20d1da20ad 100644 --- a/llvm/lib/CodeGen/IfConversion.cpp +++ b/llvm/lib/CodeGen/IfConversion.cpp @@ -1211,11 +1211,11 @@ bool IfConverter::FeasibilityAnalysis(BBInfo &BBI, void IfConverter::AnalyzeBlock( MachineBasicBlock &MBB, std::vector<std::unique_ptr<IfcvtToken>> &Tokens) { struct BBState { - BBState(MachineBasicBlock &MBB) : MBB(&MBB), SuccsAnalyzed(false) {} + BBState(MachineBasicBlock &MBB) : MBB(&MBB) {} MachineBasicBlock *MBB; /// This flag is true if MBB's successors have been analyzed. - bool SuccsAnalyzed; + bool SuccsAnalyzed = false; }; // Push MBB to the stack. diff --git a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp index 2ee9379cb286..230c6846dde2 100644 --- a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp +++ b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp @@ -656,10 +656,10 @@ public: }; /// Basic-block the load instructions are within - BasicBlock *BB; + BasicBlock *BB = nullptr; /// Pointer value of all participating load instructions - Value *PV; + Value *PV = nullptr; /// Participating load instructions std::set<LoadInst *> LIs; @@ -668,7 +668,7 @@ public: std::set<Instruction *> Is; /// Final shuffle-vector instruction - ShuffleVectorInst *SVI; + ShuffleVectorInst *SVI = nullptr; /// Information of the offset for each vector element ElementInfo *EI; @@ -676,8 +676,7 @@ public: /// Vector Type FixedVectorType *const VTy; - VectorInfo(FixedVectorType *VTy) - : BB(nullptr), PV(nullptr), SVI(nullptr), VTy(VTy) { + VectorInfo(FixedVectorType *VTy) : VTy(VTy) { EI = new ElementInfo[VTy->getNumElements()]; } diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp index 8a190e769941..0eb6100230bd 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp @@ -274,6 +274,13 @@ public: // Map of the preferred location for each value. DenseMap<ValueIDNum, LocIdx> ValueToLoc; + + // Initialize the preferred-location map with illegal locations, to be + // filled in later. + for (auto &VLoc : VLocs) + if (VLoc.second.Kind == DbgValue::Def) + ValueToLoc.insert({VLoc.second.ID, LocIdx::MakeIllegalLoc()}); + ActiveMLocs.reserve(VLocs.size()); ActiveVLocs.reserve(VLocs.size()); @@ -285,21 +292,20 @@ public: ValueIDNum &VNum = MLocs[Idx.asU64()]; VarLocs.push_back(VNum); - // Short-circuit unnecessary preferred location update. - if (VLocs.empty()) + // Is there a variable that wants a location for this value? If not, skip. + auto VIt = ValueToLoc.find(VNum); + if (VIt == ValueToLoc.end()) continue; - auto it = ValueToLoc.find(VNum); + LocIdx CurLoc = VIt->second; // In order of preference, pick: // * Callee saved registers, // * Other registers, // * Spill slots. - if (it == ValueToLoc.end() || MTracker->isSpill(it->second) || - (!isCalleeSaved(it->second) && isCalleeSaved(Idx.asU64()))) { + if (CurLoc.isIllegal() || MTracker->isSpill(CurLoc) || + (!isCalleeSaved(CurLoc) && isCalleeSaved(Idx.asU64()))) { // Insert, or overwrite if insertion failed. - auto PrefLocRes = ValueToLoc.insert(std::make_pair(VNum, Idx)); - if (!PrefLocRes.second) - PrefLocRes.first->second = Idx; + VIt->second = Idx; } } @@ -314,7 +320,7 @@ public: // If the value has no location, we can't make a variable location.
const ValueIDNum &Num = Var.second.ID; auto ValuesPreferredLoc = ValueToLoc.find(Num); - if (ValuesPreferredLoc == ValueToLoc.end()) { + if (ValuesPreferredLoc->second.isIllegal()) { // If it's a def that occurs in this block, register it as a // use-before-def to be resolved as we step through the block. if (Num.getBlock() == (unsigned)MBB.getNumber() && !Num.isPHI()) @@ -1374,18 +1380,20 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) { // Look for any clobbers performed by a register mask. Only test locations // that are actually being tracked. - for (auto L : MTracker->locations()) { - // Stack locations can't be clobbered by regmasks. - if (MTracker->isSpill(L.Idx)) - continue; + if (!RegMaskPtrs.empty()) { + for (auto L : MTracker->locations()) { + // Stack locations can't be clobbered by regmasks. + if (MTracker->isSpill(L.Idx)) + continue; - Register Reg = MTracker->LocIdxToLocID[L.Idx]; - if (IgnoreSPAlias(Reg)) - continue; + Register Reg = MTracker->LocIdxToLocID[L.Idx]; + if (IgnoreSPAlias(Reg)) + continue; - for (auto *MO : RegMaskPtrs) - if (MO->clobbersPhysReg(Reg)) - TTracker->clobberMloc(L.Idx, MI.getIterator(), false); + for (auto *MO : RegMaskPtrs) + if (MO->clobbersPhysReg(Reg)) + TTracker->clobberMloc(L.Idx, MI.getIterator(), false); + } } // Tell TTracker about any folded stack store. @@ -2212,40 +2220,6 @@ void InstrRefBasedLDV::buildMLocValueMap( // redundant PHIs. } -// Boilerplate for feeding MachineBasicBlocks into IDF calculator. Provide -// template specialisations for graph traits and a successor enumerator. -namespace llvm { -template <> struct GraphTraits<MachineBasicBlock> { - using NodeRef = MachineBasicBlock *; - using ChildIteratorType = MachineBasicBlock::succ_iterator; - - static NodeRef getEntryNode(MachineBasicBlock *BB) { return BB; } - static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); } - static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); } -}; - -template <> struct GraphTraits<const MachineBasicBlock> { - using NodeRef = const MachineBasicBlock *; - using ChildIteratorType = MachineBasicBlock::const_succ_iterator; - - static NodeRef getEntryNode(const MachineBasicBlock *BB) { return BB; } - static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); } - static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); } -}; - -using MachineDomTreeBase = DomTreeBase<MachineBasicBlock>::NodeType; -using MachineDomTreeChildGetter = - typename IDFCalculatorDetail::ChildrenGetterTy<MachineDomTreeBase, false>; - -namespace IDFCalculatorDetail { -template <> -typename MachineDomTreeChildGetter::ChildrenTy -MachineDomTreeChildGetter::get(const NodeRef &N) { - return {N->succ_begin(), N->succ_end()}; -} -} // namespace IDFCalculatorDetail -} // namespace llvm - void InstrRefBasedLDV::BlockPHIPlacement( const SmallPtrSetImpl<MachineBasicBlock *> &AllBlocks, const SmallPtrSetImpl<MachineBasicBlock *> &DefBlocks, @@ -2253,8 +2227,7 @@ void InstrRefBasedLDV::BlockPHIPlacement( // Apply IDF calculator to the designated set of location defs, storing // required PHIs into PHIBlocks. Uses the dominator tree stored in the // InstrRefBasedLDV object. 
- IDFCalculatorDetail::ChildrenGetterTy<MachineDomTreeBase, false> foo; - IDFCalculatorBase<MachineDomTreeBase, false> IDF(DomTree->getBase(), foo); + IDFCalculatorBase<MachineBasicBlock, false> IDF(DomTree->getBase()); IDF.setLiveInBlocks(AllBlocks); IDF.setDefiningBlocks(DefBlocks); @@ -2465,8 +2438,71 @@ bool InstrRefBasedLDV::vlocJoin( } } -void InstrRefBasedLDV::buildVLocValueMap(const DILocation *DILoc, - const SmallSet<DebugVariable, 4> &VarsWeCareAbout, +void InstrRefBasedLDV::getBlocksForScope( + const DILocation *DILoc, + SmallPtrSetImpl<const MachineBasicBlock *> &BlocksToExplore, + const SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks) { + // Get the set of "normal" in-lexical-scope blocks. + LS.getMachineBasicBlocks(DILoc, BlocksToExplore); + + // VarLoc LiveDebugValues tracks variable locations that are defined in + // blocks not in scope. This is something we could legitimately ignore, but + // let's allow it for now for the sake of coverage. + BlocksToExplore.insert(AssignBlocks.begin(), AssignBlocks.end()); + + // Storage for artificial blocks we intend to add to BlocksToExplore. + DenseSet<const MachineBasicBlock *> ToAdd; + + // To avoid needlessly dropping large volumes of variable locations, propagate + // variables through artificial blocks, i.e. those that don't have any + // instructions in scope at all. To accurately replicate VarLoc + // LiveDebugValues, this means exploring all artificial successors too. + // Perform a depth-first-search to enumerate those blocks. + for (auto *MBB : BlocksToExplore) { + // Depth-first-search state: each node is a block and which successor + // we're currently exploring. + SmallVector<std::pair<const MachineBasicBlock *, + MachineBasicBlock::const_succ_iterator>, + 8> + DFS; + + // Find any artificial successors not already tracked. + for (auto *succ : MBB->successors()) { + if (BlocksToExplore.count(succ)) + continue; + if (!ArtificialBlocks.count(succ)) + continue; + ToAdd.insert(succ); + DFS.push_back({succ, succ->succ_begin()}); + } + + // Search all those blocks, depth first. + while (!DFS.empty()) { + const MachineBasicBlock *CurBB = DFS.back().first; + MachineBasicBlock::const_succ_iterator &CurSucc = DFS.back().second; + // Walk back if we've explored this block's successors to the end. + if (CurSucc == CurBB->succ_end()) { + DFS.pop_back(); + continue; + } + + // If the current successor is artificial and unexplored, descend into + // it. + if (!ToAdd.count(*CurSucc) && ArtificialBlocks.count(*CurSucc)) { + ToAdd.insert(*CurSucc); + DFS.push_back({*CurSucc, (*CurSucc)->succ_begin()}); + continue; + } + + ++CurSucc; + } + }; + + BlocksToExplore.insert(ToAdd.begin(), ToAdd.end()); +} + +void InstrRefBasedLDV::buildVLocValueMap( + const DILocation *DILoc, const SmallSet<DebugVariable, 4> &VarsWeCareAbout, SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks, LiveInsT &Output, ValueIDNum **MOutLocs, ValueIDNum **MInLocs, SmallVectorImpl<VLocTracker> &AllTheVLocs) { @@ -2490,74 +2526,7 @@ void InstrRefBasedLDV::buildVLocValueMap(const DILocation *DILoc, return BBToOrder[A] < BBToOrder[B]; }; - LS.getMachineBasicBlocks(DILoc, BlocksToExplore); - - // A separate container to distinguish "blocks we're exploring" versus - // "blocks that are potentially in scope. See comment at start of vlocJoin. - SmallPtrSet<const MachineBasicBlock *, 8> InScopeBlocks = BlocksToExplore; - - // VarLoc LiveDebugValues tracks variable locations that are defined in - // blocks not in scope.
This is something we could legitimately ignore, but - lets allow it for now for the sake of coverage. - BlocksToExplore.insert(AssignBlocks.begin(), AssignBlocks.end()); - - // We also need to propagate variable values through any artificial blocks - // that immediately follow blocks in scope. - DenseSet<const MachineBasicBlock *> ToAdd; - - // Helper lambda: For a given block in scope, perform a depth first search - // of all the artificial successors, adding them to the ToAdd collection. - auto AccumulateArtificialBlocks = - [this, &ToAdd, &BlocksToExplore, - &InScopeBlocks](const MachineBasicBlock *MBB) { - // Depth-first-search state: each node is a block and which successor - // we're currently exploring. - SmallVector<std::pair<const MachineBasicBlock *, - MachineBasicBlock::const_succ_iterator>, - 8> - DFS; - - // Find any artificial successors not already tracked. - for (auto *succ : MBB->successors()) { - if (BlocksToExplore.count(succ) || InScopeBlocks.count(succ)) - continue; - if (!ArtificialBlocks.count(succ)) - continue; - ToAdd.insert(succ); - DFS.push_back(std::make_pair(succ, succ->succ_begin())); - } - - // Search all those blocks, depth first. - while (!DFS.empty()) { - const MachineBasicBlock *CurBB = DFS.back().first; - MachineBasicBlock::const_succ_iterator &CurSucc = DFS.back().second; - // Walk back if we've explored this blocks successors to the end. - if (CurSucc == CurBB->succ_end()) { - DFS.pop_back(); - continue; - } - - // If the current successor is artificial and unexplored, descend into - // it. - if (!ToAdd.count(*CurSucc) && ArtificialBlocks.count(*CurSucc)) { - ToAdd.insert(*CurSucc); - DFS.push_back(std::make_pair(*CurSucc, (*CurSucc)->succ_begin())); - continue; - } - - ++CurSucc; - } - }; - - // Search in-scope blocks and those containing a DBG_VALUE from this scope - // for artificial successors. - for (auto *MBB : BlocksToExplore) - AccumulateArtificialBlocks(MBB); - for (auto *MBB : InScopeBlocks) - AccumulateArtificialBlocks(MBB); - - BlocksToExplore.insert(ToAdd.begin(), ToAdd.end()); - InScopeBlocks.insert(ToAdd.begin(), ToAdd.end()); + getBlocksForScope(DILoc, BlocksToExplore, AssignBlocks); // Single block scope: not interesting! No propagation at all. Note that // this could probably go above ArtificialBlocks without damage, but @@ -2628,7 +2597,15 @@ void InstrRefBasedLDV::buildVLocValueMap(const DILocation *DILoc, SmallVector<MachineBasicBlock *, 32> PHIBlocks; - // Request the set of PHIs we should insert for this variable. + // Request the set of PHIs we should insert for this variable. If there's + // only one value definition, things are very simple. + if (DefBlocks.size() == 1) { + placePHIsForSingleVarDefinition(MutBlocksToExplore, *DefBlocks.begin(), + AllTheVLocs, Var, Output); + continue; + } + + // Otherwise: we need to place PHIs through SSA and propagate values. BlockPHIPlacement(MutBlocksToExplore, DefBlocks, PHIBlocks); // Insert PHIs into the per-block live-in tables for this variable. @@ -2769,6 +2746,39 @@ void InstrRefBasedLDV::buildVLocValueMap(const DILocation *DILoc, BlocksToExplore.clear(); } +void InstrRefBasedLDV::placePHIsForSingleVarDefinition( + const SmallPtrSetImpl<MachineBasicBlock *> &InScopeBlocks, + MachineBasicBlock *AssignMBB, SmallVectorImpl<VLocTracker> &AllTheVLocs, + const DebugVariable &Var, LiveInsT &Output) { + // If there is a single definition of the variable, then working out its + // value everywhere is very simple: it's every block dominated by the + // definition.
At the dominance frontier, the usual algorithm would: + // * Place PHIs, + // * Propagate values into them, + // * Find there's no incoming variable value from the other incoming branches + // of the dominance frontier, + // * Specify there's no variable value in blocks past the frontier. + // This is a common case, hence it's worth special-casing. + + // Pick out the variable's value from the block transfer function. + VLocTracker &VLocs = AllTheVLocs[AssignMBB->getNumber()]; + auto ValueIt = VLocs.Vars.find(Var); + const DbgValue &Value = ValueIt->second; + + // Assign the variable value at entry to each dominated block that's in scope. + // Skip the definition block -- it's assigned the variable value in the middle + // of the block somewhere. + for (auto *ScopeBlock : InScopeBlocks) { + if (!DomTree->properlyDominates(AssignMBB, ScopeBlock)) + continue; + + Output[ScopeBlock->getNumber()].push_back({Var, Value}); + } + + // All blocks that aren't dominated have no live-in value, thus no variable + // value will be given to them. +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void InstrRefBasedLDV::dump_mloc_transfer( const MLocTransferMap &mloc_transfer) const { @@ -2806,39 +2816,7 @@ void InstrRefBasedLDV::emitLocations( } } - // Go through all the transfers recorded in the TransferTracker -- this is - // both the live-ins to a block, and any movements of values that happen - // in the middle. - for (const auto &P : TTracker->Transfers) { - // We have to insert DBG_VALUEs in a consistent order, otherwise they - // appear in DWARF in different orders. Use the order that they appear - // when walking through each block / each instruction, stored in - // AllVarsNumbering. - SmallVector<std::pair<unsigned, MachineInstr *>> Insts; - for (MachineInstr *MI : P.Insts) { - DebugVariable Var(MI->getDebugVariable(), MI->getDebugExpression(), - MI->getDebugLoc()->getInlinedAt()); - Insts.emplace_back(AllVarsNumbering.find(Var)->second, MI); - } - llvm::sort(Insts, - [](const auto &A, const auto &B) { return A.first < B.first; }); - - // Insert either before or after the designated point... - if (P.MBB) { - MachineBasicBlock &MBB = *P.MBB; - for (const auto &Pair : Insts) - MBB.insert(P.Pos, Pair.second); - } else { - // Terminators, like tail calls, can clobber things. Don't try and place - // transfers after them. - if (P.Pos->isTerminator()) - continue; - - MachineBasicBlock &MBB = *P.Pos->getParent(); - for (const auto &Pair : Insts) - MBB.insertAfterBundle(P.Pos, Pair.second); - } - } + emitTransfers(AllVarsNumbering); } void InstrRefBasedLDV::initialSetup(MachineFunction &MF) { @@ -2883,6 +2861,45 @@ void InstrRefBasedLDV::initialSetup(MachineFunction &MF) { #endif } +bool InstrRefBasedLDV::emitTransfers( + DenseMap<DebugVariable, unsigned> &AllVarsNumbering) { + // Go through all the transfers recorded in the TransferTracker -- this is + // both the live-ins to a block, and any movements of values that happen + // in the middle. + for (const auto &P : TTracker->Transfers) { + // We have to insert DBG_VALUEs in a consistent order, otherwise they + // appear in DWARF in different orders. Use the order that they appear + // when walking through each block / each instruction, stored in + // AllVarsNumbering.
+ SmallVector<std::pair<unsigned, MachineInstr *>> Insts; + for (MachineInstr *MI : P.Insts) { + DebugVariable Var(MI->getDebugVariable(), MI->getDebugExpression(), + MI->getDebugLoc()->getInlinedAt()); + Insts.emplace_back(AllVarsNumbering.find(Var)->second, MI); + } + llvm::sort(Insts, + [](const auto &A, const auto &B) { return A.first < B.first; }); + + // Insert either before or after the designated point... + if (P.MBB) { + MachineBasicBlock &MBB = *P.MBB; + for (const auto &Pair : Insts) + MBB.insert(P.Pos, Pair.second); + } else { + // Terminators, like tail calls, can clobber things. Don't try and place + // transfers after them. + if (P.Pos->isTerminator()) + continue; + + MachineBasicBlock &MBB = *P.Pos->getParent(); + for (const auto &Pair : Insts) + MBB.insertAfterBundle(P.Pos, Pair.second); + } + } + + return TTracker->Transfers.size() != 0; +} + /// Calculate the liveness information for the given machine function and /// extend ranges across basic blocks. bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, @@ -2989,14 +3006,14 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, DenseMap<DebugVariable, unsigned> AllVarsNumbering; // Map from one LexicalScope to all the variables in that scope. - DenseMap<const LexicalScope *, SmallSet<DebugVariable, 4>> ScopeToVars; + ScopeToVarsT ScopeToVars; - // Map from One lexical scope to all blocks in that scope. - DenseMap<const LexicalScope *, SmallPtrSet<MachineBasicBlock *, 4>> - ScopeToBlocks; + // Map from one lexical scope to all blocks where assignments happen for + // that scope. + ScopeToAssignBlocksT ScopeToAssignBlocks; - // Store a DILocation that describes a scope. - DenseMap<const LexicalScope *, const DILocation *> ScopeToDILocation; + // Store map of DILocations that describe scopes. + ScopeToDILocT ScopeToDILocation; // To mirror old LiveDebugValues, enumerate variables in RPOT order. Otherwise // the order is unimportant, it just has to be stable. @@ -3016,7 +3033,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, AllVarsNumbering.insert(std::make_pair(Var, AllVarsNumbering.size())); ScopeToVars[Scope].insert(Var); - ScopeToBlocks[Scope].insert(VTracker->MBB); + ScopeToAssignBlocks[Scope].insert(VTracker->MBB); ScopeToDILocation[Scope] = ScopeLoc; ++VarAssignCount; } @@ -3040,7 +3057,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, // a map of variables to values in SavedLiveIns. for (auto &P : ScopeToVars) { buildVLocValueMap(ScopeToDILocation[P.first], P.second, - ScopeToBlocks[P.first], SavedLiveIns, MOutLocs, MInLocs, + ScopeToAssignBlocks[P.first], SavedLiveIns, MOutLocs, MInLocs, vlocs); } diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h index 9e9c0ce394fd..e7383209c027 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h +++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h @@ -779,6 +779,17 @@ public: /// Used as the result type for the variable value dataflow problem. using LiveInsT = SmallVector<SmallVector<VarAndLoc, 8>, 8>; + /// Mapping from lexical scopes to a DILocation in that scope. + using ScopeToDILocT = DenseMap<const LexicalScope *, const DILocation *>; + + /// Mapping from lexical scopes to variables in that scope. + using ScopeToVarsT = DenseMap<const LexicalScope *, SmallSet<DebugVariable, 4>>; + + /// Mapping from lexical scopes to blocks where variables in that scope are + /// assigned.
Such blocks aren't necessarily "in" the lexical scope, they're + /// just blocks where an assignment happens. + using ScopeToAssignBlocksT = DenseMap<const LexicalScope *, SmallPtrSet<MachineBasicBlock *, 4>>; + private: MachineDominatorTree *DomTree; const TargetRegisterInfo *TRI; @@ -816,7 +827,7 @@ private: /// Blocks which are artificial, i.e. blocks which exclusively contain /// instructions without DebugLocs, or with line 0 locations. - SmallPtrSet<const MachineBasicBlock *, 16> ArtificialBlocks; + SmallPtrSet<MachineBasicBlock *, 16> ArtificialBlocks; // Mapping of blocks to and from their RPOT order. DenseMap<unsigned int, MachineBasicBlock *> OrderToBB; @@ -958,6 +969,15 @@ private: ValueIDNum **MInLocs, SmallVectorImpl<MLocTransferMap> &MLocTransfer); + /// Propagate variable values to blocks in the common case where there's + /// only one value assigned to the variable. This function has better + /// performance as it doesn't have to find the dominance frontier between + /// different assignments. + void placePHIsForSingleVarDefinition( + const SmallPtrSetImpl<MachineBasicBlock *> &InScopeBlocks, + MachineBasicBlock *MBB, SmallVectorImpl<VLocTracker> &AllTheVLocs, + const DebugVariable &Var, LiveInsT &Output); + /// Calculate the iterated-dominance-frontier for a set of defs, using the /// existing LLVM facilities for this. Works for a single "value" or /// machine/variable location. @@ -979,6 +999,19 @@ private: SmallPtrSet<const MachineBasicBlock *, 16> &Visited, ValueIDNum **OutLocs, ValueIDNum *InLocs); + /// Produce a set of blocks that are in the current lexical scope. This means + /// those blocks that contain instructions "in" the scope, blocks where + /// assignments to variables in scope occur, and artificial blocks that are + /// successors to any of the earlier blocks. See https://llvm.org/PR48091 for + /// more commentary on what "in scope" means. + /// \p DILoc A location in the scope that we're fetching blocks for. + /// \p Output Set to put in-scope-blocks into. + /// \p AssignBlocks Blocks known to contain assignments of variables in scope. + void + getBlocksForScope(const DILocation *DILoc, + SmallPtrSetImpl<const MachineBasicBlock *> &Output, + const SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks); + /// Solve the variable value dataflow problem, for a single lexical scope. /// Uses the algorithm from the file comment to resolve control flow joins /// using PHI placement and value propagation. Reads the locations of machine @@ -1029,6 +1062,12 @@ private: DenseMap<DebugVariable, unsigned> &AllVarsNumbering, const TargetPassConfig &TPC); + /// Take collections of DBG_VALUE instructions stored in TTracker, and + /// install them into their output blocks. Preserves a stable order of + /// DBG_VALUEs produced (which would otherwise cause nondeterminism) through + /// the AllVarsNumbering order. + bool emitTransfers(DenseMap<DebugVariable, unsigned> &AllVarsNumbering); + /// Boilerplate computation of some initial sets, artificial blocks and /// RPOT block ordering. void initialSetup(MachineFunction &MF); diff --git a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp index b4dd41bbb810..42a0967bce3f 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp @@ -329,7 +329,7 @@ private: EntryValueKind, EntryValueBackupKind, EntryValueCopyBackupKind - } EVKind; + } EVKind = EntryValueLocKind::NonEntryValueKind; /// The value location.
Stored separately to avoid repeatedly /// extracting it from MI. @@ -397,8 +397,7 @@ private: VarLoc(const MachineInstr &MI, LexicalScopes &LS) : Var(MI.getDebugVariable(), MI.getDebugExpression(), MI.getDebugLoc()->getInlinedAt()), - Expr(MI.getDebugExpression()), MI(MI), - EVKind(EntryValueLocKind::NonEntryValueKind) { + Expr(MI.getDebugExpression()), MI(MI) { assert(MI.isDebugValue() && "not a DBG_VALUE"); assert((MI.isDebugValueList() || MI.getNumOperands() == 4) && "malformed DBG_VALUE"); diff --git a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp index a74c57690640..33782c755eb0 100644 --- a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp +++ b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp @@ -220,6 +220,19 @@ void resetInputs(MLModelRunner &Runner) { #undef _RESET } +// Per-live interval components that get aggregated into the feature values that +// will be passed to the evaluator. +struct LIFeatureComponents { + double R = 0; + double W = 0; + double RW = 0; + double IndVarUpdates = 0; + double HintWeights = 0.0; + int64_t NrDefsAndUses = 0; + float HottestBlockFreq = 0.0; + bool IsRemat = false; +}; + using CandidateRegList = std::array<std::pair<MCRegister, bool>, NumberOfInterferences>; using FeaturesListNormalizer = std::array<float, FeatureIDs::FeatureCount>; @@ -227,8 +240,8 @@ using FeaturesListNormalizer = std::array<float, FeatureIDs::FeatureCount>; /// The ML evictor (commonalities between release and development mode) class MLEvictAdvisor : public RegAllocEvictionAdvisor { public: - MLEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA, - MLModelRunner *Runner, const MachineBlockFrequencyInfo &MBFI, + MLEvictAdvisor(MachineFunction &MF, const RAGreedy &RA, MLModelRunner *Runner, + const MachineBlockFrequencyInfo &MBFI, const MachineLoopInfo &Loops); protected: @@ -277,6 +290,9 @@ private: FixedRegisters); } + const LIFeatureComponents + getLIFeatureComponents(const LiveInterval &LI) const; + // Hold on to a default advisor for: // 1) the implementation of canEvictHintInterference, because we didn't learn // that nuance yet; @@ -319,7 +335,7 @@ private: } std::unique_ptr<RegAllocEvictionAdvisor> - getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override { + getAdvisor(MachineFunction &MF, const RAGreedy &RA) override { if (!Runner) Runner = std::make_unique<ReleaseModeModelRunner<RegallocEvictModel>>( MF.getFunction().getContext(), FeatureNames, DecisionName); @@ -364,7 +380,7 @@ static const std::vector<TensorSpec> TrainingInputFeatures{ class DevelopmentModeEvictAdvisor : public MLEvictAdvisor { public: - DevelopmentModeEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA, + DevelopmentModeEvictAdvisor(MachineFunction &MF, const RAGreedy &RA, MLModelRunner *Runner, const MachineBlockFrequencyInfo &MBFI, const MachineLoopInfo &Loops, Logger *Log) @@ -420,7 +436,7 @@ private: } std::unique_ptr<RegAllocEvictionAdvisor> - getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override { + getAdvisor(MachineFunction &MF, const RAGreedy &RA) override { LLVMContext &Ctx = MF.getFunction().getContext(); if (ModelUnderTraining.empty() && TrainingLog.empty()) { Ctx.emitError("Regalloc development mode should be requested with at " @@ -480,7 +496,7 @@ float MLEvictAdvisor::getInitialQueueSize(const MachineFunction &MF) { return Ret; } -MLEvictAdvisor::MLEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA, +MLEvictAdvisor::MLEvictAdvisor(MachineFunction &MF, const RAGreedy &RA, MLModelRunner *Runner, const 
MachineBlockFrequencyInfo &MBFI, const MachineLoopInfo &Loops) @@ -615,16 +631,15 @@ MCRegister MLEvictAdvisor::tryFindEvictionCandidate( for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit); I != E; ++I, ++Pos) { MCRegister PhysReg = *I; - Regs[Pos] = std::make_pair(PhysReg, true); + assert(!Regs[Pos].second); assert(PhysReg); if (!canAllocatePhysReg(CostPerUseLimit, PhysReg)) { - Regs[Pos].second = false; continue; } if (loadInterferenceFeatures(VirtReg, PhysReg, I.isHint(), FixedRegisters, Largest, Pos)) { ++Available; - Regs[Pos].second = true; + Regs[Pos] = std::make_pair(PhysReg, true); } } if (Available == 0) { @@ -632,6 +647,7 @@ MCRegister MLEvictAdvisor::tryFindEvictionCandidate( assert(!MustFindEviction); return MCRegister::NoRegister; } + const size_t ValidPosLimit = Pos; // If we must find eviction, the candidate should be masked out of the // decision making process. Regs[CandidateVirtRegPos].second = !MustFindEviction; @@ -665,9 +681,55 @@ MCRegister MLEvictAdvisor::tryFindEvictionCandidate( assert(!MustFindEviction); return MCRegister::NoRegister; } + assert(CandidatePos < ValidPosLimit); + (void)ValidPosLimit; return Regs[CandidatePos].first; } +const LIFeatureComponents +MLEvictAdvisor::getLIFeatureComponents(const LiveInterval &LI) const { + LIFeatureComponents Ret; + SmallPtrSet<MachineInstr *, 8> Visited; + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + + for (MachineRegisterInfo::reg_instr_nodbg_iterator + I = MRI->reg_instr_nodbg_begin(LI.reg()), + E = MRI->reg_instr_nodbg_end(); + I != E;) { + MachineInstr *MI = &*(I++); + + ++Ret.NrDefsAndUses; + if (!Visited.insert(MI).second) + continue; + + if (MI->isIdentityCopy() || MI->isImplicitDef()) + continue; + + bool Reads, Writes; + std::tie(Reads, Writes) = MI->readsWritesVirtualRegister(LI.reg()); + + float Freq = MBFI.getBlockFreqRelativeToEntryBlock(MI->getParent()); + Ret.HottestBlockFreq = std::max(Freq, Ret.HottestBlockFreq); + + Ret.R += (Reads && !Writes) * Freq; + Ret.W += (!Reads && Writes) * Freq; + Ret.RW += (Reads && Writes) * Freq; + + auto *MBB = MI->getParent(); + auto *Loop = Loops.getLoopFor(MBB); + bool IsExiting = Loop ? Loop->isLoopExiting(MBB) : false; + + if (Writes && IsExiting && LIS->isLiveOutOfMBB(LI, MBB)) + Ret.IndVarUpdates += Freq; + + if (MI->isCopy() && VirtRegAuxInfo::copyHint(MI, LI.reg(), TRI, *MRI)) + Ret.HintWeights += Freq; + } + Ret.IsRemat = VirtRegAuxInfo::isRematerializable( + LI, *LIS, *VRM, *MF.getSubtarget().getInstrInfo()); + return Ret; +} + // Overall, this currently mimics what we do for weight calculation, but instead // of accumulating the various features, we keep them separate.
void MLEvictAdvisor::extractFeatures( @@ -676,11 +738,11 @@ void MLEvictAdvisor::extractFeatures( int64_t IsHint, int64_t LocalIntfsCount, float NrUrgent) const { int64_t NrDefsAndUses = 0; int64_t NrBrokenHints = 0; - float R = 0; - float W = 0; - float RW = 0; - float IndVarUpdates = 0; - float HintWeights = 0.0; + double R = 0.0; + double W = 0.0; + double RW = 0.0; + double IndVarUpdates = 0.0; + double HintWeights = 0.0; float StartBBFreq = 0.0; float EndBBFreq = 0.0; float HottestBlockFreq = 0.0; @@ -707,46 +769,19 @@ void MLEvictAdvisor::extractFeatures( if (LI.endIndex() > EndSI) EndSI = LI.endIndex(); - - SmallPtrSet<MachineInstr *, 8> Visited; - const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + const LIFeatureComponents LIFC = getLIFeatureComponents(LI); NrBrokenHints += VRM->hasPreferredPhys(LI.reg()); - for (MachineRegisterInfo::reg_instr_nodbg_iterator - I = MRI->reg_instr_nodbg_begin(LI.reg()), - E = MRI->reg_instr_nodbg_end(); - I != E;) { - MachineInstr *MI = &*(I++); + NrDefsAndUses += LIFC.NrDefsAndUses; + HottestBlockFreq = std::max(HottestBlockFreq, LIFC.HottestBlockFreq); + R += LIFC.R; + W += LIFC.W; + RW += LIFC.RW; - ++NrDefsAndUses; - if (!Visited.insert(MI).second) - continue; + IndVarUpdates += LIFC.IndVarUpdates; - if (MI->isIdentityCopy() || MI->isImplicitDef()) - continue; - - bool Reads, Writes; - std::tie(Reads, Writes) = MI->readsWritesVirtualRegister(LI.reg()); - - float Freq = MBFI.getBlockFreqRelativeToEntryBlock(MI->getParent()); - if (Freq > HottestBlockFreq) - HottestBlockFreq = Freq; - R += (Reads && !Writes) * Freq; - W += (!Reads && Writes) * Freq; - RW += (Reads && Writes) * Freq; - - auto *MBB = MI->getParent(); - auto *Loop = Loops.getLoopFor(MBB); - bool IsExiting = Loop ? Loop->isLoopExiting(MBB) : false; - - if (Writes && IsExiting && LIS->isLiveOutOfMBB(LI, MBB)) - IndVarUpdates += Freq; - - if (MI->isCopy() && VirtRegAuxInfo::copyHint(MI, LI.reg(), TRI, *MRI)) - HintWeights += Freq; - } - NrRematerializable += VirtRegAuxInfo::isRematerializable( - LI, *LIS, *VRM, *MF.getSubtarget().getInstrInfo()); + HintWeights += LIFC.HintWeights; + NrRematerializable += LIFC.IsRemat; } size_t Size = 0; if (!Intervals.empty()) { diff --git a/llvm/lib/CodeGen/MachineModuleInfo.cpp b/llvm/lib/CodeGen/MachineModuleInfo.cpp index 50cbb14e926e..31d4fc7d02bf 100644 --- a/llvm/lib/CodeGen/MachineModuleInfo.cpp +++ b/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -400,12 +400,14 @@ bool MachineModuleInfoWrapperPass::doInitialization(Module &M) { // FIXME: Do this for new pass manager. 
LLVMContext &Ctx = M.getContext(); MMI.getContext().setDiagnosticHandler( - [&Ctx](const SMDiagnostic &SMD, bool IsInlineAsm, const SourceMgr &SrcMgr, - std::vector<const MDNode *> &LocInfos) { + [&Ctx, &M](const SMDiagnostic &SMD, bool IsInlineAsm, + const SourceMgr &SrcMgr, + std::vector<const MDNode *> &LocInfos) { unsigned LocCookie = 0; if (IsInlineAsm) LocCookie = getLocCookie(SMD, SrcMgr, LocInfos); - Ctx.diagnose(DiagnosticInfoSrcMgr(SMD, IsInlineAsm, LocCookie)); + Ctx.diagnose( + DiagnosticInfoSrcMgr(SMD, M.getName(), IsInlineAsm, LocCookie)); }); MMI.DbgInfoAvailable = !M.debug_compile_units().empty(); return false; diff --git a/llvm/lib/CodeGen/MachineModuleSlotTracker.cpp b/llvm/lib/CodeGen/MachineModuleSlotTracker.cpp index e4da179efcc4..aa63411df965 100644 --- a/llvm/lib/CodeGen/MachineModuleSlotTracker.cpp +++ b/llvm/lib/CodeGen/MachineModuleSlotTracker.cpp @@ -66,8 +66,7 @@ MachineModuleSlotTracker::MachineModuleSlotTracker( const MachineFunction *MF, bool ShouldInitializeAllMetadata) : ModuleSlotTracker(MF->getFunction().getParent(), ShouldInitializeAllMetadata), - TheFunction(MF->getFunction()), TheMMI(MF->getMMI()), MDNStartSlot(0), - MDNEndSlot(0) { + TheFunction(MF->getFunction()), TheMMI(MF->getMMI()) { setProcessHook([this](AbstractSlotTrackerStorage *AST, const Module *M, bool ShouldInitializeAllMetadata) { this->processMachineModule(AST, M, ShouldInitializeAllMetadata); diff --git a/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/llvm/lib/CodeGen/MachineRegisterInfo.cpp index 19bf87d3e290..1a4ad53ddf81 100644 --- a/llvm/lib/CodeGen/MachineRegisterInfo.cpp +++ b/llvm/lib/CodeGen/MachineRegisterInfo.cpp @@ -43,8 +43,7 @@ void MachineRegisterInfo::Delegate::anchor() {} MachineRegisterInfo::MachineRegisterInfo(MachineFunction *MF) : MF(MF), TracksSubRegLiveness(MF->getSubtarget().enableSubRegLiveness() && - EnableSubRegLiveness), - IsUpdatedCSRsInitialized(false) { + EnableSubRegLiveness) { unsigned NumRegs = getTargetRegisterInfo()->getNumRegs(); VRegInfo.reserve(256); RegAllocHints.reserve(256); diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index 005d4ad1a328..c9d3e473062b 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -1909,7 +1909,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { const Register Reg = MO->getReg(); if (!Reg) return; - if (MRI->tracksLiveness() && !MI->isDebugValue()) + if (MRI->tracksLiveness() && !MI->isDebugInstr()) checkLiveness(MO, MONum); // Verify the consistency of tied operands. diff --git a/llvm/lib/CodeGen/PostRASchedulerList.cpp b/llvm/lib/CodeGen/PostRASchedulerList.cpp index d7cd0a583cee..aac46cb22084 100644 --- a/llvm/lib/CodeGen/PostRASchedulerList.cpp +++ b/llvm/lib/CodeGen/PostRASchedulerList.cpp @@ -139,7 +139,7 @@ namespace { /// /// This is the instruction number from the top of the current block, not /// the SlotIndex. It is only used by the AntiDepBreaker. 
- unsigned EndIndex; + unsigned EndIndex = 0; public: SchedulePostRATDList( @@ -206,7 +206,7 @@ SchedulePostRATDList::SchedulePostRATDList( const RegisterClassInfo &RCI, TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs) - : ScheduleDAGInstrs(MF, &MLI), AA(AA), EndIndex(0) { + : ScheduleDAGInstrs(MF, &MLI), AA(AA) { const InstrItineraryData *InstrItins = MF.getSubtarget().getInstrItineraryData(); diff --git a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp index 87df7bb4a689..fc5d1104a999 100644 --- a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp +++ b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp @@ -25,7 +25,7 @@ using namespace llvm; static cl::opt<RegAllocEvictionAdvisorAnalysis::AdvisorMode> Mode( - "regalloc-enable-advisor", cl::Hidden, + "regalloc-enable-advisor", cl::Hidden, cl::ZeroOrMore, cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values( @@ -66,7 +66,7 @@ public: private: std::unique_ptr<RegAllocEvictionAdvisor> - getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override { + getAdvisor(MachineFunction &MF, const RAGreedy &RA) override { return std::make_unique<DefaultEvictionAdvisor>(MF, RA); } bool doInitialization(Module &M) override { @@ -113,7 +113,7 @@ StringRef RegAllocEvictionAdvisorAnalysis::getPassName() const { llvm_unreachable("Unknown advisor kind"); } -RegAllocEvictionAdvisor::RegAllocEvictionAdvisor(const MachineFunction &MF, +RegAllocEvictionAdvisor::RegAllocEvictionAdvisor(MachineFunction &MF, const RAGreedy &RA) : MF(MF), RA(RA), Matrix(RA.getInterferenceMatrix()), LIS(RA.getLiveIntervals()), VRM(RA.getVirtRegMap()), @@ -122,3 +122,178 @@ RegAllocEvictionAdvisor::RegAllocEvictionAdvisor(const MachineFunction &MF, EnableLocalReassign(EnableLocalReassignment || MF.getSubtarget().enableRALocalReassignment( MF.getTarget().getOptLevel())) {} + +/// shouldEvict - determine if A should evict the assigned live range B. The +/// eviction policy defined by this function together with the allocation order +/// defined by enqueue() decides which registers ultimately end up being split +/// and spilled. +/// +/// Cascade numbers are used to prevent infinite loops if this function is a +/// cyclic relation. +/// +/// @param A The live range to be assigned. +/// @param IsHint True when A is about to be assigned to its preferred +/// register. +/// @param B The live range to be evicted. +/// @param BreaksHint True when B is already assigned to its preferred register. +bool DefaultEvictionAdvisor::shouldEvict(LiveInterval &A, bool IsHint, + LiveInterval &B, + bool BreaksHint) const { + bool CanSplit = RA.getExtraInfo().getStage(B) < RS_Spill; + + // Be fairly aggressive about following hints as long as the evictee can be + // split. + if (CanSplit && IsHint && !BreaksHint) + return true; + + if (A.weight() > B.weight()) { + LLVM_DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight() << '\n'); + return true; + } + return false; +} + +/// canEvictHintInterference - return true if the interference for VirtReg +/// on the PhysReg, which is VirtReg's hint, can be evicted in favor of VirtReg. 
+bool DefaultEvictionAdvisor::canEvictHintInterference( + LiveInterval &VirtReg, MCRegister PhysReg, + const SmallVirtRegSet &FixedRegisters) const { + EvictionCost MaxCost; + MaxCost.setBrokenHints(1); + return canEvictInterferenceBasedOnCost(VirtReg, PhysReg, true, MaxCost, + FixedRegisters); +} + +/// canEvictInterferenceBasedOnCost - Return true if all interferences between +/// VirtReg and PhysReg can be evicted. +/// +/// @param VirtReg Live range that is about to be assigned. +/// @param PhysReg Desired register for assignment. +/// @param IsHint True when PhysReg is VirtReg's preferred register. +/// @param MaxCost Only look for cheaper candidates and update with new cost +/// when returning true. +/// @returns True when interference can be evicted cheaper than MaxCost. +bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost( + LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint, + EvictionCost &MaxCost, const SmallVirtRegSet &FixedRegisters) const { + // It is only possible to evict virtual register interference. + if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) + return false; + + bool IsLocal = VirtReg.empty() || LIS->intervalIsInOneMBB(VirtReg); + + // Find VirtReg's cascade number. This will be unassigned if VirtReg was never + // involved in an eviction before. If a cascade number was assigned, deny + // evicting anything with the same or a newer cascade number. This prevents + // infinite eviction loops. + // + // This works out so a register without a cascade number is allowed to evict + // anything, and it can be evicted by anything. + unsigned Cascade = RA.getExtraInfo().getCascadeOrCurrentNext(VirtReg.reg()); + + EvictionCost Cost; + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); + // If there are 10 or more interferences, chances are one is heavier. + const auto &Interferences = Q.interferingVRegs(10); + if (Interferences.size() >= 10) + return false; + + // Check if any interfering live range is heavier than MaxWeight. + for (LiveInterval *Intf : reverse(Interferences)) { + assert(Register::isVirtualRegister(Intf->reg()) && + "Only expecting virtual register interference from query"); + + // Do not allow eviction of a virtual register if we are in the middle + // of last-chance recoloring and this virtual register is one that we + // have scavenged a physical register for. + if (FixedRegisters.count(Intf->reg())) + return false; + + // Never evict spill products. They cannot split or spill. + if (RA.getExtraInfo().getStage(*Intf) == RS_Done) + return false; + // Once a live range becomes small enough, it is urgent that we find a + // register for it. This is indicated by an infinite spill weight. These + // urgent live ranges get to evict almost anything. + // + // Also allow urgent evictions of unspillable ranges from a strictly + // larger allocation order. + bool Urgent = + !VirtReg.isSpillable() && + (Intf->isSpillable() || + RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg())) < + RegClassInfo.getNumAllocatableRegs( + MRI->getRegClass(Intf->reg()))); + // Only evict older cascades or live ranges without a cascade. + unsigned IntfCascade = RA.getExtraInfo().getCascade(Intf->reg()); + if (Cascade <= IntfCascade) { + if (!Urgent) + return false; + // We permit breaking cascades for urgent evictions. It should be the + // last resort, though, so make it really expensive.
+ Cost.BrokenHints += 10; + } + // Would this break a satisfied hint? + bool BreaksHint = VRM->hasPreferredPhys(Intf->reg()); + // Update eviction cost. + Cost.BrokenHints += BreaksHint; + Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight()); + // Abort if this would be too expensive. + if (!(Cost < MaxCost)) + return false; + if (Urgent) + continue; + // Apply the eviction policy for non-urgent evictions. + if (!shouldEvict(VirtReg, IsHint, *Intf, BreaksHint)) + return false; + // If !MaxCost.isMax(), then we're just looking for a cheap register. + // Evicting another local live range in this case could lead to suboptimal + // coloring. + if (!MaxCost.isMax() && IsLocal && LIS->intervalIsInOneMBB(*Intf) && + (!EnableLocalReassign || !canReassign(*Intf, PhysReg))) { + return false; + } + } + } + MaxCost = Cost; + return true; +} + +MCRegister DefaultEvictionAdvisor::tryFindEvictionCandidate( + LiveInterval &VirtReg, const AllocationOrder &Order, + uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const { + // Keep track of the cheapest interference seen so far. + EvictionCost BestCost; + BestCost.setMax(); + MCRegister BestPhys; + auto MaybeOrderLimit = getOrderLimit(VirtReg, Order, CostPerUseLimit); + if (!MaybeOrderLimit) + return MCRegister::NoRegister; + unsigned OrderLimit = *MaybeOrderLimit; + + // When we are just looking for a reduced cost per use, don't break any + // hints, and only evict smaller spill weights. + if (CostPerUseLimit < uint8_t(~0u)) { + BestCost.BrokenHints = 0; + BestCost.MaxWeight = VirtReg.weight(); + } + + for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit); I != E; + ++I) { + MCRegister PhysReg = *I; + assert(PhysReg); + if (!canAllocatePhysReg(CostPerUseLimit, PhysReg) || + !canEvictInterferenceBasedOnCost(VirtReg, PhysReg, false, BestCost, + FixedRegisters)) + continue; + + // Best so far. + BestPhys = PhysReg; + + // Stop if the hint can be used. + if (I.isHint()) + break; + } + return BestPhys; +} diff --git a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h index 33e03aed81a7..1f40386db8da 100644 --- a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h +++ b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h @@ -115,7 +115,7 @@ public: bool isUnusedCalleeSavedReg(MCRegister PhysReg) const; protected: - RegAllocEvictionAdvisor(const MachineFunction &MF, const RAGreedy &RA); + RegAllocEvictionAdvisor(MachineFunction &MF, const RAGreedy &RA); Register canReassign(LiveInterval &VirtReg, Register PrevReg) const; @@ -173,7 +173,7 @@ public: /// Get an advisor for the given context (i.e. 
machine function, etc) virtual std::unique_ptr<RegAllocEvictionAdvisor> - getAdvisor(const MachineFunction &MF, const RAGreedy &RA) = 0; + getAdvisor(MachineFunction &MF, const RAGreedy &RA) = 0; AdvisorMode getAdvisorMode() const { return Mode; } protected: @@ -200,7 +200,7 @@ RegAllocEvictionAdvisorAnalysis *createDevelopmentModeAdvisor(); // out of RegAllocGreedy.cpp class DefaultEvictionAdvisor : public RegAllocEvictionAdvisor { public: - DefaultEvictionAdvisor(const MachineFunction &MF, const RAGreedy &RA) + DefaultEvictionAdvisor(MachineFunction &MF, const RAGreedy &RA) : RegAllocEvictionAdvisor(MF, RA) {} private: diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index 6ea6dbcbbb74..7870574df5b2 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -440,143 +440,6 @@ Register RegAllocEvictionAdvisor::canReassign(LiveInterval &VirtReg, return PhysReg; } -/// shouldEvict - determine if A should evict the assigned live range B. The -/// eviction policy defined by this function together with the allocation order -/// defined by enqueue() decides which registers ultimately end up being split -/// and spilled. -/// -/// Cascade numbers are used to prevent infinite loops if this function is a -/// cyclic relation. -/// -/// @param A The live range to be assigned. -/// @param IsHint True when A is about to be assigned to its preferred -/// register. -/// @param B The live range to be evicted. -/// @param BreaksHint True when B is already assigned to its preferred register. -bool DefaultEvictionAdvisor::shouldEvict(LiveInterval &A, bool IsHint, - LiveInterval &B, - bool BreaksHint) const { - bool CanSplit = RA.getExtraInfo().getStage(B) < RS_Spill; - - // Be fairly aggressive about following hints as long as the evictee can be - // split. - if (CanSplit && IsHint && !BreaksHint) - return true; - - if (A.weight() > B.weight()) { - LLVM_DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight() << '\n'); - return true; - } - return false; -} - -/// canEvictHintInterference - return true if the interference for VirtReg -/// on the PhysReg, which is VirtReg's hint, can be evicted in favor of VirtReg. -bool DefaultEvictionAdvisor::canEvictHintInterference( - LiveInterval &VirtReg, MCRegister PhysReg, - const SmallVirtRegSet &FixedRegisters) const { - EvictionCost MaxCost; - MaxCost.setBrokenHints(1); - return canEvictInterferenceBasedOnCost(VirtReg, PhysReg, true, MaxCost, - FixedRegisters); -} - -/// canEvictInterferenceBasedOnCost - Return true if all interferences between -/// VirtReg and PhysReg can be evicted. -/// -/// @param VirtReg Live range that is about to be assigned. -/// @param PhysReg Desired register for assignment. -/// @param IsHint True when PhysReg is VirtReg's preferred register. -/// @param MaxCost Only look for cheaper candidates and update with new cost -/// when returning true. -/// @returns True when interference can be evicted cheaper than MaxCost. -bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost( - LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint, - EvictionCost &MaxCost, const SmallVirtRegSet &FixedRegisters) const { - // It is only possible to evict virtual register interference. - if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) - return false; - - bool IsLocal = VirtReg.empty() || LIS->intervalIsInOneMBB(VirtReg); - - // Find VirtReg's cascade number. This will be unassigned if VirtReg was never - // involved in an eviction before. 
If a cascade number was assigned, deny - // evicting anything with the same or a newer cascade number. This prevents - // infinite eviction loops. - // - // This works out so a register without a cascade number is allowed to evict - // anything, and it can be evicted by anything. - unsigned Cascade = RA.getExtraInfo().getCascadeOrCurrentNext(VirtReg.reg()); - - EvictionCost Cost; - for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { - LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); - // If there is 10 or more interferences, chances are one is heavier. - const auto &Interferences = Q.interferingVRegs(10); - if (Interferences.size() >= 10) - return false; - - // Check if any interfering live range is heavier than MaxWeight. - for (LiveInterval *Intf : reverse(Interferences)) { - assert(Register::isVirtualRegister(Intf->reg()) && - "Only expecting virtual register interference from query"); - - // Do not allow eviction of a virtual register if we are in the middle - // of last-chance recoloring and this virtual register is one that we - // have scavenged a physical register for. - if (FixedRegisters.count(Intf->reg())) - return false; - - // Never evict spill products. They cannot split or spill. - if (RA.getExtraInfo().getStage(*Intf) == RS_Done) - return false; - // Once a live range becomes small enough, it is urgent that we find a - // register for it. This is indicated by an infinite spill weight. These - // urgent live ranges get to evict almost anything. - // - // Also allow urgent evictions of unspillable ranges from a strictly - // larger allocation order. - bool Urgent = - !VirtReg.isSpillable() && - (Intf->isSpillable() || - RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg())) < - RegClassInfo.getNumAllocatableRegs( - MRI->getRegClass(Intf->reg()))); - // Only evict older cascades or live ranges without a cascade. - unsigned IntfCascade = RA.getExtraInfo().getCascade(Intf->reg()); - if (Cascade <= IntfCascade) { - if (!Urgent) - return false; - // We permit breaking cascades for urgent evictions. It should be the - // last resort, though, so make it really expensive. - Cost.BrokenHints += 10; - } - // Would this break a satisfied hint? - bool BreaksHint = VRM->hasPreferredPhys(Intf->reg()); - // Update eviction cost. - Cost.BrokenHints += BreaksHint; - Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight()); - // Abort if this would be too expensive. - if (!(Cost < MaxCost)) - return false; - if (Urgent) - continue; - // Apply the eviction policy for non-urgent evictions. - if (!shouldEvict(VirtReg, IsHint, *Intf, BreaksHint)) - return false; - // If !MaxCost.isMax(), then we're just looking for a cheap register. - // Evicting another local live range in this case could lead to suboptimal - // coloring. - if (!MaxCost.isMax() && IsLocal && LIS->intervalIsInOneMBB(*Intf) && - (!EnableLocalReassign || !canReassign(*Intf, PhysReg))) { - return false; - } - } - } - MaxCost = Cost; - return true; -} - /// Return true if all interferences between VirtReg and PhysReg between /// Start and End can be evicted. /// @@ -757,44 +620,6 @@ bool RegAllocEvictionAdvisor::canAllocatePhysReg(unsigned CostPerUseLimit, return true; } -MCRegister DefaultEvictionAdvisor::tryFindEvictionCandidate( - LiveInterval &VirtReg, const AllocationOrder &Order, - uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const { - // Keep track of the cheapest interference seen so far. 
- EvictionCost BestCost; - BestCost.setMax(); - MCRegister BestPhys; - auto MaybeOrderLimit = getOrderLimit(VirtReg, Order, CostPerUseLimit); - if (!MaybeOrderLimit) - return MCRegister::NoRegister; - unsigned OrderLimit = *MaybeOrderLimit; - - // When we are just looking for a reduced cost per use, don't break any - // hints, and only evict smaller spill weights. - if (CostPerUseLimit < uint8_t(~0u)) { - BestCost.BrokenHints = 0; - BestCost.MaxWeight = VirtReg.weight(); - } - - for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit); I != E; - ++I) { - MCRegister PhysReg = *I; - assert(PhysReg); - if (!canAllocatePhysReg(CostPerUseLimit, PhysReg) || - !canEvictInterferenceBasedOnCost(VirtReg, PhysReg, false, BestCost, - FixedRegisters)) - continue; - - // Best so far. - BestPhys = PhysReg; - - // Stop if the hint can be used. - if (I.isHint()) - break; - } - return BestPhys; -} - /// tryEvict - Try to evict all interferences for a physreg. /// @param VirtReg Currently unassigned virtual register. /// @param Order Physregs to try. @@ -2922,6 +2747,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { RegCosts = TRI->getRegisterCosts(*MF); + ExtraInfo.emplace(); + EvictAdvisor = + getAnalysis<RegAllocEvictionAdvisorAnalysis>().getAdvisor(*MF, *this); + VRAI = std::make_unique<VirtRegAuxInfo>(*MF, *LIS, *VRM, *Loops, *MBFI); SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM, *VRAI)); @@ -2931,9 +2760,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops)); SE.reset(new SplitEditor(*SA, *AA, *LIS, *VRM, *DomTree, *MBFI, *VRAI)); - ExtraInfo.emplace(); - EvictAdvisor = - getAnalysis<RegAllocEvictionAdvisorAnalysis>().getAdvisor(*MF, *this); + IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI); GlobalCand.resize(32); // This will grow as needed. 
SetOfBrokenHints.clear(); diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 932f263d2558..041d7e5b4a4a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -143,7 +143,7 @@ namespace { SelectionDAG &DAG; const TargetLowering &TLI; const SelectionDAGTargetInfo *STI; - CombineLevel Level; + CombineLevel Level = BeforeLegalizeTypes; CodeGenOpt::Level OptLevel; bool LegalDAG = false; bool LegalOperations = false; @@ -238,8 +238,7 @@ namespace { public: DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL) : DAG(D), TLI(D.getTargetLoweringInfo()), - STI(D.getSubtarget().getSelectionDAGInfo()), - Level(BeforeLegalizeTypes), OptLevel(OL), AA(AA) { + STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL), AA(AA) { ForCodeSize = DAG.shouldOptForSize(); DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel); @@ -441,6 +440,7 @@ namespace { SDValue visitSRA(SDNode *N); SDValue visitSRL(SDNode *N); SDValue visitFunnelShift(SDNode *N); + SDValue visitSHLSAT(SDNode *N); SDValue visitRotate(SDNode *N); SDValue visitABS(SDNode *N); SDValue visitBSWAP(SDNode *N); @@ -907,9 +907,8 @@ bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, return true; } - if (N.getOpcode() != ISD::SELECT_CC || - !TLI.isConstTrueVal(N.getOperand(2).getNode()) || - !TLI.isConstFalseVal(N.getOperand(3).getNode())) + if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) || + !TLI.isConstFalseVal(N.getOperand(3))) return false; if (TLI.getBooleanContents(N.getValueType()) == @@ -1654,6 +1653,8 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::ROTL: return visitRotate(N); case ISD::FSHL: case ISD::FSHR: return visitFunnelShift(N); + case ISD::SSHLSAT: + case ISD::USHLSAT: return visitSHLSAT(N); case ISD::ABS: return visitABS(N); case ISD::BSWAP: return visitBSWAP(N); case ISD::BITREVERSE: return visitBITREVERSE(N); @@ -5530,8 +5531,6 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N, // Some constants may need fixing up later if they are too large. if (auto *C = dyn_cast<ConstantSDNode>(Op)) { - if (Mask->getValueType(0) != C->getValueType(0)) - return false; if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) && (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue()) NodesWithConsts.insert(N); @@ -5565,9 +5564,9 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N, case ISD::AssertZext: { unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes(); EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); - EVT VT = Op.getOpcode() == ISD::AssertZext - ? cast<VTSDNode>(Op.getOperand(1))->getVT() - : Op.getOperand(0).getValueType(); + EVT VT = Op.getOpcode() == ISD::AssertZext ? + cast<VTSDNode>(Op.getOperand(1))->getVT() : + Op.getOperand(0).getValueType(); // We can accept extending nodes if the mask is wider or an equal // width to the original type. @@ -5575,15 +5574,6 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N, continue; break; } - case ISD::ANY_EXTEND: { - unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes(); - EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); - EVT VT = Op.getOperand(0).getValueType(); - if (ExtVT.bitsGE(VT)) - break; - // Fallthrough to searching for nodes from the operands of the extend. - LLVM_FALLTHROUGH; - } case ISD::OR: case ISD::XOR: case ISD::AND: @@ -5643,14 +5633,12 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N) { // masking. 
if (FixupNode) { LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump()); - SDValue MaskOpT = DAG.getZExtOrTrunc(MaskOp, SDLoc(FixupNode), - FixupNode->getValueType(0)); - SDValue And = - DAG.getNode(ISD::AND, SDLoc(FixupNode), FixupNode->getValueType(0), - SDValue(FixupNode, 0), MaskOpT); + SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode), + FixupNode->getValueType(0), + SDValue(FixupNode, 0), MaskOp); DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And); if (And.getOpcode() == ISD ::AND) - DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOpT); + DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp); } // Narrow any constants that need it. @@ -5659,12 +5647,10 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N) { SDValue Op1 = LogicN->getOperand(1); if (isa<ConstantSDNode>(Op0)) - std::swap(Op0, Op1); + std::swap(Op0, Op1); - SDValue MaskOpT = - DAG.getZExtOrTrunc(MaskOp, SDLoc(Op1), Op1.getValueType()); - SDValue And = - DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOpT); + SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), + Op1, MaskOp); DAG.UpdateNodeOperands(LogicN, Op0, And); } @@ -5672,14 +5658,12 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N) { // Create narrow loads. for (auto *Load : Loads) { LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump()); - SDValue MaskOpT = - DAG.getZExtOrTrunc(MaskOp, SDLoc(Load), Load->getValueType(0)); SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0), - SDValue(Load, 0), MaskOpT); + SDValue(Load, 0), MaskOp); DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And); if (And.getOpcode() == ISD ::AND) And = SDValue( - DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOpT), 0); + DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0); SDValue NewLoad = reduceLoadWidth(And.getNode()); assert(NewLoad && "Shouldn't be masking the load if it can't be narrowed"); @@ -8036,8 +8020,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { // fold !(x cc y) -> (x !cc y) unsigned N0Opcode = N0.getOpcode(); SDValue LHS, RHS, CC; - if (TLI.isConstTrueVal(N1.getNode()) && - isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/true)) { + if (TLI.isConstTrueVal(N1) && + isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true)) { ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), LHS.getValueType()); if (!LegalOperations || @@ -9348,6 +9332,22 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitSHLSAT(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + if (SDValue V = DAG.simplifyShift(N0, N1)) + return V; + + EVT VT = N0.getValueType(); + + // fold (*shlsat c1, c2) -> c1<<c2 + if (SDValue C = + DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0, N1})) + return C; + + return SDValue(); +} + // Given an ABS node, detect the following pattern: // (ABS (SUB (EXTEND a), (EXTEND b))). // Generates UABD/SABD instruction.
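The visitSHLSAT combine added above, and the matching SSHLSAT/USHLSAT cases that this import adds to SelectionDAG's FoldValue, both bottom out in APInt's saturating shifts, which clamp to the extreme representable value instead of wrapping. A minimal standalone sketch of that behaviour (illustrative only, not code from this import):

// Saturating left-shift semantics as exposed by llvm::APInt.
#include "llvm/ADT/APInt.h"
#include <cassert>

int main() {
  using llvm::APInt;
  const APInt One(8, 1);
  // Signed i8: 120 << 1 would be 240, which is not representable, so the
  // result saturates to the maximum signed value, 127.
  assert(APInt(8, 120).sshl_sat(One) == APInt(8, 127));
  // Unsigned i8: 200 << 1 would be 400, so the result saturates to 255.
  assert(APInt(8, 200).ushl_sat(One) == APInt(8, 255));
  return 0;
}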
@@ -14580,7 +14580,7 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) { unsigned NumElts = 1; EVT VT = N->getValueType(0); if (VT.isVector() && DAG.isSplatValue(N1)) - NumElts = VT.getVectorNumElements(); + NumElts = VT.getVectorMinNumElements(); if (!MinUses || (N1->use_size() * NumElts) < MinUses) return SDValue(); diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index bfde35935c7b..d8ef79fe9a7b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1838,8 +1838,7 @@ FastISel::FastISel(FunctionLoweringInfo &FuncInfo, TII(*MF->getSubtarget().getInstrInfo()), TLI(*MF->getSubtarget().getTargetLowering()), TRI(*MF->getSubtarget().getRegisterInfo()), LibInfo(LibInfo), - SkipTargetIndependentISel(SkipTargetIndependentISel), - LastLocalValue(nullptr), EmitStartPt(nullptr) {} + SkipTargetIndependentISel(SkipTargetIndependentISel) {} FastISel::~FastISel() = default; diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 403f34573899..55f6f288f3e3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -47,8 +47,7 @@ static cl::opt<int> HighLatencyCycles( "instructions take for targets with no itinerary")); ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) - : ScheduleDAG(mf), BB(nullptr), DAG(nullptr), - InstrItins(mf.getSubtarget().getInstrItineraryData()) {} + : ScheduleDAG(mf), InstrItins(mf.getSubtarget().getInstrItineraryData()) {} /// Run - perform scheduling. /// @@ -577,7 +576,7 @@ void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() { // Construct a RegDefIter for this SUnit and find the first valid value. ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit *SU, const ScheduleDAGSDNodes *SD) - : SchedDAG(SD), Node(SU->getNode()), DefIdx(0), NodeNumDefs(0) { + : SchedDAG(SD), Node(SU->getNode()) { InitNodeNumDefs(); Advance(); } diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 8c28ce403c9b..99bbaeb19182 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -45,8 +45,8 @@ class InstrItineraryData; /// class ScheduleDAGSDNodes : public ScheduleDAG { public: - MachineBasicBlock *BB; - SelectionDAG *DAG; // DAG of the current basic block + MachineBasicBlock *BB = nullptr; + SelectionDAG *DAG = nullptr; // DAG of the current basic block const InstrItineraryData *InstrItins; /// The schedule. Null SUnit*'s represent noop instructions. 
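A cleanup that recurs throughout this import, and again in the RegDefIter hunk below, is replacing constructor-initializer boilerplate with C++11 in-class default member initializers. A minimal sketch of the pattern, using hypothetical names rather than the classes touched here:

// Before: each constructor must repeat the defaults in its init list, and a
// newly added constructor can silently forget one of them.
struct IterStateOld {
  IterStateOld() : DefIdx(0), NodeNumDefs(0) {}
  unsigned DefIdx;
  unsigned NodeNumDefs;
};

// After: the defaults sit on the declarations and apply to every
// constructor, so init lists only mention members with non-default values.
struct IterStateNew {
  unsigned DefIdx = 0;
  unsigned NodeNumDefs = 0;
};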
@@ -138,8 +138,8 @@ class InstrItineraryData; class RegDefIter { const ScheduleDAGSDNodes *SchedDAG; const SDNode *Node; - unsigned DefIdx; - unsigned NodeNumDefs; + unsigned DefIdx = 0; + unsigned NodeNumDefs = 0; MVT ValueType; public: diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 45f3005e8f57..d5998d166d25 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2449,7 +2449,7 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits, switch (V.getOpcode()) { default: return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, DemandedElts, - *this, 0); + *this); case ISD::Constant: { const APInt &CVal = cast<ConstantSDNode>(V)->getAPIntValue(); APInt NewVal = CVal & DemandedBits; @@ -3082,6 +3082,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); bool SelfMultiply = Op.getOperand(0) == Op.getOperand(1); + // TODO: SelfMultiply can be poison, but not undef. + SelfMultiply &= isGuaranteedNotToBeUndefOrPoison( + Op.getOperand(0), DemandedElts, false, Depth + 1); Known = KnownBits::mul(Known, Known2, SelfMultiply); break; } @@ -5240,6 +5243,8 @@ static llvm::Optional<APInt> FoldValue(unsigned Opcode, const APInt &C1, case ISD::UADDSAT: return C1.uadd_sat(C2); case ISD::SSUBSAT: return C1.ssub_sat(C2); case ISD::USUBSAT: return C1.usub_sat(C2); + case ISD::SSHLSAT: return C1.sshl_sat(C2); + case ISD::USHLSAT: return C1.ushl_sat(C2); case ISD::UDIV: if (!C2.getBoolValue()) break; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 41460f78e1c2..01230a36e744 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4014,7 +4014,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { Type *Ty = I.getAllocatedType(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); auto &DL = DAG.getDataLayout(); - uint64_t TySize = DL.getTypeAllocSize(Ty); + TypeSize TySize = DL.getTypeAllocSize(Ty); MaybeAlign Alignment = std::max(DL.getPrefTypeAlign(Ty), I.getAlign()); SDValue AllocSize = getValue(I.getArraySize()); @@ -4023,9 +4023,15 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { if (AllocSize.getValueType() != IntPtr) AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr); - AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr, - AllocSize, - DAG.getConstant(TySize, dl, IntPtr)); + if (TySize.isScalable()) + AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr, AllocSize, + DAG.getVScale(dl, IntPtr, + APInt(IntPtr.getScalarSizeInBits(), + TySize.getKnownMinValue()))); + else + AllocSize = + DAG.getNode(ISD::MUL, dl, IntPtr, AllocSize, + DAG.getConstant(TySize.getFixedValue(), dl, IntPtr)); // Handle alignment. If the requested alignment is less than or equal to // the stack alignment, ignore it. 
If the size is greater than or equal to @@ -6870,6 +6876,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::experimental_gc_relocate: visitGCRelocate(cast<GCRelocateInst>(I)); return; + case Intrinsic::instrprof_cover: + llvm_unreachable("instrprof failed to lower a cover"); case Intrinsic::instrprof_increment: llvm_unreachable("instrprof failed to lower an increment"); case Intrinsic::instrprof_value_profile: diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 77e11b364588..3c786904620a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -319,7 +319,7 @@ SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL) CurDAG(new SelectionDAG(tm, OL)), SDB(std::make_unique<SelectionDAGBuilder>(*CurDAG, *FuncInfo, *SwiftError, OL)), - AA(), GFI(), OptLevel(OL), DAGSize(0) { + OptLevel(OL) { initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); initializeBranchProbabilityInfoWrapperPassPass( *PassRegistry::getPassRegistry()); diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index e2db9633bfb9..dfda7d8b9f81 100644 --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -990,6 +990,24 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( return ReturnVal; } +/// Return two gc.results if present. First result is a block local +/// gc.result, second result is a non-block local gc.result. Corresponding +/// entry will be nullptr if not present. +static std::pair<const GCResultInst*, const GCResultInst*> +getGCResultLocality(const GCStatepointInst &S) { + std::pair<const GCResultInst *, const GCResultInst*> Res(nullptr, nullptr); + for (auto *U : S.users()) { + auto *GRI = dyn_cast<GCResultInst>(U); + if (!GRI) + continue; + if (GRI->getParent() == S.getParent()) + Res.first = GRI; + else + Res.second = GRI; + } + return Res; +} + void SelectionDAGBuilder::LowerStatepoint(const GCStatepointInst &I, const BasicBlock *EHPadBB /*= nullptr*/) { @@ -1075,12 +1093,11 @@ SelectionDAGBuilder::LowerStatepoint(const GCStatepointInst &I, SDValue ReturnValue = LowerAsSTATEPOINT(SI); // Export the result value if needed - const std::pair<bool, bool> GCResultLocality = I.getGCResultLocality(); - Type *RetTy = I.getActualReturnType(); + const auto GCResultLocality = getGCResultLocality(I); - if (RetTy->isVoidTy() || - (!GCResultLocality.first && !GCResultLocality.second)) { - // The return value is not needed, just generate a poison value. + if (!GCResultLocality.first && !GCResultLocality.second) { + // The return value is not needed, just generate a poison value. + // Note: This covers the void return case. setValue(&I, DAG.getIntPtrConstant(-1, getCurSDLoc())); return; } @@ -1102,6 +1119,7 @@ SelectionDAGBuilder::LowerStatepoint(const GCStatepointInst &I, // manually. // TODO: To eliminate this problem we can remove gc.result intrinsics // completely and make statepoint call to return a tuple. 
+ Type *RetTy = GCResultLocality.second->getType(); unsigned Reg = FuncInfo.CreateRegs(RetTy); RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Reg, RetTy, @@ -1168,7 +1186,7 @@ void SelectionDAGBuilder::visitGCResult(const GCResultInst &CI) { // register because statepoint and actual call return types can be // different, and getValue() will use CopyFromReg of the wrong type, // which is always i32 in our case. - Type *RetTy = SI->getActualReturnType(); + Type *RetTy = CI.getType(); SDValue CopyFromReg = getCopyFromRegs(SI, RetTy); assert(CopyFromReg.getNode()); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index a98c21f16c71..f6d1fa87676f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -63,7 +63,7 @@ bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs()); for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable, Attribute::DereferenceableOrNull, Attribute::NoAlias, - Attribute::NonNull}) + Attribute::NonNull, Attribute::NoUndef}) CallerAttrs.removeAttribute(Attr); if (CallerAttrs.hasAttributes()) @@ -606,6 +606,23 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, } bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, + const APInt &DemandedElts, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), + !DCI.isBeforeLegalizeOps()); + KnownBits Known; + + bool Simplified = + SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO); + if (Simplified) { + DCI.AddToWorklist(Op.getNode()); + DCI.CommitTargetLoweringOpt(TLO); + } + return Simplified; +} + +bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth, @@ -2247,8 +2264,12 @@ bool TargetLowering::SimplifyDemandedBits( } break; } - case ISD::ADD: case ISD::MUL: + // 'Quadratic Reciprocity': mul(x,x) -> 0 if we're only demanding bit[1] + if (DemandedBits == 2 && Op.getOperand(0) == Op.getOperand(1)) + return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT)); + LLVM_FALLTHROUGH; + case ISD::ADD: case ISD::SUB: { // Add, Sub, and Mul don't demand any bits in positions beyond that // of the highest bit demanded of them. @@ -3173,29 +3194,25 @@ bool TargetLowering::isSplatValueForTargetNode(SDValue Op, // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must // work with truncating build vectors and vectors with elements of less than // 8 bits. -bool TargetLowering::isConstTrueVal(const SDNode *N) const { +bool TargetLowering::isConstTrueVal(SDValue N) const { if (!N) return false; + unsigned EltWidth; APInt CVal; - if (auto *CN = dyn_cast<ConstantSDNode>(N)) { + if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false, + /*AllowTruncation=*/true)) { CVal = CN->getAPIntValue(); - } else if (auto *BV = dyn_cast<BuildVectorSDNode>(N)) { - auto *CN = BV->getConstantSplatNode(); - if (!CN) - return false; - - // If this is a truncating build vector, truncate the splat value. - // Otherwise, we may fail to match the expected values below. 
- unsigned BVEltWidth = BV->getValueType(0).getScalarSizeInBits(); - CVal = CN->getAPIntValue(); - if (BVEltWidth < CVal.getBitWidth()) - CVal = CVal.trunc(BVEltWidth); - } else { + EltWidth = N.getValueType().getScalarSizeInBits(); + } else return false; - } - switch (getBooleanContents(N->getValueType(0))) { + // If this is a truncating splat, truncate the splat value. + // Otherwise, we may fail to match the expected values below. + if (EltWidth < CVal.getBitWidth()) + CVal = CVal.trunc(EltWidth); + + switch (getBooleanContents(N.getValueType())) { case UndefinedBooleanContent: return CVal[0]; case ZeroOrOneBooleanContent: @@ -3207,7 +3224,7 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const { llvm_unreachable("Invalid boolean contents"); } -bool TargetLowering::isConstFalseVal(const SDNode *N) const { +bool TargetLowering::isConstFalseVal(SDValue N) const { if (!N) return false; @@ -3742,7 +3759,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 && TopSetCC.getOpcode() == ISD::SETCC && (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) && - (isConstFalseVal(N1C) || + (isConstFalseVal(N1) || isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) { bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) || diff --git a/llvm/lib/CodeGen/SlotIndexes.cpp b/llvm/lib/CodeGen/SlotIndexes.cpp index c933031ef37d..ffac68a223bf 100644 --- a/llvm/lib/CodeGen/SlotIndexes.cpp +++ b/llvm/lib/CodeGen/SlotIndexes.cpp @@ -20,7 +20,7 @@ using namespace llvm; char SlotIndexes::ID = 0; -SlotIndexes::SlotIndexes() : MachineFunctionPass(ID), mf(nullptr) { +SlotIndexes::SlotIndexes() : MachineFunctionPass(ID) { initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); } diff --git a/llvm/lib/DWARFLinker/DWARFStreamer.cpp b/llvm/lib/DWARFLinker/DWARFStreamer.cpp index 1ab6ead3b5f6..99e12fce6513 100644 --- a/llvm/lib/DWARFLinker/DWARFStreamer.cpp +++ b/llvm/lib/DWARFLinker/DWARFStreamer.cpp @@ -27,7 +27,8 @@ namespace llvm { -bool DwarfStreamer::init(Triple TheTriple) { +bool DwarfStreamer::init(Triple TheTriple, + StringRef Swift5ReflectionSegmentName) { std::string ErrorStr; std::string TripleName; StringRef Context = "dwarf streamer init"; @@ -54,8 +55,9 @@ bool DwarfStreamer::init(Triple TheTriple) { if (!MSTI) return error("no subtarget info for target " + TripleName, Context), false; - MC.reset(new MCContext(TheTriple, MAI.get(), MRI.get(), MSTI.get())); - MOFI.reset(TheTarget->createMCObjectFileInfo(*MC, /*PIC=*/false)); + MC.reset(new MCContext(TheTriple, MAI.get(), MRI.get(), MSTI.get(), nullptr, + nullptr, true, Swift5ReflectionSegmentName)); + MOFI.reset(TheTarget->createMCObjectFileInfo(*MC, /*PIC=*/false, false)); MC->setObjectFileInfo(MOFI.get()); MAB = TheTarget->createMCAsmBackend(*MSTI, *MRI, MCOptions); @@ -302,6 +304,18 @@ void DwarfStreamer::emitSwiftAST(StringRef Buffer) { MS->emitBytes(Buffer); } +void DwarfStreamer::emitSwiftReflectionSection( + llvm::binaryformat::Swift5ReflectionSectionKind ReflSectionKind, + StringRef Buffer, uint32_t Alignment, uint32_t Size) { + MCSection *ReflectionSection = + MOFI->getSwift5ReflectionSection(ReflSectionKind); + if (ReflectionSection == nullptr) + return; + ReflectionSection->setAlignment(Align(Alignment)); + MS->SwitchSection(ReflectionSection); + MS->emitBytes(Buffer); +} + /// Emit the debug_range section contents for \p FuncRange by /// translating the original \p Entries. 
The debug_range section /// format is totally trivial, consisting just of pairs of address diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 3b8d80c4eeec..99001269e1f8 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -2866,6 +2866,90 @@ CallInst *OpenMPIRBuilder::createOMPFree(const LocationDescription &Loc, return Builder.CreateCall(Fn, Args, Name); } +CallInst *OpenMPIRBuilder::createOMPInteropInit( + const LocationDescription &Loc, Value *InteropVar, + omp::OMPInteropType InteropType, Value *Device, Value *NumDependences, + Value *DependenceAddress, bool HaveNowaitClause) { + IRBuilder<>::InsertPointGuard IPG(Builder); + Builder.restoreIP(Loc.IP); + + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); + Value *ThreadId = getOrCreateThreadID(Ident); + if (Device == nullptr) + Device = ConstantInt::get(Int32, -1); + Constant *InteropTypeVal = ConstantInt::get(Int64, (int)InteropType); + if (NumDependences == nullptr) { + NumDependences = ConstantInt::get(Int32, 0); + PointerType *PointerTypeVar = Type::getInt8PtrTy(M.getContext()); + DependenceAddress = ConstantPointerNull::get(PointerTypeVar); + } + Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause); + Value *Args[] = { + Ident, ThreadId, InteropVar, InteropTypeVal, + Device, NumDependences, DependenceAddress, HaveNowaitClauseVal}; + + Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_init); + + return Builder.CreateCall(Fn, Args); +} + +CallInst *OpenMPIRBuilder::createOMPInteropDestroy( + const LocationDescription &Loc, Value *InteropVar, Value *Device, + Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause) { + IRBuilder<>::InsertPointGuard IPG(Builder); + Builder.restoreIP(Loc.IP); + + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); + Value *ThreadId = getOrCreateThreadID(Ident); + if (Device == nullptr) + Device = ConstantInt::get(Int32, -1); + if (NumDependences == nullptr) { + NumDependences = ConstantInt::get(Int32, 0); + PointerType *PointerTypeVar = Type::getInt8PtrTy(M.getContext()); + DependenceAddress = ConstantPointerNull::get(PointerTypeVar); + } + Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause); + Value *Args[] = { + Ident, ThreadId, InteropVar, Device, + NumDependences, DependenceAddress, HaveNowaitClauseVal}; + + Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_destroy); + + return Builder.CreateCall(Fn, Args); +} + +CallInst *OpenMPIRBuilder::createOMPInteropUse(const LocationDescription &Loc, + Value *InteropVar, Value *Device, + Value *NumDependences, + Value *DependenceAddress, + bool HaveNowaitClause) { + IRBuilder<>::InsertPointGuard IPG(Builder); + Builder.restoreIP(Loc.IP); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); + Value *ThreadId = getOrCreateThreadID(Ident); + if (Device == nullptr) + Device = ConstantInt::get(Int32, -1); + if (NumDependences == nullptr) { + NumDependences = ConstantInt::get(Int32, 0); + PointerType *PointerTypeVar = Type::getInt8PtrTy(M.getContext()); + DependenceAddress = ConstantPointerNull::get(PointerTypeVar); + } + Value *HaveNowaitClauseVal = ConstantInt::get(Int32, 
HaveNowaitClause); + Value *Args[] = { + Ident, ThreadId, InteropVar, Device, + NumDependences, DependenceAddress, HaveNowaitClauseVal}; + + Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_use); + + return Builder.CreateCall(Fn, Args); +} + CallInst *OpenMPIRBuilder::createCachedThreadPrivate( const LocationDescription &Loc, llvm::Value *Pointer, llvm::ConstantInt *Size, const llvm::Twine &Name) { @@ -3138,7 +3222,7 @@ OpenMPIRBuilder::createAtomicRead(const LocationDescription &Loc, Type *XTy = X.Var->getType(); assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory"); - Type *XElemTy = XTy->getPointerElementType(); + Type *XElemTy = X.ElemTy; assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() || XElemTy->isPointerTy()) && "OMP atomic read expected a scalar type"); @@ -3180,7 +3264,7 @@ OpenMPIRBuilder::createAtomicWrite(const LocationDescription &Loc, Type *XTy = X.Var->getType(); assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory"); - Type *XElemTy = XTy->getPointerElementType(); + Type *XElemTy = X.ElemTy; assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() || XElemTy->isPointerTy()) && "OMP atomic write expected a scalar type"); @@ -3216,7 +3300,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicUpdate( Type *XTy = X.Var->getType(); assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory"); - Type *XElemTy = XTy->getPointerElementType(); + Type *XElemTy = X.ElemTy; assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() || XElemTy->isPointerTy()) && "OMP atomic update expected a scalar type"); @@ -3225,8 +3309,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicUpdate( "OpenMP atomic does not support LT or GT operations"); }); - emitAtomicUpdate(AllocIP, X.Var, Expr, AO, RMWOp, UpdateOp, X.IsVolatile, - IsXBinopExpr); + emitAtomicUpdate(AllocIP, X.Var, X.ElemTy, Expr, AO, RMWOp, UpdateOp, + X.IsVolatile, IsXBinopExpr); checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update); return Builder.saveIP(); } @@ -3259,13 +3343,10 @@ Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2, llvm_unreachable("Unsupported atomic update operation"); } -std::pair<Value *, Value *> -OpenMPIRBuilder::emitAtomicUpdate(Instruction *AllocIP, Value *X, Value *Expr, - AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, - AtomicUpdateCallbackTy &UpdateOp, - bool VolatileX, bool IsXBinopExpr) { - Type *XElemTy = X->getType()->getPointerElementType(); - +std::pair<Value *, Value *> OpenMPIRBuilder::emitAtomicUpdate( + Instruction *AllocIP, Value *X, Type *XElemTy, Value *Expr, + AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, + AtomicUpdateCallbackTy &UpdateOp, bool VolatileX, bool IsXBinopExpr) { bool DoCmpExch = ((RMWOp == AtomicRMWInst::BAD_BINOP) || (RMWOp == AtomicRMWInst::FAdd)) || (RMWOp == AtomicRMWInst::FSub) || @@ -3380,8 +3461,9 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCapture( // If UpdateExpr is 'x' updated with some `expr` not based on 'x', // 'x' is simply atomically rewritten with 'expr'. AtomicRMWInst::BinOp AtomicOp = (UpdateExpr ? RMWOp : AtomicRMWInst::Xchg); - std::pair<Value *, Value *> Result = emitAtomicUpdate( - AllocIP, X.Var, Expr, AO, AtomicOp, UpdateOp, X.IsVolatile, IsXBinopExpr); + std::pair<Value *, Value *> Result = + emitAtomicUpdate(AllocIP, X.Var, X.ElemTy, Expr, AO, AtomicOp, UpdateOp, + X.IsVolatile, IsXBinopExpr); Value *CapturedVal = (IsPostfixUpdate ? 
Result.first : Result.second); Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile); diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp index c92bacaee36d..43fde64c3734 100644 --- a/llvm/lib/IR/Attributes.cpp +++ b/llvm/lib/IR/Attributes.cpp @@ -23,7 +23,6 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Twine.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" @@ -31,11 +30,9 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cassert> -#include <climits> #include <cstddef> #include <cstdint> #include <limits> @@ -390,26 +387,15 @@ std::string Attribute::getAsString(bool InAttrGrp) const { // align=4 // alignstack=8 // - if (hasAttribute(Attribute::Alignment)) { - std::string Result; - Result += "align"; - Result += (InAttrGrp) ? "=" : " "; - Result += utostr(getValueAsInt()); - return Result; - } + if (hasAttribute(Attribute::Alignment)) + return (InAttrGrp ? "align=" + Twine(getValueAsInt()) + : "align " + Twine(getValueAsInt())) + .str(); auto AttrWithBytesToString = [&](const char *Name) { - std::string Result; - Result += Name; - if (InAttrGrp) { - Result += "="; - Result += utostr(getValueAsInt()); - } else { - Result += "("; - Result += utostr(getValueAsInt()); - Result += ")"; - } - return Result; + return (InAttrGrp ? Name + ("=" + Twine(getValueAsInt())) + : Name + ("(" + Twine(getValueAsInt())) + ")") + .str(); }; if (hasAttribute(Attribute::StackAlignment)) @@ -426,26 +412,18 @@ std::string Attribute::getAsString(bool InAttrGrp) const { Optional<unsigned> NumElems; std::tie(ElemSize, NumElems) = getAllocSizeArgs(); - std::string Result = "allocsize("; - Result += utostr(ElemSize); - if (NumElems.hasValue()) { - Result += ','; - Result += utostr(*NumElems); - } - Result += ')'; - return Result; + return (NumElems + ? 
"allocsize(" + Twine(ElemSize) + "," + Twine(*NumElems) + ")" + : "allocsize(" + Twine(ElemSize) + ")") + .str(); } if (hasAttribute(Attribute::VScaleRange)) { unsigned MinValue = getVScaleRangeMin(); Optional<unsigned> MaxValue = getVScaleRangeMax(); - - std::string Result = "vscale_range("; - Result += utostr(MinValue); - Result += ','; - Result += utostr(MaxValue.getValueOr(0)); - Result += ')'; - return Result; + return ("vscale_range(" + Twine(MinValue) + "," + + Twine(MaxValue.getValueOr(0)) + ")") + .str(); } // Convert target-dependent attributes to strings of the form: diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 45459e200b3d..11839c7572e3 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -15,7 +15,6 @@ #include "llvm/IR/AutoUpgrade.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DIBuilder.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp index 7beafc485d09..99e3afaa8ba8 100644 --- a/llvm/lib/IR/BasicBlock.cpp +++ b/llvm/lib/IR/BasicBlock.cpp @@ -12,15 +12,14 @@ #include "llvm/IR/BasicBlock.h" #include "SymbolTableListTraitsImpl.h" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Type.h" -#include <algorithm> using namespace llvm; diff --git a/llvm/lib/IR/Comdat.cpp b/llvm/lib/IR/Comdat.cpp index 90d5c6e82e5c..2cd6db913621 100644 --- a/llvm/lib/IR/Comdat.cpp +++ b/llvm/lib/IR/Comdat.cpp @@ -11,11 +11,13 @@ //===----------------------------------------------------------------------===// #include "llvm-c/Comdat.h" -#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/StringMapEntry.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/Comdat.h" #include "llvm/IR/GlobalObject.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Value.h" using namespace llvm; diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp index 622a984be22c..936b1fc2ff6f 100644 --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -30,8 +30,6 @@ #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/MathExtras.h" using namespace llvm; using namespace llvm::PatternMatch; diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp index c13990af360e..b862a159127f 100644 --- a/llvm/lib/IR/Constants.cpp +++ b/llvm/lib/IR/Constants.cpp @@ -16,16 +16,19 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalIFunc.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> diff --git a/llvm/lib/IR/Core.cpp 
b/llvm/lib/IR/Core.cpp index 43df15e4d932..7ed156d552b1 100644 --- a/llvm/lib/IR/Core.cpp +++ b/llvm/lib/IR/Core.cpp @@ -13,6 +13,7 @@ #include "llvm-c/Core.h" #include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" @@ -27,6 +28,7 @@ #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" +#include "llvm/PassRegistry.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" diff --git a/llvm/lib/IR/DIBuilder.cpp b/llvm/lib/IR/DIBuilder.cpp index a6e84dfbe1dd..dc5768dd4f26 100644 --- a/llvm/lib/IR/DIBuilder.cpp +++ b/llvm/lib/IR/DIBuilder.cpp @@ -13,12 +13,10 @@ #include "llvm/IR/DIBuilder.h" #include "LLVMContextImpl.h" #include "llvm/ADT/Optional.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp index 61b2b13bfd03..96f55cf14de8 100644 --- a/llvm/lib/IR/DataLayout.cpp +++ b/llvm/lib/IR/DataLayout.cpp @@ -30,12 +30,13 @@ #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/MemAlloc.h" #include "llvm/Support/TypeSize.h" #include <algorithm> #include <cassert> #include <cstdint> #include <cstdlib> -#include <tuple> +#include <new> #include <utility> using namespace llvm; diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp index 98f25b035157..fd4b4170c0a7 100644 --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -14,17 +14,16 @@ #include "llvm-c/DebugInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" -#include "llvm/IR/DebugInfo.h" -#include "llvm/IR/DIBuilder.h" #include "llvm/IR/Function.h" #include "llvm/IR/GVMaterializer.h" #include "llvm/IR/Instruction.h" diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp index 59afb844eb89..b9fc5261fefe 100644 --- a/llvm/lib/IR/DebugInfoMetadata.cpp +++ b/llvm/lib/IR/DebugInfoMetadata.cpp @@ -15,9 +15,9 @@ #include "MetadataImpl.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringSwitch.h" -#include "llvm/IR/DIBuilder.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" #include <numeric> diff --git a/llvm/lib/IR/DebugLoc.cpp b/llvm/lib/IR/DebugLoc.cpp index 993f3a39e6ff..34c9d026b19a 100644 --- a/llvm/lib/IR/DebugLoc.cpp +++ b/llvm/lib/IR/DebugLoc.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/DebugLoc.h" -#include "LLVMContextImpl.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/DebugInfo.h" using namespace llvm; diff --git a/llvm/lib/IR/DiagnosticInfo.cpp b/llvm/lib/IR/DiagnosticInfo.cpp index 0a872a81f911..f46f0fdd947d 100644 --- a/llvm/lib/IR/DiagnosticInfo.cpp +++ 
b/llvm/lib/IR/DiagnosticInfo.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/DiagnosticInfo.h" -#include "LLVMContextImpl.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/ADT/iterator_range.h" @@ -24,22 +23,19 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/InstructionCost.h" #include "llvm/Support/Path.h" -#include "llvm/Support/Regex.h" #include "llvm/Support/ScopedPrinter.h" #include "llvm/Support/raw_ostream.h" #include <atomic> -#include <cassert> -#include <memory> #include <string> using namespace llvm; diff --git a/llvm/lib/IR/Dominators.cpp b/llvm/lib/IR/Dominators.cpp index ace708b252c7..aac8936c7bd6 100644 --- a/llvm/lib/IR/Dominators.cpp +++ b/llvm/lib/IR/Dominators.cpp @@ -14,19 +14,27 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/Dominators.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/CFG.h" -#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/PassManager.h" #include "llvm/InitializePasses.h" +#include "llvm/PassRegistry.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/GenericDomTreeConstruction.h" #include "llvm/Support/raw_ostream.h" -#include <algorithm> + +#include <cassert> + +namespace llvm { +class Argument; +class Constant; +class Value; +} // namespace llvm using namespace llvm; bool llvm::VerifyDomInfo = false; diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp index 1e874d7afa79..726ba80da41b 100644 --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -30,7 +30,6 @@ #include "llvm/IR/GlobalValue.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsAArch64.h" @@ -63,7 +62,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" -#include <algorithm> #include <cassert> #include <cstddef> #include <cstdint> diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp index c832499dde06..47e8bc0a916d 100644 --- a/llvm/lib/IR/Globals.cpp +++ b/llvm/lib/IR/Globals.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "LLVMContextImpl.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" @@ -21,7 +20,6 @@ #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Module.h" -#include "llvm/IR/Operator.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp index 27528a69be21..4e8f1b506811 100644 --- a/llvm/lib/IR/IRBuilder.cpp +++ b/llvm/lib/IR/IRBuilder.cpp @@ 
-29,7 +29,6 @@ #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/MathExtras.h" #include <cassert> #include <cstdint> #include <vector> diff --git a/llvm/lib/IR/InlineAsm.cpp b/llvm/lib/IR/InlineAsm.cpp index a0c48781ced5..203ad6dae1ff 100644 --- a/llvm/lib/IR/InlineAsm.cpp +++ b/llvm/lib/IR/InlineAsm.cpp @@ -22,7 +22,6 @@ #include <algorithm> #include <cassert> #include <cctype> -#include <cstddef> #include <cstdlib> using namespace llvm; diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp index 59b7221d1fa2..36a20679863b 100644 --- a/llvm/lib/IR/Instruction.cpp +++ b/llvm/lib/IR/Instruction.cpp @@ -16,7 +16,6 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" using namespace llvm; diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp index adea7abb75cf..e27758c5de02 100644 --- a/llvm/lib/IR/IntrinsicInst.cpp +++ b/llvm/lib/IR/IntrinsicInst.cpp @@ -24,14 +24,12 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Statepoint.h" -#include "llvm/Support/raw_ostream.h" using namespace llvm; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/IR/LLVMContext.cpp b/llvm/lib/IR/LLVMContext.cpp index 90716d9c81a6..e19ead98a616 100644 --- a/llvm/lib/IR/LLVMContext.cpp +++ b/llvm/lib/IR/LLVMContext.cpp @@ -20,8 +20,6 @@ #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/LLVMRemarkStreamer.h" -#include "llvm/IR/Metadata.h" -#include "llvm/IR/Module.h" #include "llvm/Remarks/RemarkStreamer.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" diff --git a/llvm/lib/IR/LLVMContextImpl.cpp b/llvm/lib/IR/LLVMContextImpl.cpp index ebbf382aea38..8f9530290459 100644 --- a/llvm/lib/IR/LLVMContextImpl.cpp +++ b/llvm/lib/IR/LLVMContextImpl.cpp @@ -11,12 +11,24 @@ //===----------------------------------------------------------------------===// #include "LLVMContextImpl.h" +#include "AttributeImpl.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/StringMapEntry.h" +#include "llvm/ADT/iterator.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/IR/DiagnosticHandler.h" +#include "llvm/IR/LLVMRemarkStreamer.h" #include "llvm/IR/Module.h" #include "llvm/IR/OptBisect.h" #include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/Remarks/RemarkStreamer.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/TypeSize.h" #include <cassert> #include <utility> diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h index 0b5f928165e8..70242f4d8f20 100644 --- a/llvm/lib/IR/LLVMContextImpl.h +++ b/llvm/lib/IR/LLVMContextImpl.h @@ -14,7 +14,6 @@ #ifndef LLVM_LIB_IR_LLVMCONTEXTIMPL_H #define LLVM_LIB_IR_LLVMCONTEXTIMPL_H -#include "AttributeImpl.h" #include "ConstantsContext.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" @@ -34,13 +33,14 @@ #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/LLVMContext.h" 
-#include "llvm/IR/LLVMRemarkStreamer.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" #include "llvm/IR/TrackingMDRef.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Casting.h" #include "llvm/Support/StringSaver.h" -#include "llvm/Support/YAMLTraits.h" #include <algorithm> #include <cassert> #include <cstddef> @@ -52,9 +52,23 @@ namespace llvm { +class AttributeImpl; +class AttributeListImpl; +class AttributeSetNode; +class BasicBlock; +struct DiagnosticHandler; +class ElementCount; +class Function; +class GlobalObject; +class GlobalValue; +class InlineAsm; +class LLVMRemarkStreamer; +class OptPassGate; +namespace remarks { +class RemarkStreamer; +} +template <typename T> class StringMapEntry; class StringRef; -class Type; -class Value; class ValueHandleBase; using DenseMapAPIntKeyInfo = DenseMapInfo<APInt>; diff --git a/llvm/lib/IR/LLVMRemarkStreamer.cpp b/llvm/lib/IR/LLVMRemarkStreamer.cpp index 21ce47457f52..f7e2aa4e9a35 100644 --- a/llvm/lib/IR/LLVMRemarkStreamer.cpp +++ b/llvm/lib/IR/LLVMRemarkStreamer.cpp @@ -15,7 +15,9 @@ #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/Remarks/RemarkStreamer.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/ToolOutputFile.h" using namespace llvm; diff --git a/llvm/lib/IR/LegacyPassManager.cpp b/llvm/lib/IR/LegacyPassManager.cpp index 4357c95aa9f6..08cf909a83f9 100644 --- a/llvm/lib/IR/LegacyPassManager.cpp +++ b/llvm/lib/IR/LegacyPassManager.cpp @@ -12,28 +12,27 @@ #include "llvm/IR/LegacyPassManager.h" #include "llvm/ADT/MapVector.h" -#include "llvm/ADT/Statistic.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/LegacyPassManagers.h" -#include "llvm/IR/LegacyPassNameParser.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassTimingInfo.h" #include "llvm/IR/PrintPasses.h" -#include "llvm/IR/StructuralHash.h" #include "llvm/Support/Chrono.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/Mutex.h" #include "llvm/Support/TimeProfiler.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> -#include <unordered_set> + +#ifdef EXPENSIVE_CHECKS +#include "llvm/IR/StructuralHash.h" +#endif + using namespace llvm; // See PassManagers.h for Pass Manager infrastructure overview. 
diff --git a/llvm/lib/IR/Metadata.cpp b/llvm/lib/IR/Metadata.cpp index ebcc493407cc..226718ecac28 100644 --- a/llvm/lib/IR/Metadata.cpp +++ b/llvm/lib/IR/Metadata.cpp @@ -13,7 +13,6 @@ #include "llvm/IR/Metadata.h" #include "LLVMContextImpl.h" #include "MetadataImpl.h" -#include "SymbolTableListTraitsImpl.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" @@ -44,7 +43,6 @@ #include "llvm/IR/TrackingMDRef.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" -#include "llvm/IR/ValueHandle.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -52,8 +50,6 @@ #include <cassert> #include <cstddef> #include <cstdint> -#include <iterator> -#include <tuple> #include <type_traits> #include <utility> #include <vector> diff --git a/llvm/lib/IR/Module.cpp b/llvm/lib/IR/Module.cpp index a0485a59d0e0..4974b372db2a 100644 --- a/llvm/lib/IR/Module.cpp +++ b/llvm/lib/IR/Module.cpp @@ -13,7 +13,6 @@ #include "llvm/IR/Module.h" #include "SymbolTableListTraitsImpl.h" #include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" @@ -39,7 +38,6 @@ #include "llvm/IR/TypeFinder.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueSymbolTable.h" -#include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/Error.h" diff --git a/llvm/lib/IR/ModuleSummaryIndex.cpp b/llvm/lib/IR/ModuleSummaryIndex.cpp index a0ac7d3ad7d3..0ca40a675fe4 100644 --- a/llvm/lib/IR/ModuleSummaryIndex.cpp +++ b/llvm/lib/IR/ModuleSummaryIndex.cpp @@ -14,7 +14,6 @@ #include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringMap.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" diff --git a/llvm/lib/IR/Operator.cpp b/llvm/lib/IR/Operator.cpp index 08c1fc931e2e..c2a4a7c29915 100644 --- a/llvm/lib/IR/Operator.cpp +++ b/llvm/lib/IR/Operator.cpp @@ -14,7 +14,6 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Type.h" #include "ConstantsContext.h" diff --git a/llvm/lib/IR/OptBisect.cpp b/llvm/lib/IR/OptBisect.cpp index 55c0dbad5aab..418311eac814 100644 --- a/llvm/lib/IR/OptBisect.cpp +++ b/llvm/lib/IR/OptBisect.cpp @@ -17,7 +17,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" #include <cassert> -#include <limits> using namespace llvm; diff --git a/llvm/lib/IR/PassManager.cpp b/llvm/lib/IR/PassManager.cpp index d933003ccdf7..3025c3853d5f 100644 --- a/llvm/lib/IR/PassManager.cpp +++ b/llvm/lib/IR/PassManager.cpp @@ -7,10 +7,9 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/PassManager.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/IR/LLVMContext.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/Optional.h" #include "llvm/IR/PassManagerImpl.h" -#include "llvm/Support/CommandLine.h" using namespace llvm; diff --git a/llvm/lib/IR/ProfileSummary.cpp b/llvm/lib/IR/ProfileSummary.cpp index 05d5ac2c5ddf..9f7335ecbe44 100644 --- a/llvm/lib/IR/ProfileSummary.cpp +++ b/llvm/lib/IR/ProfileSummary.cpp @@ -12,9 +12,8 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/ProfileSummary.h" -#include "llvm/IR/Attributes.h" #include 
"llvm/IR/Constants.h" -#include "llvm/IR/Function.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Type.h" #include "llvm/Support/Casting.h" diff --git a/llvm/lib/IR/PseudoProbe.cpp b/llvm/lib/IR/PseudoProbe.cpp index 101cada77ff9..5cad887b295d 100644 --- a/llvm/lib/IR/PseudoProbe.cpp +++ b/llvm/lib/IR/PseudoProbe.cpp @@ -15,7 +15,7 @@ #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" -#include <unordered_set> +#include "llvm/IR/IntrinsicInst.h" using namespace llvm; diff --git a/llvm/lib/IR/ReplaceConstant.cpp b/llvm/lib/IR/ReplaceConstant.cpp index cfd8deba5a53..d2f676192e7f 100644 --- a/llvm/lib/IR/ReplaceConstant.cpp +++ b/llvm/lib/IR/ReplaceConstant.cpp @@ -12,9 +12,8 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/ReplaceConstant.h" -#include "llvm/IR/IRBuilder.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/NoFolder.h" #include "llvm/IR/ValueMap.h" namespace llvm { diff --git a/llvm/lib/IR/SSAContext.cpp b/llvm/lib/IR/SSAContext.cpp index a96e39f32882..5b865692dd7f 100644 --- a/llvm/lib/IR/SSAContext.cpp +++ b/llvm/lib/IR/SSAContext.cpp @@ -13,10 +13,11 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/SSAContext.h" -#include "llvm/IR/Argument.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/ModuleSlotTracker.h" +#include "llvm/IR/Value.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/llvm/lib/IR/SafepointIRVerifier.cpp b/llvm/lib/IR/SafepointIRVerifier.cpp index 2117527a64f0..d8634e0ac7dd 100644 --- a/llvm/lib/IR/SafepointIRVerifier.cpp +++ b/llvm/lib/IR/SafepointIRVerifier.cpp @@ -38,10 +38,8 @@ #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/Module.h" #include "llvm/IR/Statepoint.h" #include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" diff --git a/llvm/lib/IR/Statepoint.cpp b/llvm/lib/IR/Statepoint.cpp index b5916e4937c6..508e3cb71ed2 100644 --- a/llvm/lib/IR/Statepoint.cpp +++ b/llvm/lib/IR/Statepoint.cpp @@ -13,8 +13,6 @@ #include "llvm/IR/Statepoint.h" -#include "llvm/IR/Function.h" - using namespace llvm; bool llvm::isStatepointDirectiveAttr(Attribute Attr) { diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp index d59d87ad631b..85b658c8a52f 100644 --- a/llvm/lib/IR/Type.cpp +++ b/llvm/lib/IR/Type.cpp @@ -21,10 +21,8 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Support/TypeSize.h" #include "llvm/Support/raw_ostream.h" #include <cassert> diff --git a/llvm/lib/IR/Use.cpp b/llvm/lib/IR/Use.cpp index 99049c0232aa..601a9df5279e 100644 --- a/llvm/lib/IR/Use.cpp +++ b/llvm/lib/IR/Use.cpp @@ -8,11 +8,13 @@ #include "llvm/IR/Use.h" #include "llvm/IR/User.h" -#include "llvm/IR/Value.h" -#include <new> namespace llvm { +class User; +template <typename> struct simplify_type; +class Value; + void Use::swap(Use &RHS) { if (Val == RHS.Val) return; diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp index 8741ed917f9f..18aef37e2023 100644 --- 
a/llvm/lib/IR/Value.cpp +++ b/llvm/lib/IR/Value.cpp @@ -13,7 +13,6 @@ #include "llvm/IR/Value.h" #include "LLVMContextImpl.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallString.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" @@ -21,7 +20,6 @@ #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DerivedUser.h" -#include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" @@ -32,7 +30,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index b84edb789405..989d01e2e395 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -58,7 +58,6 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" -#include "llvm/ADT/ilist.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" @@ -70,7 +69,6 @@ #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" @@ -5811,15 +5809,11 @@ void Verifier::verifyAttachedCallBundle(const CallBase &Call, "void return type", Call); - Assert((BU.Inputs.empty() || - (BU.Inputs.size() == 1 && isa<Function>(BU.Inputs.front()))), - "operand bundle \"clang.arc.attachedcall\" can take either no " - "arguments or one function as an argument", + Assert(BU.Inputs.size() == 1 && isa<Function>(BU.Inputs.front()), + "operand bundle \"clang.arc.attachedcall\" requires one function as " + "an argument", Call); - if (BU.Inputs.empty()) - return; - auto *Fn = cast<Function>(BU.Inputs.front()); Intrinsic::ID IID = Fn->getIntrinsicID(); diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index f26ef4b21996..418aad26fdd6 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -46,6 +46,7 @@ #include "llvm/Support/ThreadPool.h" #include "llvm/Support/Threading.h" #include "llvm/Support/TimeProfiler.h" +#include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/VCSRevision.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" @@ -1372,7 +1373,7 @@ public: sys::fs::OpenFlags::OF_None); if (EC) return errorCodeToError(EC); - WriteIndexToFile(CombinedIndex, OS, &ModuleToSummariesForIndex); + writeIndexToFile(CombinedIndex, OS, &ModuleToSummariesForIndex); if (ShouldEmitImportsFiles) { EC = EmitImportsFiles(ModulePath, NewModulePath + ".imports", diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp index 7694c9848384..3877def53c3f 100644 --- a/llvm/lib/LTO/LTOBackend.cpp +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -38,6 +38,7 @@ #include "llvm/Support/Path.h" #include "llvm/Support/Program.h" #include "llvm/Support/ThreadPool.h" +#include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/IPO.h" @@ -144,7 +145,7 @@ Error Config::addSaveTemps(std::string OutputFileName, // directly and exit. 
if (EC) reportOpenError(Path, EC.message()); - WriteIndexToFile(Index, OS); + writeIndexToFile(Index, OS); Path = OutputFileName + "index.dot"; raw_fd_ostream OSDot(Path, EC, sys::fs::OpenFlags::OF_None); @@ -359,7 +360,7 @@ bool lto::opt(const Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod, LLVM_DEBUG( dbgs() << "Post-(Thin)LTO merge bitcode embedding was requested, but " "command line arguments are not available"); - llvm::EmbedBitcodeInModule(Mod, llvm::MemoryBufferRef(), + llvm::embedBitcodeInModule(Mod, llvm::MemoryBufferRef(), /*EmbedBitcode*/ true, /*EmbedCmdline*/ true, /*Cmdline*/ CmdArgs); } @@ -380,7 +381,7 @@ static void codegen(const Config &Conf, TargetMachine *TM, return; if (EmbedBitcode == LTOBitcodeEmbedding::EmbedOptimized) - llvm::EmbedBitcodeInModule(Mod, llvm::MemoryBufferRef(), + llvm::embedBitcodeInModule(Mod, llvm::MemoryBufferRef(), /*EmbedBitcode*/ true, /*EmbedCmdline*/ false, /*CmdArgs*/ std::vector<uint8_t>()); diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp index 9aea27f0fdba..37e85b6af6ba 100644 --- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp +++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp @@ -1052,7 +1052,7 @@ void ThinLTOCodeGenerator::run() { if (EC) report_fatal_error(Twine("Failed to open ") + SaveTempPath + " to save optimized bitcode\n"); - WriteIndexToFile(*Index, OS); + writeIndexToFile(*Index, OS); } diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp index 119237bb052e..61ec941f50b8 100644 --- a/llvm/lib/MC/MCAsmStreamer.cpp +++ b/llvm/lib/MC/MCAsmStreamer.cpp @@ -788,7 +788,7 @@ void MCAsmStreamer::emitSyntaxDirective() { } void MCAsmStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) { - OS << "\t.def\t "; + OS << "\t.def\t"; Symbol->print(OS, MAI); OS << ';'; EmitEOL(); diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp index 7f639e9c408f..eafcee1e0607 100644 --- a/llvm/lib/MC/MCContext.cpp +++ b/llvm/lib/MC/MCContext.cpp @@ -67,10 +67,10 @@ static void defaultDiagHandler(const SMDiagnostic &SMD, bool, const SourceMgr &, MCContext::MCContext(const Triple &TheTriple, const MCAsmInfo *mai, const MCRegisterInfo *mri, const MCSubtargetInfo *msti, const SourceMgr *mgr, MCTargetOptions const *TargetOpts, - bool DoAutoReset) - : TT(TheTriple), SrcMgr(mgr), InlineSrcMgr(nullptr), - DiagHandler(defaultDiagHandler), MAI(mai), MRI(mri), MSTI(msti), - Symbols(Allocator), UsedNames(Allocator), + bool DoAutoReset, StringRef Swift5ReflSegmentName) + : Swift5ReflectionSegmentName(Swift5ReflSegmentName), TT(TheTriple), + SrcMgr(mgr), InlineSrcMgr(nullptr), DiagHandler(defaultDiagHandler), + MAI(mai), MRI(mri), MSTI(msti), Symbols(Allocator), UsedNames(Allocator), InlineAsmUsedLabelNames(Allocator), CurrentDwarfLoc(0, 0, 0, DWARF2_FLAG_IS_STMT, 0, 0), AutoReset(DoAutoReset), TargetOptions(TargetOpts) { diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp index d7f85f793c55..b7890e7f0937 100644 --- a/llvm/lib/MC/MCObjectFileInfo.cpp +++ b/llvm/lib/MC/MCObjectFileInfo.cpp @@ -299,6 +299,18 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) { RemarksSection = Ctx->getMachOSection( "__LLVM", "__remarks", MachO::S_ATTR_DEBUG, SectionKind::getMetadata()); + // The architecture of dsymutil makes it very difficult to copy the Swift + // reflection metadata sections into the __TEXT segment, so dsymutil creates + // these sections in the __DWARF segment instead. 
+ if (!Ctx->getSwift5ReflectionSegmentName().empty()) { +#define HANDLE_SWIFT_SECTION(KIND, MACHO, ELF, COFF) \ + Swift5ReflectionSections \ + [llvm::binaryformat::Swift5ReflectionSectionKind::KIND] = \ + Ctx->getMachOSection(Ctx->getSwift5ReflectionSegmentName().data(), \ + MACHO, 0, SectionKind::getMetadata()); +#include "llvm/BinaryFormat/Swift.def" + } + TLSExtraDataSection = TLSTLVSection; } diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp index 42e257516f4e..3d95b18f4672 100644 --- a/llvm/lib/Object/MachOObjectFile.cpp +++ b/llvm/lib/Object/MachOObjectFile.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/MachO.h" +#include "llvm/BinaryFormat/Swift.h" #include "llvm/Object/Error.h" #include "llvm/Object/MachO.h" #include "llvm/Object/ObjectFile.h" @@ -4765,3 +4766,15 @@ MachOObjectFile::findDsymObjectMembers(StringRef Path) { Path.str().c_str()); return ObjectPaths; } + +llvm::binaryformat::Swift5ReflectionSectionKind +MachOObjectFile::mapReflectionSectionNameToEnumValue( + StringRef SectionName) const { +#define HANDLE_SWIFT_SECTION(KIND, MACHO, ELF, COFF) \ + .Case(MACHO, llvm::binaryformat::Swift5ReflectionSectionKind::KIND) + return StringSwitch<llvm::binaryformat::Swift5ReflectionSectionKind>( + SectionName) +#include "llvm/BinaryFormat/Swift.def" + .Default(llvm::binaryformat::Swift5ReflectionSectionKind::unknown); +#undef HANDLE_SWIFT_SECTION +} diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp index ffe2599beaf8..d597148b98ab 100644 --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -579,6 +579,7 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO, BCase(EF_AMDGPU_FEATURE_SRAMECC_V3); break; case ELF::ELFABIVERSION_AMDGPU_HSA_V4: + case ELF::ELFABIVERSION_AMDGPU_HSA_V5: BCaseMask(EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4, EF_AMDGPU_FEATURE_XNACK_V4); BCaseMask(EF_AMDGPU_FEATURE_XNACK_ANY_V4, diff --git a/llvm/lib/ObjectYAML/WasmEmitter.cpp b/llvm/lib/ObjectYAML/WasmEmitter.cpp index 80a8c56f6912..2aa2ef3e5541 100644 --- a/llvm/lib/ObjectYAML/WasmEmitter.cpp +++ b/llvm/lib/ObjectYAML/WasmEmitter.cpp @@ -585,19 +585,8 @@ void WasmWriter::writeRelocSection(raw_ostream &OS, WasmYAML::Section &Sec, writeUint8(OS, Reloc.Type); encodeULEB128(Reloc.Offset, OS); encodeULEB128(Reloc.Index, OS); - switch (Reloc.Type) { - case wasm::R_WASM_MEMORY_ADDR_LEB: - case wasm::R_WASM_MEMORY_ADDR_LEB64: - case wasm::R_WASM_MEMORY_ADDR_SLEB: - case wasm::R_WASM_MEMORY_ADDR_SLEB64: - case wasm::R_WASM_MEMORY_ADDR_I32: - case wasm::R_WASM_MEMORY_ADDR_I64: - case wasm::R_WASM_FUNCTION_OFFSET_I32: - case wasm::R_WASM_FUNCTION_OFFSET_I64: - case wasm::R_WASM_SECTION_OFFSET_I32: + if (wasm::relocTypeHasAddend(Reloc.Type)) encodeSLEB128(Reloc.Addend, OS); - break; - } } } diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 6110bda02406..93637c890c4f 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -1454,6 +1454,9 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); } + // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present. + MPM.addPass(OpenMPOptPass()); + // Remove unused virtual tables to improve the quality of code generated by // whole-program devirtualization and bitset lowering. 
MPM.addPass(GlobalDCEPass()); @@ -1648,6 +1651,10 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, addVectorPasses(Level, MainFPM, /* IsFullLTO */ true); + // Run the OpenMPOpt CGSCC pass again late. + MPM.addPass( + createModuleToPostOrderCGSCCPassAdaptor(OpenMPOptCGSCCPass())); + invokePeepholeEPCallbacks(MainFPM, Level); MainFPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true)); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM), diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index 051655e1fed6..07d467305ae5 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -1181,32 +1181,6 @@ bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken) { return true; } -// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime -// aware this is an ir_level profile so it can set the version flag. -GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS, - bool InstrEntryBBEnabled, - bool DebugInfoCorrelate) { - const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)); - Type *IntTy64 = Type::getInt64Ty(M.getContext()); - uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF); - if (IsCS) - ProfileVersion |= VARIANT_MASK_CSIR_PROF; - if (InstrEntryBBEnabled) - ProfileVersion |= VARIANT_MASK_INSTR_ENTRY; - if (DebugInfoCorrelate) - ProfileVersion |= VARIANT_MASK_DBG_CORRELATE; - auto IRLevelVersionVariable = new GlobalVariable( - M, IntTy64, true, GlobalValue::WeakAnyLinkage, - Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName); - IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility); - Triple TT(M.getTargetTriple()); - if (TT.supportsCOMDAT()) { - IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage); - IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName)); - } - return IRLevelVersionVariable; -} - // Create the variable for the profile file name. void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput) { if (InstrProfileOutput.empty()) diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp index 861ff61df510..138b1532d778 100644 --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -38,6 +38,28 @@ using namespace llvm; +// Extracts the variant information from the top 8 bits in the version and +// returns an enum specifying the variants present. +static InstrProfKind getProfileKindFromVersion(uint64_t Version) { + InstrProfKind ProfileKind = InstrProfKind::Unknown; + if (Version & VARIANT_MASK_IR_PROF) { + ProfileKind |= InstrProfKind::IR; + } + if (Version & VARIANT_MASK_CSIR_PROF) { + ProfileKind |= InstrProfKind::CS; + } + if (Version & VARIANT_MASK_INSTR_ENTRY) { + ProfileKind |= InstrProfKind::BB; + } + if (Version & VARIANT_MASK_BYTE_COVERAGE) { + ProfileKind |= InstrProfKind::SingleByteCoverage; + } + if (Version & VARIANT_MASK_FUNCTION_ENTRY_ONLY) { + ProfileKind |= InstrProfKind::FunctionEntryOnly; + } + return ProfileKind; +} + static Expected<std::unique_ptr<MemoryBuffer>> setupMemoryBuffer(const Twine &Path) { ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = @@ -154,30 +176,24 @@ bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) { // with a leading ':' will be reported an error format. 
Error TextInstrProfReader::readHeader() { Symtab.reset(new InstrProfSymtab()); - bool IsIRInstr = false; - bool IsEntryFirst = false; - bool IsCS = false; while (Line->startswith(":")) { StringRef Str = Line->substr(1); if (Str.equals_insensitive("ir")) - IsIRInstr = true; + ProfileKind |= InstrProfKind::IR; else if (Str.equals_insensitive("fe")) - IsIRInstr = false; + ProfileKind |= InstrProfKind::FE; else if (Str.equals_insensitive("csir")) { - IsIRInstr = true; - IsCS = true; + ProfileKind |= InstrProfKind::IR; + ProfileKind |= InstrProfKind::CS; } else if (Str.equals_insensitive("entry_first")) - IsEntryFirst = true; + ProfileKind |= InstrProfKind::BB; else if (Str.equals_insensitive("not_entry_first")) - IsEntryFirst = false; + ProfileKind &= ~InstrProfKind::BB; else return error(instrprof_error::bad_header); ++Line; } - IsIRLevelProfile = IsIRInstr; - InstrEntryBBEnabled = IsEntryFirst; - HasCSIRLevelProfile = IsCS; return success(); } @@ -304,6 +320,11 @@ Error TextInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { } template <class IntPtrT> +InstrProfKind RawInstrProfReader<IntPtrT>::getProfileKind() const { + return getProfileKindFromVersion(Version); +} + +template <class IntPtrT> bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) { if (DataBuffer.getBufferSize() < sizeof(uint64_t)) return false; @@ -485,9 +506,15 @@ Error RawInstrProfReader<IntPtrT>::readRawCounts( Record.Counts.clear(); Record.Counts.reserve(NumCounters); for (uint32_t I = 0; I < NumCounters; I++) { - const auto *CounterValue = reinterpret_cast<const uint64_t *>( - CountersStart + CounterBaseOffset + I * getCounterTypeSize()); - Record.Counts.push_back(swap(*CounterValue)); + const char *Ptr = + CountersStart + CounterBaseOffset + I * getCounterTypeSize(); + if (hasSingleByteCoverage()) { + // A value of zero signifies the block is covered. + Record.Counts.push_back(*Ptr == 0 ? 1 : 0); + } else { + const auto *CounterValue = reinterpret_cast<const uint64_t *>(Ptr); + Record.Counts.push_back(swap(*CounterValue)); + } } return success(); @@ -718,6 +745,11 @@ InstrProfReaderIndex<HashTableImpl>::InstrProfReaderIndex( RecordIterator = HashTable->data_begin(); } +template <typename HashTableImpl> +InstrProfKind InstrProfReaderIndex<HashTableImpl>::getProfileKind() const { + return getProfileKindFromVersion(FormatVersion); +} + namespace { /// A remapper that does not apply any remappings. 
class InstrProfReaderNullRemapper : public InstrProfReaderRemapper { diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index 6628eea80640..8ded1c0426e5 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -166,9 +166,8 @@ public: } // end namespace llvm -InstrProfWriter::InstrProfWriter(bool Sparse, bool InstrEntryBBEnabled) - : Sparse(Sparse), InstrEntryBBEnabled(InstrEntryBBEnabled), - InfoObj(new InstrProfRecordWriterTrait()) {} +InstrProfWriter::InstrProfWriter(bool Sparse) + : Sparse(Sparse), InfoObj(new InstrProfRecordWriterTrait()) {} InstrProfWriter::~InstrProfWriter() { delete InfoObj; } @@ -303,14 +302,16 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { IndexedInstrProf::Header Header; Header.Magic = IndexedInstrProf::Magic; Header.Version = IndexedInstrProf::ProfVersion::CurrentVersion; - if (ProfileKind == PF_IRLevel) - Header.Version |= VARIANT_MASK_IR_PROF; - if (ProfileKind == PF_IRLevelWithCS) { + if (static_cast<bool>(ProfileKind & InstrProfKind::IR)) Header.Version |= VARIANT_MASK_IR_PROF; + if (static_cast<bool>(ProfileKind & InstrProfKind::CS)) Header.Version |= VARIANT_MASK_CSIR_PROF; - } - if (InstrEntryBBEnabled) + if (static_cast<bool>(ProfileKind & InstrProfKind::BB)) Header.Version |= VARIANT_MASK_INSTR_ENTRY; + if (static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage)) + Header.Version |= VARIANT_MASK_BYTE_COVERAGE; + if (static_cast<bool>(ProfileKind & InstrProfKind::FunctionEntryOnly)) + Header.Version |= VARIANT_MASK_FUNCTION_ENTRY_ONLY; Header.Unused = 0; Header.HashType = static_cast<uint64_t>(IndexedInstrProf::HashType); @@ -337,7 +338,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { OS.write(0); uint64_t CSSummaryOffset = 0; uint64_t CSSummarySize = 0; - if (ProfileKind == PF_IRLevelWithCS) { + if (static_cast<bool>(ProfileKind & InstrProfKind::CS)) { CSSummaryOffset = OS.tell(); CSSummarySize = SummarySize / sizeof(uint64_t); for (unsigned I = 0; I < CSSummarySize; I++) @@ -358,7 +359,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { // For Context Sensitive summary. std::unique_ptr<IndexedInstrProf::Summary> TheCSSummary = nullptr; - if (ProfileKind == PF_IRLevelWithCS) { + if (static_cast<bool>(ProfileKind & InstrProfKind::CS)) { TheCSSummary = IndexedInstrProf::allocSummary(SummarySize); std::unique_ptr<ProfileSummary> CSPS = CSISB.getSummary(); setSummary(TheCSSummary.get(), *CSPS); @@ -470,11 +471,13 @@ void InstrProfWriter::writeRecordInText(StringRef Name, uint64_t Hash, } Error InstrProfWriter::writeText(raw_fd_ostream &OS) { - if (ProfileKind == PF_IRLevel) - OS << "# IR level Instrumentation Flag\n:ir\n"; - else if (ProfileKind == PF_IRLevelWithCS) + // Check CS first since it implies an IR level profile. 
+ if (static_cast<bool>(ProfileKind & InstrProfKind::CS)) OS << "# CSIR level Instrumentation Flag\n:csir\n"; - if (InstrEntryBBEnabled) + else if (static_cast<bool>(ProfileKind & InstrProfKind::IR)) + OS << "# IR level Instrumentation Flag\n:ir\n"; + + if (static_cast<bool>(ProfileKind & InstrProfKind::BB)) OS << "# Always instrument the function entry block\n:entry_first\n"; InstrProfSymtab Symtab; diff --git a/llvm/lib/Remarks/BitstreamRemarkParser.cpp b/llvm/lib/Remarks/BitstreamRemarkParser.cpp index 3d586a247962..d74fff4ca7c5 100644 --- a/llvm/lib/Remarks/BitstreamRemarkParser.cpp +++ b/llvm/lib/Remarks/BitstreamRemarkParser.cpp @@ -13,6 +13,7 @@ #include "llvm/Remarks/BitstreamRemarkParser.h" #include "BitstreamRemarkParser.h" +#include "llvm/Remarks/Remark.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" diff --git a/llvm/lib/Remarks/BitstreamRemarkParser.h b/llvm/lib/Remarks/BitstreamRemarkParser.h index 0e40e5d66e00..988bc30da6e1 100644 --- a/llvm/lib/Remarks/BitstreamRemarkParser.h +++ b/llvm/lib/Remarks/BitstreamRemarkParser.h @@ -16,7 +16,6 @@ #include "llvm/ADT/Optional.h" #include "llvm/Remarks/BitstreamRemarkContainer.h" #include "llvm/Remarks/BitstreamRemarkParser.h" -#include "llvm/Remarks/Remark.h" #include "llvm/Remarks/RemarkFormat.h" #include "llvm/Remarks/RemarkParser.h" #include <cstdint> @@ -24,6 +23,9 @@ namespace llvm { namespace remarks { + +struct Remark; + /// Parses and holds the state of the latest parsed remark. struct BitstreamRemarkParser : public RemarkParser { /// The buffer to parse. diff --git a/llvm/lib/Remarks/RemarkLinker.cpp b/llvm/lib/Remarks/RemarkLinker.cpp index dd1bba3d1762..62f80918ea1d 100644 --- a/llvm/lib/Remarks/RemarkLinker.cpp +++ b/llvm/lib/Remarks/RemarkLinker.cpp @@ -12,10 +12,12 @@ #include "llvm/Remarks/RemarkLinker.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Remarks/BitstreamRemarkContainer.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Object/SymbolicFile.h" #include "llvm/Remarks/RemarkParser.h" #include "llvm/Remarks/RemarkSerializer.h" #include "llvm/Support/Error.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace llvm::remarks; diff --git a/llvm/lib/Remarks/RemarkParser.cpp b/llvm/lib/Remarks/RemarkParser.cpp index c5c3d0badd3e..f36767efcbf4 100644 --- a/llvm/lib/Remarks/RemarkParser.cpp +++ b/llvm/lib/Remarks/RemarkParser.cpp @@ -15,7 +15,6 @@ #include "BitstreamRemarkParser.h" #include "YAMLRemarkParser.h" #include "llvm-c/Remarks.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/Support/CBindingWrapping.h" using namespace llvm; diff --git a/llvm/lib/Remarks/YAMLRemarkParser.h b/llvm/lib/Remarks/YAMLRemarkParser.h index df3b908f4779..88b3003010d3 100644 --- a/llvm/lib/Remarks/YAMLRemarkParser.h +++ b/llvm/lib/Remarks/YAMLRemarkParser.h @@ -14,14 +14,12 @@ #define LLVM_REMARKS_YAML_REMARK_PARSER_H #include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/Remarks/Remark.h" #include "llvm/Remarks/RemarkParser.h" #include "llvm/Support/Error.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/YAMLParser.h" -#include "llvm/Support/YAMLTraits.h" #include "llvm/Support/raw_ostream.h" #include <string> diff --git a/llvm/lib/Remarks/YAMLRemarkSerializer.cpp b/llvm/lib/Remarks/YAMLRemarkSerializer.cpp index 827e04f0b10f..9e965aa4f6c4 100644 --- a/llvm/lib/Remarks/YAMLRemarkSerializer.cpp +++ b/llvm/lib/Remarks/YAMLRemarkSerializer.cpp @@ -12,7 +12,6 @@ 
//===----------------------------------------------------------------------===// #include "llvm/Remarks/YAMLRemarkSerializer.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" using namespace llvm; diff --git a/llvm/lib/Support/ARMAttributeParser.cpp b/llvm/lib/Support/ARMAttributeParser.cpp index 908e56319025..9ba224cee0ca 100644 --- a/llvm/lib/Support/ARMAttributeParser.cpp +++ b/llvm/lib/Support/ARMAttributeParser.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/ARMAttributeParser.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/STLArrayExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/ScopedPrinter.h" diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp index 9a4470289bcf..f6003b783245 100644 --- a/llvm/lib/Support/Host.cpp +++ b/llvm/lib/Support/Host.cpp @@ -211,6 +211,7 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) { .Case("0xd0d", "cortex-a77") .Case("0xd41", "cortex-a78") .Case("0xd44", "cortex-x1") + .Case("0xd4c", "cortex-x1c") .Case("0xd0c", "neoverse-n1") .Case("0xd49", "neoverse-n2") .Case("0xd40", "neoverse-v1") diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp index 6c59d8a7ef04..2b3395b669b8 100644 --- a/llvm/lib/Support/RISCVISAInfo.cpp +++ b/llvm/lib/Support/RISCVISAInfo.cpp @@ -461,15 +461,7 @@ RISCVISAInfo::parseFeatures(unsigned XLen, ISAInfo->Exts.erase(ExtName.str()); } - ISAInfo->updateImplication(); - ISAInfo->updateFLen(); - ISAInfo->updateMinVLen(); - ISAInfo->updateMaxELen(); - - if (Error Result = ISAInfo->checkDependency()) - return std::move(Result); - - return std::move(ISAInfo); + return RISCVISAInfo::postProcessAndChecking(std::move(ISAInfo)); } llvm::Expected<std::unique_ptr<RISCVISAInfo>> @@ -686,26 +678,18 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, } } - ISAInfo->updateImplication(); - ISAInfo->updateFLen(); - ISAInfo->updateMinVLen(); - ISAInfo->updateMaxELen(); - - if (Error Result = ISAInfo->checkDependency()) - return std::move(Result); - - return std::move(ISAInfo); + return RISCVISAInfo::postProcessAndChecking(std::move(ISAInfo)); } Error RISCVISAInfo::checkDependency() { bool IsRv32 = XLen == 32; - bool HasE = Exts.count("e") == 1; - bool HasD = Exts.count("d") == 1; - bool HasF = Exts.count("f") == 1; - bool HasZve32x = Exts.count("zve32x") == 1; - bool HasZve32f = Exts.count("zve32f") == 1; - bool HasZve64d = Exts.count("zve64d") == 1; - bool HasV = Exts.count("v") == 1; + bool HasE = Exts.count("e") != 0; + bool HasD = Exts.count("d") != 0; + bool HasF = Exts.count("f") != 0; + bool HasZve32x = Exts.count("zve32x") != 0; + bool HasZve32f = Exts.count("zve32f") != 0; + bool HasZve64d = Exts.count("zve64d") != 0; + bool HasV = Exts.count("v") != 0; bool HasVector = HasZve32x || HasV; bool HasZvl = MinVLen != 0; @@ -739,12 +723,6 @@ Error RISCVISAInfo::checkDependency() { errc::invalid_argument, "zvl*b requires v or zve* extension to also be specified"); - // Could not implement Zve* extension and the V extension at the same time. - if (HasZve32x && HasV) - return createStringError( - errc::invalid_argument, - "It is illegal to specify the v extension with zve* extensions"); - // Additional dependency checks. // TODO: The 'q' extension requires rv64. // TODO: It is illegal to specify 'e' extensions with 'f' and 'd'. 
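Both RISC-V parsing paths above now funnel into a single postProcessAndChecking helper, whose definition appears further down in this file's diff, and the implied-extension table in the next hunk gains a comment because lookups binary-search it and therefore need it sorted by name. A minimal standalone sketch of that sorted-table lookup; ImpliedExtsEntry and the table contents here are simplified stand-ins, not LLVM's actual definitions:

#include <algorithm>
#include <cstddef>
#include <cstring>
#include <iostream>
#include <iterator>

// Simplified stand-in for the ImpliedExtsEntry table in RISCVISAInfo.cpp:
// maps an extension name to the extensions it implies.
struct ImpliedExtsEntry {
  const char *Name;
  const char *const *Exts;
  std::size_t NumExts;
  bool operator<(const char *Other) const {
    return std::strcmp(Name, Other) < 0;
  }
};

static const char *ImpliedExtsV[] = {"zvl128b", "f", "d"};
static const char *ImpliedExtsZfh[] = {"f"};

// Must stay sorted by Name, or std::lower_bound will miss entries.
static const ImpliedExtsEntry ImpliedExts[] = {
    {"v", ImpliedExtsV, 3},
    {"zfh", ImpliedExtsZfh, 1},
};

int main() {
  const char *Query = "zfh";
  const auto *It =
      std::lower_bound(std::begin(ImpliedExts), std::end(ImpliedExts), Query);
  if (It != std::end(ImpliedExts) && std::strcmp(It->Name, Query) == 0)
    for (std::size_t I = 0; I < It->NumExts; ++I)
      std::cout << Query << " implies " << It->Exts[I] << '\n';
}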
@@ -753,7 +731,8 @@ Error RISCVISAInfo::checkDependency() { } static const char *ImpliedExtsV[] = {"zvl128b", "f", "d"}; -static const char *ImpliedExtsZfh[] = {"zfhmin"}; +static const char *ImpliedExtsZfhmin[] = {"f"}; +static const char *ImpliedExtsZfh[] = {"f"}; static const char *ImpliedExtsZve64d[] = {"zve64f"}; static const char *ImpliedExtsZve64f[] = {"zve64x", "zve32f"}; static const char *ImpliedExtsZve64x[] = {"zve32x", "zvl64b"}; @@ -785,9 +764,11 @@ struct ImpliedExtsEntry { bool operator<(StringRef Other) const { return Name < Other; } }; +// Note: The table needs to be sorted by name. static constexpr ImpliedExtsEntry ImpliedExts[] = { {{"v"}, {ImpliedExtsV}}, {{"zfh"}, {ImpliedExtsZfh}}, + {{"zfhmin"}, {ImpliedExtsZfhmin}}, {{"zk"}, {ImpliedExtsZk}}, {{"zkn"}, {ImpliedExtsZkn}}, {{"zks"}, {ImpliedExtsZks}}, @@ -810,8 +791,8 @@ static constexpr ImpliedExtsEntry ImpliedExts[] = { }; void RISCVISAInfo::updateImplication() { - bool HasE = Exts.count("e") == 1; - bool HasI = Exts.count("i") == 1; + bool HasE = Exts.count("e") != 0; + bool HasI = Exts.count("i") != 0; // If not in e extension and i extension does not exist, i extension is // implied @@ -919,3 +900,15 @@ std::vector<std::string> RISCVISAInfo::toFeatureVector() const { } return FeatureVector; } + +llvm::Expected<std::unique_ptr<RISCVISAInfo>> +RISCVISAInfo::postProcessAndChecking(std::unique_ptr<RISCVISAInfo> &&ISAInfo) { + ISAInfo->updateImplication(); + ISAInfo->updateFLen(); + ISAInfo->updateMinVLen(); + ISAInfo->updateMaxELen(); + + if (Error Result = ISAInfo->checkDependency()) + return std::move(Result); + return std::move(ISAInfo); +} diff --git a/llvm/lib/Support/Signals.cpp b/llvm/lib/Support/Signals.cpp index 5ce41c987029..1d61f2bf7525 100644 --- a/llvm/lib/Support/Signals.cpp +++ b/llvm/lib/Support/Signals.cpp @@ -15,7 +15,7 @@ #include "DebugOptions.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/STLArrayExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Config/llvm-config.h" #include "llvm/Support/CommandLine.h" diff --git a/llvm/lib/Support/Triple.cpp b/llvm/lib/Support/Triple.cpp index 20dea8c302a5..a9afcc9db96a 100644 --- a/llvm/lib/Support/Triple.cpp +++ b/llvm/lib/Support/Triple.cpp @@ -7,14 +7,14 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/Triple.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/STLArrayExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/ARMTargetParser.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Host.h" #include "llvm/Support/SwapByteOrder.h" -#include "llvm/Support/ARMTargetParser.h" #include "llvm/Support/VersionTuple.h" #include <cassert> #include <cstring> diff --git a/llvm/lib/Support/Valgrind.cpp b/llvm/lib/Support/Valgrind.cpp index 3cf41faeb55d..5994656c5c03 100644 --- a/llvm/lib/Support/Valgrind.cpp +++ b/llvm/lib/Support/Valgrind.cpp @@ -12,9 +12,9 @@ // //===----------------------------------------------------------------------===// +#include <stddef.h> #include "llvm/Support/Valgrind.h" #include "llvm/Config/config.h" -#include <cstddef> #if HAVE_VALGRIND_VALGRIND_H #include <valgrind/valgrind.h> diff --git a/llvm/lib/Support/Windows/Host.inc b/llvm/lib/Support/Windows/Host.inc index 5583db909045..fa6b00f19b9a 100644 --- a/llvm/lib/Support/Windows/Host.inc +++ b/llvm/lib/Support/Windows/Host.inc @@ -10,6 +10,9 @@ // 
//===----------------------------------------------------------------------===// +// We need to include config.h here because LLVM_DEFAULT_TARGET_TRIPLE is not +// defined in llvm-config.h if it is unset. +#include "llvm/Config/config.h" #include "llvm/Support/Windows/WindowsSupport.h" #include <cstdio> #include <string> diff --git a/llvm/lib/Support/raw_ostream.cpp b/llvm/lib/Support/raw_ostream.cpp index e4b747b68bea..69d4fe96bee8 100644 --- a/llvm/lib/Support/raw_ostream.cpp +++ b/llvm/lib/Support/raw_ostream.cpp @@ -11,7 +11,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/STLArrayExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Config/config.h" #include "llvm/Support/Compiler.h" diff --git a/llvm/lib/TableGen/DetailedRecordsBackend.cpp b/llvm/lib/TableGen/DetailedRecordsBackend.cpp index e181f79b903d..500aa4c78225 100644 --- a/llvm/lib/TableGen/DetailedRecordsBackend.cpp +++ b/llvm/lib/TableGen/DetailedRecordsBackend.cpp @@ -13,15 +13,16 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/Support/Format.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormatVariadic.h" -#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" -#include "llvm/TableGen/TableGenBackend.h" +#include <map> +#include <memory> #include <string> #include <utility> diff --git a/llvm/lib/TableGen/JSONBackend.cpp b/llvm/lib/TableGen/JSONBackend.cpp index 8ddfd9f04524..e38903910275 100644 --- a/llvm/lib/TableGen/JSONBackend.cpp +++ b/llvm/lib/TableGen/JSONBackend.cpp @@ -11,12 +11,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" -#include "llvm/TableGen/Error.h" -#include "llvm/TableGen/Record.h" -#include "llvm/TableGen/TableGenBackend.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/JSON.h" +#include "llvm/TableGen/Record.h" #define DEBUG_TYPE "json-emitter" diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp index 762255b43136..1d5f130737ee 100644 --- a/llvm/lib/TableGen/Main.cpp +++ b/llvm/lib/TableGen/Main.cpp @@ -16,7 +16,6 @@ #include "llvm/TableGen/Main.h" #include "TGParser.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" @@ -24,7 +23,6 @@ #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" #include <algorithm> -#include <cstdio> #include <system_error> using namespace llvm; diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp index eb7d4838a9f6..58d8c9936896 100644 --- a/llvm/lib/TableGen/Record.cpp +++ b/llvm/lib/TableGen/Record.cpp @@ -10,16 +10,15 @@ // //===----------------------------------------------------------------------===// +#include "llvm/TableGen/Record.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" #include 
"llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/StringSet.h" #include "llvm/Config/llvm-config.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Casting.h" @@ -29,11 +28,10 @@ #include "llvm/Support/SMLoc.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TableGen/Error.h" -#include "llvm/TableGen/Record.h" #include <cassert> #include <cstdint> -#include <memory> #include <map> +#include <memory> #include <string> #include <utility> #include <vector> @@ -2289,8 +2287,8 @@ bool RecordVal::setValue(Init *V, SMLoc NewLoc) { return false; } -#include "llvm/TableGen/Record.h" #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +#include "llvm/TableGen/Record.h" LLVM_DUMP_METHOD void RecordVal::dump() const { errs() << *this; } #endif diff --git a/llvm/lib/TableGen/SetTheory.cpp b/llvm/lib/TableGen/SetTheory.cpp index f7ba75243c15..3db46aae6d96 100644 --- a/llvm/lib/TableGen/SetTheory.cpp +++ b/llvm/lib/TableGen/SetTheory.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/TableGen/SetTheory.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Format.h" @@ -21,7 +21,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" -#include "llvm/TableGen/SetTheory.h" #include <algorithm> #include <cstdint> #include <string> diff --git a/llvm/lib/TableGen/TGParser.cpp b/llvm/lib/TableGen/TGParser.cpp index 3709a375ed1b..90646a0c642d 100644 --- a/llvm/lib/TableGen/TGParser.cpp +++ b/llvm/lib/TableGen/TGParser.cpp @@ -11,8 +11,8 @@ //===----------------------------------------------------------------------===// #include "TGParser.h" +#include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/None.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" @@ -21,7 +21,6 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/SourceMgr.h" #include <algorithm> #include <cassert> #include <cstdint> diff --git a/llvm/lib/TableGen/TableGenBackendSkeleton.cpp b/llvm/lib/TableGen/TableGenBackendSkeleton.cpp index 4ce88e003e65..0ba00c8d8ab1 100644 --- a/llvm/lib/TableGen/TableGenBackendSkeleton.cpp +++ b/llvm/lib/TableGen/TableGenBackendSkeleton.cpp @@ -10,22 +10,17 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/TableGen/Error.h" -#include "llvm/TableGen/Record.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/StringRef.h" #include "llvm/TableGen/TableGenBackend.h" -#include <algorithm> -#include <set> -#include <string> -#include <vector> #define DEBUG_TYPE "skeleton-emitter" +namespace llvm { +class RecordKeeper; +class raw_ostream; +} // namespace llvm + using namespace llvm; namespace { diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index b87468d5c8de..9a04b28a8b8f 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -972,6 +972,10 @@ def ProcessorFeatures { list<SubtargetFeature> X1 
= [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, FeatureNEON, FeatureRCPC, FeaturePerfMon, FeatureSPE, FeatureFullFP16, FeatureDotProd]; + list<SubtargetFeature> X1C = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeatureRCPC, FeaturePerfMon, + FeatureSPE, FeatureFullFP16, FeatureDotProd, + FeaturePAuth]; list<SubtargetFeature> X2 = [HasV9_0aOps, FeatureNEON, FeaturePerfMon, FeatureMatMulInt8, FeatureBF16, FeatureAM, FeatureMTE, FeatureETE, FeatureSVE2BitPerm, @@ -1086,6 +1090,8 @@ def : ProcessorModel<"cortex-r82", CortexA55Model, ProcessorFeatures.R82, [TuneR82]>; def : ProcessorModel<"cortex-x1", CortexA57Model, ProcessorFeatures.X1, [TuneX1]>; +def : ProcessorModel<"cortex-x1c", CortexA57Model, ProcessorFeatures.X1C, + [TuneX1]>; def : ProcessorModel<"cortex-x2", CortexA57Model, ProcessorFeatures.X2, [TuneX2]>; def : ProcessorModel<"neoverse-e1", CortexA53Model, diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index 85a9c04a3fef..b54a0eaba7d1 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -95,6 +95,8 @@ public: void LowerJumpTableDest(MCStreamer &OutStreamer, const MachineInstr &MI); + void LowerMOPS(MCStreamer &OutStreamer, const MachineInstr &MI); + void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM, const MachineInstr &MI); void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, @@ -936,6 +938,43 @@ void AArch64AsmPrinter::LowerJumpTableDest(llvm::MCStreamer &OutStreamer, .addImm(Size == 4 ? 0 : 2)); } +void AArch64AsmPrinter::LowerMOPS(llvm::MCStreamer &OutStreamer, + const llvm::MachineInstr &MI) { + unsigned Opcode = MI.getOpcode(); + assert(STI->hasMOPS()); + assert(STI->hasMTE() || Opcode != AArch64::MOPSMemorySetTaggingPseudo); + + const auto Ops = [Opcode]() -> std::array<unsigned, 3> { + if (Opcode == AArch64::MOPSMemoryCopyPseudo) + return {AArch64::CPYFP, AArch64::CPYFM, AArch64::CPYFE}; + if (Opcode == AArch64::MOPSMemoryMovePseudo) + return {AArch64::CPYP, AArch64::CPYM, AArch64::CPYE}; + if (Opcode == AArch64::MOPSMemorySetPseudo) + return {AArch64::SETP, AArch64::SETM, AArch64::SETE}; + if (Opcode == AArch64::MOPSMemorySetTaggingPseudo) + return {AArch64::SETGP, AArch64::SETGM, AArch64::MOPSSETGE}; + llvm_unreachable("Unhandled memory operation pseudo"); + }(); + const bool IsSet = Opcode == AArch64::MOPSMemorySetPseudo || + Opcode == AArch64::MOPSMemorySetTaggingPseudo; + + for (auto Op : Ops) { + int i = 0; + auto MCIB = MCInstBuilder(Op); + // Destination registers + MCIB.addReg(MI.getOperand(i++).getReg()); + MCIB.addReg(MI.getOperand(i++).getReg()); + if (!IsSet) + MCIB.addReg(MI.getOperand(i++).getReg()); + // Input registers + MCIB.addReg(MI.getOperand(i++).getReg()); + MCIB.addReg(MI.getOperand(i++).getReg()); + MCIB.addReg(MI.getOperand(i++).getReg()); + + EmitToStreamer(OutStreamer, MCIB); + } +} + void AArch64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM, const MachineInstr &MI) { unsigned NumNOPBytes = StackMapOpers(&MI).getNumPatchBytes(); @@ -1363,6 +1402,13 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) { emitFMov0(*MI); return; + case AArch64::MOPSMemoryCopyPseudo: + case AArch64::MOPSMemoryMovePseudo: + case AArch64::MOPSMemorySetPseudo: + case AArch64::MOPSMemorySetTaggingPseudo: + LowerMOPS(*OutStreamer, *MI); + return; + case TargetOpcode::STACKMAP: return LowerSTACKMAP(*OutStreamer, SM, *MI); diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp 
b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp index 109b739528bf..b0f739cc26e6 100644 --- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -709,20 +709,24 @@ bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB, bool AArch64ExpandPseudo::expandCALL_RVMARKER( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { - // Expand CALL_RVMARKER pseudo to a branch, followed by the special `mov x29, - // x29` marker. Mark the sequence as bundle, to avoid passes moving other code - // in between. + // Expand CALL_RVMARKER pseudo to: + // - a branch to the call target, followed by + // - the special `mov x29, x29` marker, and + // - another branch, to the runtime function + // Mark the sequence as a bundle, to avoid passes moving other code in between. MachineInstr &MI = *MBBI; MachineInstr *OriginalCall; - MachineOperand &CallTarget = MI.getOperand(0); + MachineOperand &RVTarget = MI.getOperand(0); + MachineOperand &CallTarget = MI.getOperand(1); assert((CallTarget.isGlobal() || CallTarget.isReg()) && "invalid operand for regular call"); + assert(RVTarget.isGlobal() && "invalid operand for attached call"); unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR; OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr(); OriginalCall->addOperand(CallTarget); - unsigned RegMaskStartIdx = 1; + unsigned RegMaskStartIdx = 2; // Skip register arguments. Those are added during ISel, but are not // needed for the concrete branch. while (!MI.getOperand(RegMaskStartIdx).isRegMask()) { @@ -736,17 +740,22 @@ bool AArch64ExpandPseudo::expandCALL_RVMARKER( llvm::drop_begin(MI.operands(), RegMaskStartIdx)) OriginalCall->addOperand(MO); - auto *Marker = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs)) + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs)) .addReg(AArch64::FP, RegState::Define) .addReg(AArch64::XZR) .addReg(AArch64::FP) - .addImm(0) + .addImm(0); + + auto *RVCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::BL)) + .add(RVTarget) .getInstr(); + if (MI.shouldUpdateCallSiteInfo()) - MBB.getParent()->moveCallSiteInfo(&MI, Marker); + MBB.getParent()->moveCallSiteInfo(&MI, OriginalCall); + MI.eraseFromParent(); finalizeBundle(MBB, OriginalCall->getIterator(), - std::next(Marker->getIterator())); + std::next(RVCall->getIterator())); return true; } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index a26bbc77f248..c539c8617d99 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -29,6 +29,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/ObjCARCUtil.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/Analysis.h" @@ -938,19 +939,20 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, // In case of strict alignment, avoid an excessive number of byte wide stores. MaxStoresPerMemsetOptSize = 8; - MaxStoresPerMemset = Subtarget->requiresStrictAlign() - ? MaxStoresPerMemsetOptSize : 32; + MaxStoresPerMemset = + Subtarget->requiresStrictAlign() ? MaxStoresPerMemsetOptSize : 32; MaxGluedStoresPerMemcpy = 4; MaxStoresPerMemcpyOptSize = 4; - MaxStoresPerMemcpy = Subtarget->requiresStrictAlign() - ? MaxStoresPerMemcpyOptSize : 16; + MaxStoresPerMemcpy = + Subtarget->requiresStrictAlign() ? 
MaxStoresPerMemcpyOptSize : 16; - MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4; + MaxStoresPerMemmoveOptSize = 4; + MaxStoresPerMemmove = 4; MaxLoadsPerMemcmpOptSize = 4; - MaxLoadsPerMemcmp = Subtarget->requiresStrictAlign() - ? MaxLoadsPerMemcmpOptSize : 8; + MaxLoadsPerMemcmp = + Subtarget->requiresStrictAlign() ? MaxLoadsPerMemcmpOptSize : 8; setStackPointerRegisterToSaveRestore(AArch64::SP); @@ -1426,6 +1428,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv16i1, MVT::nxv16i8); } + if (Subtarget->hasMOPS() && Subtarget->hasMTE()) { + // Only required for llvm.aarch64.mops.memset.tag + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom); + } + PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive(); } @@ -2201,7 +2208,6 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::INSR) MAKE_CASE(AArch64ISD::PTEST) MAKE_CASE(AArch64ISD::PTRUE) - MAKE_CASE(AArch64ISD::PFALSE) MAKE_CASE(AArch64ISD::LD1_MERGE_ZERO) MAKE_CASE(AArch64ISD::LD1S_MERGE_ZERO) MAKE_CASE(AArch64ISD::LDNF1_MERGE_ZERO) @@ -2268,6 +2274,10 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::UADDLP) MAKE_CASE(AArch64ISD::CALL_RVMARKER) MAKE_CASE(AArch64ISD::ASSERT_ZEXT_BOOL) + MAKE_CASE(AArch64ISD::MOPS_MEMSET) + MAKE_CASE(AArch64ISD::MOPS_MEMSET_TAGGING) + MAKE_CASE(AArch64ISD::MOPS_MEMCOPY) + MAKE_CASE(AArch64ISD::MOPS_MEMMOVE) } #undef MAKE_CASE return nullptr; @@ -3746,6 +3756,10 @@ SDValue AArch64TargetLowering::LowerBITCAST(SDValue Op, if (OpVT != MVT::f16 && OpVT != MVT::bf16) return SDValue(); + // Bitcasts between f16 and bf16 are legal. + if (ArgVT == MVT::f16 || ArgVT == MVT::bf16) + return Op; + assert(ArgVT == MVT::i16); SDLoc DL(Op); @@ -4056,6 +4070,39 @@ static SDValue lowerConvertToSVBool(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::AND, DL, OutVT, Reinterpret, MaskReinterpret); } +SDValue AArch64TargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + unsigned IntNo = Op.getConstantOperandVal(1); + switch (IntNo) { + default: + return SDValue(); // Don't custom lower most intrinsics. + case Intrinsic::aarch64_mops_memset_tag: { + auto Node = cast<MemIntrinsicSDNode>(Op.getNode()); + SDLoc DL(Op); + SDValue Chain = Node->getChain(); + SDValue Dst = Op.getOperand(2); + SDValue Val = Op.getOperand(3); + Val = DAG.getAnyExtOrTrunc(Val, DL, MVT::i64); + SDValue Size = Op.getOperand(4); + auto Alignment = Node->getMemOperand()->getAlign(); + bool IsVol = Node->isVolatile(); + auto DstPtrInfo = Node->getPointerInfo(); + + const auto &SDI = + static_cast<const AArch64SelectionDAGInfo &>(DAG.getSelectionDAGInfo()); + SDValue MS = + SDI.EmitMOPS(AArch64ISD::MOPS_MEMSET_TAGGING, DAG, DL, Chain, Dst, Val, + Size, Alignment, IsVol, DstPtrInfo, MachinePointerInfo{}); + + // MOPS_MEMSET_TAGGING has 3 results (DstWb, SizeWb, Chain) whereas the + // intrinsic has 2. So hide SizeWb using MERGE_VALUES. Otherwise + // LowerOperationWrapper will complain that the number of results has + // changed. 
+ return DAG.getMergeValues({MS.getValue(0), MS.getValue(2)}, DL); + } + } +} + SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); @@ -5123,6 +5170,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, case ISD::MULHU: return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHU_PRED, /*OverrideNEON=*/true); + case ISD::INTRINSIC_W_CHAIN: + return LowerINTRINSIC_W_CHAIN(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::ATOMIC_STORE: @@ -6475,12 +6524,18 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, unsigned CallOpc = AArch64ISD::CALL; // Calls with operand bundle "clang.arc.attachedcall" are special. They should - // be expanded to the call, directly followed by a special marker sequence. - // Use the CALL_RVMARKER to do that. + // be expanded to the call, directly followed by a special marker sequence and + // a call to an ObjC library function. Use CALL_RVMARKER to do that. if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) { assert(!IsTailCall && "tail calls cannot be marked with clang.arc.attachedcall"); CallOpc = AArch64ISD::CALL_RVMARKER; + + // Add a target global address for the retainRV/claimRV runtime function + // just before the call target. + Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB); + auto GA = DAG.getTargetGlobalAddress(ARCFn, DL, PtrVT); + Ops.insert(Ops.begin() + 1, GA); } // Returns a chain and a flag for retval copy to use. @@ -9985,8 +10040,9 @@ SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op, // The only legal i1 vectors are SVE vectors, so we can use SVE-specific // lowering code. if (auto *ConstVal = dyn_cast<ConstantSDNode>(SplatVal)) { + // We can handle the zero case during isel. if (ConstVal->isZero()) - return DAG.getNode(AArch64ISD::PFALSE, dl, VT); + return Op; if (ConstVal->isOne()) return getPTrue(DAG, dl, VT, AArch64SVEPredPattern::all); } @@ -11869,6 +11925,19 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MONonTemporal; return true; } + case Intrinsic::aarch64_mops_memset_tag: { + Value *Dst = I.getArgOperand(0); + Value *Val = I.getArgOperand(1); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::getVT(Val->getType()); + Info.ptrVal = Dst; + Info.offset = 0; + Info.align = I.getParamAlign(0).valueOrOne(); + Info.flags = MachineMemOperand::MOStore; + // The size of the memory being operated on is unknown at this point. + Info.size = MemoryLocation::UnknownSize; + return true; + } default: break; } @@ -15092,7 +15161,7 @@ static bool isAllInactivePredicate(SDValue N) { while (N.getOpcode() == AArch64ISD::REINTERPRET_CAST) N = N.getOperand(0); - return N.getOpcode() == AArch64ISD::PFALSE; + return ISD::isConstantSplatVectorAllZeros(N.getNode()); } static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) { @@ -15393,6 +15462,52 @@ static SDValue performIntrinsicCombine(SDNode *N, return SDValue(); } +static bool isCheapToExtend(const SDValue &N) { + unsigned OC = N->getOpcode(); + return OC == ISD::LOAD || OC == ISD::MLOAD || + ISD::isConstantSplatVectorAllZeros(N.getNode()); +} + +static SDValue +performSignExtendSetCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, + SelectionDAG &DAG) { + // If we have (sext (setcc A B)) and A and B are cheap to extend, + // we can move the sext into the arguments and have the same result.
For + // example, if A and B are both loads, we can make those extending loads and + // avoid an extra instruction. This pattern appears often in VLS code + // generation where the inputs to the setcc have a different size to the + // instruction that wants to use the result of the setcc. + assert(N->getOpcode() == ISD::SIGN_EXTEND && + N->getOperand(0)->getOpcode() == ISD::SETCC); + const SDValue SetCC = N->getOperand(0); + + const SDValue CCOp0 = SetCC.getOperand(0); + const SDValue CCOp1 = SetCC.getOperand(1); + if (!CCOp0->getValueType(0).isInteger() || + !CCOp1->getValueType(0).isInteger()) + return SDValue(); + + ISD::CondCode Code = + cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get(); + + ISD::NodeType ExtType = + isSignedIntSetCC(Code) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + + if (isCheapToExtend(SetCC.getOperand(0)) && + isCheapToExtend(SetCC.getOperand(1))) { + const SDValue Ext1 = + DAG.getNode(ExtType, SDLoc(N), N->getValueType(0), CCOp0); + const SDValue Ext2 = + DAG.getNode(ExtType, SDLoc(N), N->getValueType(0), CCOp1); + + return DAG.getSetCC( + SDLoc(SetCC), N->getValueType(0), Ext1, Ext2, + cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get()); + } + + return SDValue(); +} + static SDValue performExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG) { @@ -15411,6 +15526,12 @@ static SDValue performExtendCombine(SDNode *N, return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), NewABD); } + + if (N->getValueType(0).isFixedLengthVector() && + N->getOpcode() == ISD::SIGN_EXTEND && + N->getOperand(0)->getOpcode() == ISD::SETCC) + return performSignExtendSetCCCombine(N, DCI, DAG); + return SDValue(); } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index ca6c70297c0b..2138c0ffe70a 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -323,7 +323,6 @@ enum NodeType : unsigned { INSR, PTEST, PTRUE, - PFALSE, BITREVERSE_MERGE_PASSTHRU, BSWAP_MERGE_PASSTHRU, @@ -453,6 +452,12 @@ enum NodeType : unsigned { LDP, STP, STNP, + + // Memory Operations + MOPS_MEMSET, + MOPS_MEMSET_TAGGING, + MOPS_MEMCOPY, + MOPS_MEMMOVE, }; } // end namespace AArch64ISD @@ -890,6 +895,7 @@ private: SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; bool isEligibleForTailCallOptimization( diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 93c17133c845..a9191924129c 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -93,9 +93,18 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { // before the assembly printer. unsigned NumBytes = 0; const MCInstrDesc &Desc = MI.getDesc(); + + // Size should be preferably set in + // llvm/lib/Target/AArch64/AArch64InstrInfo.td (default case). + // Specific cases handle instructions of variable sizes switch (Desc.getOpcode()) { default: - // Anything not explicitly designated otherwise is a normal 4-byte insn. + if (Desc.getSize()) + return Desc.getSize(); + + // Anything not explicitly designated otherwise (i.e. pseudo-instructions + // with fixed constant size but not specified in .td file) is a normal + // 4-byte insn. 
NumBytes = 4; break; case TargetOpcode::STACKMAP: @@ -115,29 +124,9 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { if (NumBytes == 0) NumBytes = 4; break; - case AArch64::TLSDESC_CALLSEQ: - // This gets lowered to an instruction sequence which takes 16 bytes - NumBytes = 16; - break; - case AArch64::SpeculationBarrierISBDSBEndBB: - // This gets lowered to 2 4-byte instructions. - NumBytes = 8; - break; - case AArch64::SpeculationBarrierSBEndBB: - // This gets lowered to 1 4-byte instructions. - NumBytes = 4; - break; - case AArch64::JumpTableDest32: - case AArch64::JumpTableDest16: - case AArch64::JumpTableDest8: - NumBytes = 12; - break; case AArch64::SPACE: NumBytes = MI.getOperand(1).getImm(); break; - case AArch64::StoreSwiftAsyncContext: - NumBytes = 20; - break; case TargetOpcode::BUNDLE: NumBytes = getInstBundleLength(MI); break; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index c8a697c8b82f..83bf89ff97c5 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -780,6 +780,7 @@ def : Pat<(AArch64LOADgot texternalsym:$addr), def : Pat<(AArch64LOADgot tconstpool:$addr), (LOADgot tconstpool:$addr)>; +// In general these get lowered into a sequence of three 4-byte instructions. // 32-bit jump table destination is actually only 2 instructions since we can // use the table itself as a PC-relative base. But optimization occurs after // branch relaxation so be pessimistic. @@ -815,8 +816,12 @@ let hasSideEffects = 1, isCodeGenOnly = 1 in { // SpeculationBarrierEndBB must only be used after an unconditional control // flow, i.e. after a terminator for which isBarrier is True. let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in { + // This gets lowered to a pair of 4-byte instructions. + let Size = 8 in def SpeculationBarrierISBDSBEndBB : Pseudo<(outs), (ins), []>, Sched<[]>; + // This gets lowered to a 4-byte instruction. + let Size = 4 in def SpeculationBarrierSBEndBB : Pseudo<(outs), (ins), []>, Sched<[]>; } @@ -2324,8 +2329,8 @@ def : Pat<(AArch64call GPR64noip:$Rn), (BLRNoIP GPR64noip:$Rn)>, Requires<[SLSBLRMitigation]>; -def : Pat<(AArch64call_rvmarker GPR64:$Rn), - (BLR_RVMARKER GPR64:$Rn)>, +def : Pat<(AArch64call_rvmarker (i64 tglobaladdr:$rvfunc), GPR64:$Rn), + (BLR_RVMARKER tglobaladdr:$rvfunc, GPR64:$Rn)>, Requires<[NoSLSBLRMitigation]>; let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { @@ -2356,7 +2361,8 @@ def EMITBKEY : Pseudo<(outs), (ins), []>, Sched<[]> {} // FIXME: maybe the scratch register used shouldn't be fixed to X1? // FIXME: can "hasSideEffects" be dropped?
-let isCall = 1, Defs = [LR, X0, X1], hasSideEffects = 1, +// This gets lowered to an instruction sequence which takes 16 bytes +let isCall = 1, Defs = [LR, X0, X1], hasSideEffects = 1, Size = 16, isCodeGenOnly = 1 in def TLSDESC_CALLSEQ : Pseudo<(outs), (ins i64imm:$sym), @@ -7546,6 +7552,9 @@ def : Pat<(i64 (bitconvert (f64 FPR64:$Xn))), def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))), (COPY_TO_REGCLASS V64:$Vn, GPR64)>; +def : Pat<(f16 (bitconvert (bf16 FPR16:$src))), (f16 FPR16:$src)>; +def : Pat<(bf16 (bitconvert (f16 FPR16:$src))), (bf16 FPR16:$src)>; + let Predicates = [IsLE] in { def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>; def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>; @@ -8330,26 +8339,67 @@ let Predicates = [HasLS64] in { } let Predicates = [HasMOPS] in { - defm CPYFP : MOPSMemoryCopyInsns<0b00, "cpyfp">; - defm CPYFM : MOPSMemoryCopyInsns<0b01, "cpyfm">; - defm CPYFE : MOPSMemoryCopyInsns<0b10, "cpyfe">; + let Defs = [NZCV] in { + defm CPYFP : MOPSMemoryCopyInsns<0b00, "cpyfp">; + + defm CPYP : MOPSMemoryMoveInsns<0b00, "cpyp">; + + defm SETP : MOPSMemorySetInsns<0b00, "setp">; + } + let Uses = [NZCV] in { + defm CPYFM : MOPSMemoryCopyInsns<0b01, "cpyfm">; + defm CPYFE : MOPSMemoryCopyInsns<0b10, "cpyfe">; - defm CPYP : MOPSMemoryMoveInsns<0b00, "cpyp">; - defm CPYM : MOPSMemoryMoveInsns<0b01, "cpym">; - defm CPYE : MOPSMemoryMoveInsns<0b10, "cpye">; + defm CPYM : MOPSMemoryMoveInsns<0b01, "cpym">; + defm CPYE : MOPSMemoryMoveInsns<0b10, "cpye">; - defm SETP : MOPSMemorySetInsns<0b00, "setp">; - defm SETM : MOPSMemorySetInsns<0b01, "setm">; - defm SETE : MOPSMemorySetInsns<0b10, "sete">; + defm SETM : MOPSMemorySetInsns<0b01, "setm">; + defm SETE : MOPSMemorySetInsns<0b10, "sete">; + } } let Predicates = [HasMOPS, HasMTE] in { - defm SETGP : MOPSMemorySetTaggingInsns<0b00, "setgp">; - defm SETGM : MOPSMemorySetTaggingInsns<0b01, "setgm">; - // Can't use SETGE because it's a reserved name in TargetSelectionDAG.td - defm MOPSSETGE : MOPSMemorySetTaggingInsns<0b10, "setge">; + let Defs = [NZCV] in { + defm SETGP : MOPSMemorySetTaggingInsns<0b00, "setgp">; + } + let Uses = [NZCV] in { + defm SETGM : MOPSMemorySetTaggingInsns<0b01, "setgm">; + // Can't use SETGE because it's a reserved name in TargetSelectionDAG.td + defm MOPSSETGE : MOPSMemorySetTaggingInsns<0b10, "setge">; + } +} + +// MOPS Node operands: 0: Dst, 1: Src or Value, 2: Size, 3: Chain +// MOPS Node results: 0: Dst writeback, 1: Size writeback, 2: Chain +def SDT_AArch64mops : SDTypeProfile<2, 3, [ SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2> ]>; +def AArch64mops_memset : SDNode<"AArch64ISD::MOPS_MEMSET", SDT_AArch64mops>; +def AArch64mops_memset_tagging : SDNode<"AArch64ISD::MOPS_MEMSET_TAGGING", SDT_AArch64mops>; +def AArch64mops_memcopy : SDNode<"AArch64ISD::MOPS_MEMCOPY", SDT_AArch64mops>; +def AArch64mops_memmove : SDNode<"AArch64ISD::MOPS_MEMMOVE", SDT_AArch64mops>; + +// MOPS operations always contain three 4-byte instructions +let Predicates = [HasMOPS], Defs = [NZCV], Size = 12, mayStore = 1 in { + let mayLoad = 1 in { + def MOPSMemoryCopyPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb), + (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn), + [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>; + def MOPSMemoryMovePseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb), + (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn), + [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>; + } + let mayLoad = 0 in { + def 
MOPSMemorySetPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb), + (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm), + [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>; + } +} +let Predicates = [HasMOPS, HasMTE], Defs = [NZCV], Size = 12, mayLoad = 0, mayStore = 1 in { + def MOPSMemorySetTaggingPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb), + (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm), + [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>; } -let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1 in +// This gets lowered into an instruction sequence of 20 bytes +let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1, Size = 20 in def StoreSwiftAsyncContext : Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset), []>, Sched<[]>; diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 73a680465f6f..1d162610de9c 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -292,7 +292,13 @@ def SDT_AArch64Arith_Unpred : SDTypeProfile<1, 2, [ SDTCisSameAs<0,1>, SDTCisSameAs<1,2> ]>; -def AArch64bic : SDNode<"AArch64ISD::BIC", SDT_AArch64Arith_Unpred>; +def AArch64bic_node : SDNode<"AArch64ISD::BIC", SDT_AArch64Arith_Unpred>; + +def AArch64bic : PatFrags<(ops node:$op1, node:$op2), + [(and node:$op1, (xor node:$op2, (AArch64dup (i32 -1)))), + (and node:$op1, (xor node:$op2, (AArch64dup (i64 -1)))), + (and node:$op1, (xor node:$op2, (SVEAllActive))), + (AArch64bic_node node:$op1, node:$op2)]>; let Predicates = [HasSVE] in { defm RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>; @@ -734,14 +740,14 @@ let Predicates = [HasSVEorStreamingSVE] in { defm PFIRST : sve_int_pfirst<0b00000, "pfirst", int_aarch64_sve_pfirst>; defm PNEXT : sve_int_pnext<0b00110, "pnext", int_aarch64_sve_pnext>; - defm AND_PPzPP : sve_int_pred_log_and<0b0000, "and", int_aarch64_sve_and_z>; - defm BIC_PPzPP : sve_int_pred_log<0b0001, "bic", int_aarch64_sve_bic_z>; + defm AND_PPzPP : sve_int_pred_log_v2<0b0000, "and", int_aarch64_sve_and_z, and>; + defm BIC_PPzPP : sve_int_pred_log_v2<0b0001, "bic", int_aarch64_sve_bic_z, AArch64bic>; defm EOR_PPzPP : sve_int_pred_log<0b0010, "eor", int_aarch64_sve_eor_z, xor>; - defm SEL_PPPP : sve_int_pred_log<0b0011, "sel", vselect>; + defm SEL_PPPP : sve_int_pred_log_v2<0b0011, "sel", vselect, or>; defm ANDS_PPzPP : sve_int_pred_log<0b0100, "ands", null_frag>; defm BICS_PPzPP : sve_int_pred_log<0b0101, "bics", null_frag>; defm EORS_PPzPP : sve_int_pred_log<0b0110, "eors", null_frag>; - defm ORR_PPzPP : sve_int_pred_log<0b1000, "orr", int_aarch64_sve_orr_z, or>; + defm ORR_PPzPP : sve_int_pred_log<0b1000, "orr", int_aarch64_sve_orr_z>; defm ORN_PPzPP : sve_int_pred_log<0b1001, "orn", int_aarch64_sve_orn_z>; defm NOR_PPzPP : sve_int_pred_log<0b1010, "nor", int_aarch64_sve_nor_z>; defm NAND_PPzPP : sve_int_pred_log<0b1011, "nand", int_aarch64_sve_nand_z>; diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp index d2d84b2a3f6d..893269c1a7ef 100644 --- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp @@ -15,15 +15,95 @@ using namespace llvm; #define DEBUG_TYPE "aarch64-selectiondag-info" +SDValue AArch64SelectionDAGInfo::EmitMOPS(AArch64ISD::NodeType SDOpcode, + SelectionDAG &DAG, const SDLoc &DL, + SDValue Chain, SDValue Dst, + SDValue SrcOrValue, SDValue Size, + Align Alignment, bool isVolatile, + MachinePointerInfo DstPtrInfo, 
+ MachinePointerInfo SrcPtrInfo) const { + + // Get the constant size of the copy/set. + uint64_t ConstSize = 0; + if (auto *C = dyn_cast<ConstantSDNode>(Size)) + ConstSize = C->getZExtValue(); + + const bool IsSet = SDOpcode == AArch64ISD::MOPS_MEMSET || + SDOpcode == AArch64ISD::MOPS_MEMSET_TAGGING; + + const auto MachineOpcode = [&]() { + switch (SDOpcode) { + case AArch64ISD::MOPS_MEMSET: + return AArch64::MOPSMemorySetPseudo; + case AArch64ISD::MOPS_MEMSET_TAGGING: + return AArch64::MOPSMemorySetTaggingPseudo; + case AArch64ISD::MOPS_MEMCOPY: + return AArch64::MOPSMemoryCopyPseudo; + case AArch64ISD::MOPS_MEMMOVE: + return AArch64::MOPSMemoryMovePseudo; + default: + llvm_unreachable("Unhandled MOPS ISD Opcode"); + } + }(); + + MachineMemOperand::Flags Flags = MachineMemOperand::MOStore; + if (isVolatile) + Flags |= MachineMemOperand::MOVolatile; + if (!IsSet) + Flags |= MachineMemOperand::MOLoad; + + MachineFunction &MF = DAG.getMachineFunction(); + + auto *DstOp = + MF.getMachineMemOperand(DstPtrInfo, Flags, ConstSize, Alignment); + auto *SrcOp = + MF.getMachineMemOperand(SrcPtrInfo, Flags, ConstSize, Alignment); + + if (IsSet) { + // Extend value to i64 if required + if (SrcOrValue.getValueType() != MVT::i64) + SrcOrValue = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, SrcOrValue); + SDValue Ops[] = {Dst, Size, SrcOrValue, Chain}; + const EVT ResultTys[] = {MVT::i64, MVT::i64, MVT::Other}; + MachineSDNode *Node = DAG.getMachineNode(MachineOpcode, DL, ResultTys, Ops); + DAG.setNodeMemRefs(Node, {DstOp}); + return SDValue(Node, 2); + } else { + SDValue Ops[] = {Dst, SrcOrValue, Size, Chain}; + const EVT ResultTys[] = {MVT::i64, MVT::i64, MVT::i64, MVT::Other}; + MachineSDNode *Node = DAG.getMachineNode(MachineOpcode, DL, ResultTys, Ops); + DAG.setNodeMemRefs(Node, {DstOp, SrcOp}); + return SDValue(Node, 3); + } +} + +SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemcpy( + SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { + const AArch64Subtarget &STI = + DAG.getMachineFunction().getSubtarget<AArch64Subtarget>(); + if (STI.hasMOPS()) + return EmitMOPS(AArch64ISD::MOPS_MEMCOPY, DAG, DL, Chain, Dst, Src, Size, + Alignment, isVolatile, DstPtrInfo, SrcPtrInfo); + return SDValue(); +} + SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVolatile, MachinePointerInfo DstPtrInfo) const { + const AArch64Subtarget &STI = + DAG.getMachineFunction().getSubtarget<AArch64Subtarget>(); + + if (STI.hasMOPS()) { + return EmitMOPS(AArch64ISD::MOPS_MEMSET, DAG, dl, Chain, Dst, Src, Size, + Alignment, isVolatile, DstPtrInfo, MachinePointerInfo{}); + } + // Check to see if there is a specialized entry-point for memory zeroing. ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src); ConstantSDNode *SizeValue = dyn_cast<ConstantSDNode>(Size); - const AArch64Subtarget &STI = - DAG.getMachineFunction().getSubtarget<AArch64Subtarget>(); const char *bzeroName = (V && V->isZero()) ? 
DAG.getTargetLoweringInfo().getLibcallName(RTLIB::BZERO) @@ -55,6 +135,19 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset( return SDValue(); } +SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemmove( + SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, Align Alignment, bool isVolatile, + MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { + const AArch64Subtarget &STI = + DAG.getMachineFunction().getSubtarget<AArch64Subtarget>(); + if (STI.hasMOPS()) { + return EmitMOPS(AArch64ISD::MOPS_MEMMOVE, DAG, dl, Chain, Dst, Src, Size, + Alignment, isVolatile, DstPtrInfo, SrcPtrInfo); + } + return SDValue(); +} + static const int kSetTagLoopThreshold = 176; static SDValue EmitUnrolledSetTag(SelectionDAG &DAG, const SDLoc &dl, diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h index 7d53bd456975..47fe3bf7dcf5 100644 --- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h +++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h @@ -19,11 +19,30 @@ namespace llvm { class AArch64SelectionDAGInfo : public SelectionDAGTargetInfo { public: + SDValue EmitMOPS(AArch64ISD::NodeType SDOpcode, SelectionDAG &DAG, + const SDLoc &DL, SDValue Chain, SDValue Dst, + SDValue SrcOrValue, SDValue Size, Align Alignment, + bool isVolatile, MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const; + + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, + SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, Align Alignment, + bool isVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const override; SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVolatile, MachinePointerInfo DstPtrInfo) const override; + SDValue + EmitTargetCodeForMemmove(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, + SDValue Dst, SDValue Src, SDValue Size, + Align Alignment, bool isVolatile, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const override; + SDValue EmitTargetCodeForSetTag(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Op1, SDValue Op2, MachinePointerInfo DstPtrInfo, diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index a4f4b8582182..8a7e20237271 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -99,6 +99,7 @@ void AArch64Subtarget::initializeProperties() { case CortexA78C: case CortexR82: case CortexX1: + case CortexX1C: PrefFunctionLogAlignment = 4; break; case CortexA510: diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index 3e3c0f6aba15..7b2bbad30f85 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -63,6 +63,7 @@ public: CortexA710, CortexR82, CortexX1, + CortexX1C, CortexX2, ExynosM3, Falkor, @@ -217,7 +218,6 @@ protected: bool HasETE = false; bool HasTRBE = false; bool HasBRBE = false; - bool HasPAUTH = false; bool HasSPE_EEF = false; // HasZeroCycleRegMove - Has zero-cycle register mov instructions. 
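Both AArch64AsmPrinter::LowerMOPS and AArch64SelectionDAGInfo::EmitMOPS above select their concrete opcodes with an immediately invoked lambda, which keeps the mapping const while still allowing llvm_unreachable on an unexpected pseudo. A self-contained sketch of that idiom using the same prologue/main/epilogue instruction triples; the MemOp enum and string names are illustrative stand-ins, not LLVM types:

#include <array>
#include <cstdio>
#include <stdexcept>

enum class MemOp { Set, Copy, Move };

int main() {
  const MemOp Op = MemOp::Copy;
  // Immediately invoked lambda: the switch runs once and the result stays
  // const, mirroring the opcode selection in LowerMOPS/EmitMOPS.
  const auto Insns = [Op]() -> std::array<const char *, 3> {
    switch (Op) {
    case MemOp::Set:
      return {"SETP", "SETM", "SETE"};
    case MemOp::Copy:
      return {"CPYFP", "CPYFM", "CPYFE"};
    case MemOp::Move:
      return {"CPYP", "CPYM", "CPYE"};
    }
    throw std::logic_error("unhandled MemOp"); // stands in for llvm_unreachable
  }();
  // FEAT_MOPS operations always expand to a prologue/main/epilogue triple.
  for (const char *I : Insns)
    std::printf("%s\n", I);
}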
@@ -510,7 +510,6 @@ public: bool hasRandGen() const { return HasRandGen; } bool hasMTE() const { return HasMTE; } bool hasTME() const { return HasTME; } - bool hasPAUTH() const { return HasPAUTH; } // Arm SVE2 extensions bool hasSVE2AES() const { return HasSVE2AES; } bool hasSVE2SM4() const { return HasSVE2SM4; } diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index a4d666a0a3c2..b2ffdf949d8b 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1886,14 +1886,21 @@ InstructionCost AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, m_Value()))) VecPred = CurrentPred; } - // Check if we have a compare/select chain that can be lowered using CMxx & - // BFI pair. - if (CmpInst::isIntPredicate(VecPred)) { - static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16, - MVT::v8i16, MVT::v2i32, MVT::v4i32, - MVT::v2i64}; + // Check if we have a compare/select chain that can be lowered using + // a (F)CMxx & BFI pair. + if (CmpInst::isIntPredicate(VecPred) || VecPred == CmpInst::FCMP_OLE || + VecPred == CmpInst::FCMP_OLT || VecPred == CmpInst::FCMP_OGT || + VecPred == CmpInst::FCMP_OGE || VecPred == CmpInst::FCMP_OEQ || + VecPred == CmpInst::FCMP_UNE) { + static const auto ValidMinMaxTys = { + MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32, + MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64}; + static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16}; + auto LT = TLI->getTypeLegalizationCost(DL, ValTy); - if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; })) + if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }) || + (ST->hasFullFP16() && + any_of(ValidFP16MinMaxTys, [&LT](MVT M) { return M == LT.second; }))) return LT.first; } diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 1f546ad50d57..703e356f016d 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -192,6 +192,7 @@ private: bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI); bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI); bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI); + bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI); bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI); unsigned emitConstantPoolEntry(const Constant *CPVal, @@ -3424,6 +3425,12 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { case TargetOpcode::G_VECREDUCE_FADD: case TargetOpcode::G_VECREDUCE_ADD: return selectReduction(I, MRI); + case TargetOpcode::G_MEMCPY: + case TargetOpcode::G_MEMCPY_INLINE: + case TargetOpcode::G_MEMMOVE: + case TargetOpcode::G_MEMSET: + assert(STI.hasMOPS() && "Shouldn't get here without +mops feature"); + return selectMOPS(I, MRI); } return false; @@ -3481,6 +3488,64 @@ bool AArch64InstructionSelector::selectReduction(MachineInstr &I, return false; } +bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI, + MachineRegisterInfo &MRI) { + unsigned Mopcode; + switch (GI.getOpcode()) { + case TargetOpcode::G_MEMCPY: + case TargetOpcode::G_MEMCPY_INLINE: + Mopcode = AArch64::MOPSMemoryCopyPseudo; + break; + case TargetOpcode::G_MEMMOVE: + Mopcode = AArch64::MOPSMemoryMovePseudo; + break; + case TargetOpcode::G_MEMSET: + // For tagged memset see 
llvm.aarch64.mops.memset.tag + Mopcode = AArch64::MOPSMemorySetPseudo; + break; + } + + auto &DstPtr = GI.getOperand(0); + auto &SrcOrVal = GI.getOperand(1); + auto &Size = GI.getOperand(2); + + // Create copies of the registers that can be clobbered. + const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg()); + const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg()); + const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg()); + + const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo; + const auto &SrcValRegClass = + IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass; + + // Constrain to specific registers + RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI); + RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI); + RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI); + + MIB.buildCopy(DstPtrCopy, DstPtr); + MIB.buildCopy(SrcValCopy, SrcOrVal); + MIB.buildCopy(SizeCopy, Size); + + // New instruction uses the copied registers because it must update them. + // The defs are not used since they don't exist in G_MEM*. They are still + // tied. + // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE + Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass); + Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass); + if (IsSet) { + MIB.buildInstr(Mopcode, {DefDstPtr, DefSize}, + {DstPtrCopy, SizeCopy, SrcValCopy}); + } else { + Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass); + MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize}, + {DstPtrCopy, SrcValCopy, SizeCopy}); + } + + GI.eraseFromParent(); + return true; +} + bool AArch64InstructionSelector::selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) { assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT"); @@ -5375,6 +5440,36 @@ bool AArch64InstructionSelector::selectIntrinsicWithSideEffects( constrainSelectedInstRegOperands(*Store, TII, TRI, RBI); break; } + case Intrinsic::aarch64_mops_memset_tag: { + // Transform + // %dst:gpr(p0) = \ + // G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag), + // \ %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64) + // where %dst is updated, into + // (%Rd:GPR64common, %Rn:GPR64) = \ + // MOPSMemorySetTaggingPseudo \ + // %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64 + // where Rd and Rn are tied. + // It is expected that %val has been extended to s64 in legalization. + // Note that the order of the size/value operands is swapped. + + Register DstDef = I.getOperand(0).getReg(); + // I.getOperand(1) is the intrinsic function + Register DstUse = I.getOperand(2).getReg(); + Register ValUse = I.getOperand(3).getReg(); + Register SizeUse = I.getOperand(4).getReg(); + + // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one. + // Therefore an additional virtual register is required for the updated size + // operand. This value is not accessible via the semantics of the intrinsic.
+ Register SizeDef = MRI.createGenericVirtualRegister(LLT::scalar(64)); + + auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo, + {DstDef, SizeDef}, {DstUse, SizeUse, ValUse}); + Memset.cloneMemRefs(I); + constrainSelectedInstRegOperands(*Memset, TII, TRI, RBI); + break; + } } I.eraseFromParent(); diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index e8894e7933d6..e9df7e001d38 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -699,8 +699,28 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower(); - getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET}) - .libcall(); + if (ST.hasMOPS()) { + // G_BZERO is not supported. Currently it is only emitted by + // PreLegalizerCombiner for G_MEMSET with zero constant. + getActionDefinitionsBuilder(G_BZERO).unsupported(); + + getActionDefinitionsBuilder(G_MEMSET) + .legalForCartesianProduct({p0}, {s64}, {s64}) + .customForCartesianProduct({p0}, {s8}, {s64}) + .immIdx(0); // Inform verifier imm idx 0 is handled. + + getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE}) + .legalForCartesianProduct({p0}, {p0}, {s64}) + .immIdx(0); // Inform verifier imm idx 0 is handled. + + // G_MEMCPY_INLINE does not have a tailcall immediate + getActionDefinitionsBuilder(G_MEMCPY_INLINE) + .legalForCartesianProduct({p0}, {p0}, {s64}); + + } else { + getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET}) + .libcall(); + } // FIXME: Legal types are only legal with NEON. getActionDefinitionsBuilder(G_ABS) @@ -832,6 +852,11 @@ bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, return legalizeAtomicCmpxchg128(MI, MRI, Helper); case TargetOpcode::G_CTTZ: return legalizeCTTZ(MI, Helper); + case TargetOpcode::G_BZERO: + case TargetOpcode::G_MEMCPY: + case TargetOpcode::G_MEMMOVE: + case TargetOpcode::G_MEMSET: + return legalizeMemOps(MI, Helper); } llvm_unreachable("expected switch to return"); @@ -989,6 +1014,15 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, MI.eraseFromParent(); return true; } + case Intrinsic::aarch64_mops_memset_tag: { + assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS); + // Zext the value to 64 bit + MachineIRBuilder MIB(MI); + auto &Value = MI.getOperand(3); + Register ZExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0); + Value.setReg(ZExtValueReg); + return true; + } } return true; @@ -1359,3 +1393,20 @@ bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI, MI.eraseFromParent(); return true; } + +bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI, + LegalizerHelper &Helper) const { + MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; + + // Tagged version MOPSMemorySetTagged is legalised in legalizeIntrinsic + if (MI.getOpcode() == TargetOpcode::G_MEMSET) { + // Zext the value operand to 64 bit + auto &Value = MI.getOperand(1); + Register ZExtValueReg = + MIRBuilder.buildAnyExt(LLT::scalar(64), Value).getReg(0); + Value.setReg(ZExtValueReg); + return true; + } + + return false; +} diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h index e2c46f4b4c1f..973f96ff4775 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h @@ -56,6 +56,7 @@ private: bool 
legalizeAtomicCmpxchg128(MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const; bool legalizeCTTZ(MachineInstr &MI, LegalizerHelper &Helper) const; + bool legalizeMemOps(MachineInstr &MI, LegalizerHelper &Helper) const; const AArch64Subtarget *ST; }; } // End llvm namespace. diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 574b22124957..9d4bdbe5d053 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -334,8 +334,6 @@ multiclass sve_int_ptrue<bits<3> opc, string asm, SDPatternOperator op> { def SDT_AArch64PTrue : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>; def AArch64ptrue : SDNode<"AArch64ISD::PTRUE", SDT_AArch64PTrue>; -def SDT_AArch64PFalse : SDTypeProfile<1, 0, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>]>; -def AArch64pfalse : SDNode<"AArch64ISD::PFALSE", SDT_AArch64PFalse>; let Predicates = [HasSVEorStreamingSVE] in { defm PTRUE : sve_int_ptrue<0b000, "ptrue", AArch64ptrue>; @@ -614,10 +612,10 @@ class sve_int_pfalse<bits<6> opc, string asm> multiclass sve_int_pfalse<bits<6> opc, string asm> { def NAME : sve_int_pfalse<opc, asm>; - def : Pat<(nxv16i1 (AArch64pfalse)), (!cast<Instruction>(NAME))>; - def : Pat<(nxv8i1 (AArch64pfalse)), (!cast<Instruction>(NAME))>; - def : Pat<(nxv4i1 (AArch64pfalse)), (!cast<Instruction>(NAME))>; - def : Pat<(nxv2i1 (AArch64pfalse)), (!cast<Instruction>(NAME))>; + def : Pat<(nxv16i1 (splat_vector (i32 0))), (!cast<Instruction>(NAME))>; + def : Pat<(nxv8i1 (splat_vector (i32 0))), (!cast<Instruction>(NAME))>; + def : Pat<(nxv4i1 (splat_vector (i32 0))), (!cast<Instruction>(NAME))>; + def : Pat<(nxv2i1 (splat_vector (i32 0))), (!cast<Instruction>(NAME))>; } class sve_int_ptest<bits<6> opc, string asm> @@ -773,7 +771,7 @@ multiclass sve_int_count_r_x64<bits<5> opc, string asm, def : Pat<(i64 (op GPR64:$Rn, (nxv2i1 PPRAny:$Pg))), (!cast<Instruction>(NAME # _D) PPRAny:$Pg, $Rn)>; - // Combine cntp with combine_op + // combine_op(x, cntp(all_active, p)) ==> inst p, x def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv16i1 (SVEAllActive)), (nxv16i1 PPRAny:$pred)))), (!cast<Instruction>(NAME # _B) PPRAny:$pred, $Rn)>; def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv8i1 (SVEAllActive)), (nxv8i1 PPRAny:$pred)))), @@ -782,6 +780,16 @@ multiclass sve_int_count_r_x64<bits<5> opc, string asm, (!cast<Instruction>(NAME # _S) PPRAny:$pred, $Rn)>; def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv2i1 (SVEAllActive)), (nxv2i1 PPRAny:$pred)))), (!cast<Instruction>(NAME # _D) PPRAny:$pred, $Rn)>; + + // combine_op(x, cntp(p, p)) ==> inst p, x + def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv16i1 PPRAny:$pred), (nxv16i1 PPRAny:$pred)))), + (!cast<Instruction>(NAME # _B) PPRAny:$pred, $Rn)>; + def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv8i1 PPRAny:$pred), (nxv8i1 PPRAny:$pred)))), + (!cast<Instruction>(NAME # _H) PPRAny:$pred, $Rn)>; + def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv4i1 PPRAny:$pred), (nxv4i1 PPRAny:$pred)))), + (!cast<Instruction>(NAME # _S) PPRAny:$pred, $Rn)>; + def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv2i1 PPRAny:$pred), (nxv2i1 PPRAny:$pred)))), + (!cast<Instruction>(NAME # _D) PPRAny:$pred, $Rn)>; } class sve_int_count_v<bits<2> sz8_64, bits<5> opc, string asm, @@ -1633,15 +1641,18 @@ multiclass sve_int_pred_log<bits<4> opc, string asm, 
SDPatternOperator op, !cast<Instruction>(NAME), PTRUE_D>; } -multiclass sve_int_pred_log_and<bits<4> opc, string asm, SDPatternOperator op> : +// An instance of sve_int_pred_log_and but uses op_nopred's first operand as the +// general predicate. +multiclass sve_int_pred_log_v2<bits<4> opc, string asm, SDPatternOperator op, + SDPatternOperator op_nopred> : sve_int_pred_log<opc, asm, op> { - def : Pat<(nxv16i1 (and nxv16i1:$Op1, nxv16i1:$Op2)), + def : Pat<(nxv16i1 (op_nopred nxv16i1:$Op1, nxv16i1:$Op2)), (!cast<Instruction>(NAME) $Op1, $Op1, $Op2)>; - def : Pat<(nxv8i1 (and nxv8i1:$Op1, nxv8i1:$Op2)), + def : Pat<(nxv8i1 (op_nopred nxv8i1:$Op1, nxv8i1:$Op2)), (!cast<Instruction>(NAME) $Op1, $Op1, $Op2)>; - def : Pat<(nxv4i1 (and nxv4i1:$Op1, nxv4i1:$Op2)), + def : Pat<(nxv4i1 (op_nopred nxv4i1:$Op1, nxv4i1:$Op2)), (!cast<Instruction>(NAME) $Op1, $Op1, $Op2)>; - def : Pat<(nxv2i1 (and nxv2i1:$Op1, nxv2i1:$Op2)), + def : Pat<(nxv2i1 (op_nopred nxv2i1:$Op1, nxv2i1:$Op2)), (!cast<Instruction>(NAME) $Op1, $Op1, $Op2)>; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 958e8c9e5bc5..11cc1a01d248 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -11,6 +11,7 @@ #define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H #include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" #include "llvm/Support/CodeGen.h" namespace llvm { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp index 7d6845b287bc..bebf032b5535 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp @@ -14,9 +14,12 @@ #include "AMDGPU.h" #include "Utils/AMDGPUBaseInfo.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/IR/InstVisitor.h" +#include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/InitializePasses.h" #define DEBUG_TYPE "amdgpu-annotate-uniform" @@ -29,6 +32,7 @@ class AMDGPUAnnotateUniformValues : public FunctionPass, public InstVisitor<AMDGPUAnnotateUniformValues> { LegacyDivergenceAnalysis *DA; MemorySSA *MSSA; + AliasAnalysis *AA; DenseMap<Value*, GetElementPtrInst*> noClobberClones; bool isEntryFunc; @@ -44,6 +48,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<LegacyDivergenceAnalysis>(); AU.addRequired<MemorySSAWrapperPass>(); + AU.addRequired<AAResultsWrapperPass>(); AU.setPreservesAll(); } @@ -58,6 +63,7 @@ INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE, "Add AMDGPU uniform metadata", false, false) INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis) INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE, "Add AMDGPU uniform metadata", false, false) @@ -70,9 +76,79 @@ static void setNoClobberMetadata(Instruction *I) { I->setMetadata("amdgpu.noclobber", MDNode::get(I->getContext(), {})); } -bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst * Load) { - const MemoryAccess *MA = MSSA->getWalker()->getClobberingMemoryAccess(Load); - return !MSSA->isLiveOnEntryDef(MA); +bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst *Load) { + MemorySSAWalker *Walker = MSSA->getWalker(); + SmallVector<MemoryAccess *> WorkList{Walker->getClobberingMemoryAccess(Load)}; + SmallSet<MemoryAccess *, 8> Visited; + MemoryLocation 
Loc(MemoryLocation::get(Load)); + + const auto isReallyAClobber = [this, Load](MemoryDef *Def) -> bool { + Instruction *DefInst = Def->getMemoryInst(); + LLVM_DEBUG(dbgs() << " Def: " << *DefInst << '\n'); + + if (isa<FenceInst>(DefInst)) + return false; + + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) { + switch (II->getIntrinsicID()) { + case Intrinsic::amdgcn_s_barrier: + case Intrinsic::amdgcn_wave_barrier: + return false; + default: + break; + } + } + + // Ignore atomics not aliasing with the original load; any atomic is a + // universal MemoryDef from MSSA's point of view too, just like a fence. + const auto checkNoAlias = [this, Load](auto I) -> bool { + return I && AA->isNoAlias(I->getPointerOperand(), + Load->getPointerOperand()); + }; + + if (checkNoAlias(dyn_cast<AtomicCmpXchgInst>(DefInst)) || + checkNoAlias(dyn_cast<AtomicRMWInst>(DefInst))) + return false; + + return true; + }; + + LLVM_DEBUG(dbgs() << "Checking clobbering of: " << *Load << '\n'); + + // Start with the nearest dominating clobbering access; it will be either + // live on entry (nothing to do, the load is not clobbered), a MemoryDef, or + // a MemoryPhi if several MemoryDefs can define this memory state. In that + // case add all Defs to the WorkList and continue going up, checking all + // the definitions of this memory location until the root. When all the + // defs are exhausted and the walk has reached the entry state, there is no + // clobber. Along the way, ignore barriers and fences, which MemorySSA + // considers clobbers even though they do not really write anything into + // memory. + while (!WorkList.empty()) { + MemoryAccess *MA = WorkList.pop_back_val(); + if (!Visited.insert(MA).second) + continue; + + if (MSSA->isLiveOnEntryDef(MA)) + continue; + + if (MemoryDef *Def = dyn_cast<MemoryDef>(MA)) { + if (isReallyAClobber(Def)) { + LLVM_DEBUG(dbgs() << " -> load is clobbered\n"); + return true; + } + + WorkList.push_back( + Walker->getClobberingMemoryAccess(Def->getDefiningAccess(), Loc)); + continue; + } + + const MemoryPhi *Phi = cast<MemoryPhi>(MA); + for (auto &Use : Phi->incoming_values()) + WorkList.push_back(cast<MemoryAccess>(&Use)); + } + + LLVM_DEBUG(dbgs() << " -> no clobber\n"); + return false; } void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) { @@ -84,9 +160,6 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) { Value *Ptr = I.getPointerOperand(); if (!DA->isUniform(Ptr)) return; - auto isGlobalLoad = [&](LoadInst &Load)->bool { - return Load.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; - }; // We're tracking up to the Function boundaries, and cannot go beyond because // of FunctionPass restrictions. We can only ensure that memory is not // clobbered for memory operations that are live-in to entry points.
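To make the new walk concrete, here is a minimal sketch (illustrative only; whether alias analysis can actually separate the two pointers in any given case depends on the usual restrict/TBAA machinery) of a load that MemorySSA used to report as clobbered but that isReallyAClobber now lets through, because the intervening atomic provably does not alias the loaded address:

#include <atomic>

// The fetch_add is a MemoryDef in MemorySSA, but it touches a different
// object than 'in', so a uniform load like this can still be marked
// amdgpu.noclobber by the pass.
int read_after_counter_bump(const int *__restrict in,
                            std::atomic<int> &counter) {
  counter.fetch_add(1, std::memory_order_seq_cst);
  return *in;
}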
@@ -99,7 +172,7 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) { } bool NotClobbered = false; - bool GlobalLoad = isGlobalLoad(I); + bool GlobalLoad = I.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; if (PtrI) NotClobbered = GlobalLoad && !isClobberedInFunction(&I); else if (isa<Argument>(Ptr) || isa<GlobalValue>(Ptr)) { @@ -139,6 +212,7 @@ bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) { DA = &getAnalysis<LegacyDivergenceAnalysis>(); MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); isEntryFunc = AMDGPU::isEntryFunctionCC(F.getCallingConv()); visit(F); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index bb2e723f4ab0..6e2984f2a04f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -88,6 +88,8 @@ AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, HSAMetadataStream.reset(new HSAMD::MetadataStreamerV2()); } else if (isHsaAbiVersion3(getGlobalSTI())) { HSAMetadataStream.reset(new HSAMD::MetadataStreamerV3()); + } else if (isHsaAbiVersion5(getGlobalSTI())) { + HSAMetadataStream.reset(new HSAMD::MetadataStreamerV5()); } else { HSAMetadataStream.reset(new HSAMD::MetadataStreamerV4()); } @@ -118,7 +120,7 @@ void AMDGPUAsmPrinter::emitStartOfAsmFile(Module &M) { TM.getTargetTriple().getOS() != Triple::AMDPAL) return; - if (isHsaAbiVersion3Or4(getGlobalSTI())) + if (isHsaAbiVersion3AndAbove(getGlobalSTI())) getTargetStreamer()->EmitDirectiveAMDGCNTarget(); if (TM.getTargetTriple().getOS() == Triple::AMDHSA) @@ -127,7 +129,7 @@ void AMDGPUAsmPrinter::emitStartOfAsmFile(Module &M) { if (TM.getTargetTriple().getOS() == Triple::AMDPAL) getTargetStreamer()->getPALMetadata()->readFromIR(M); - if (isHsaAbiVersion3Or4(getGlobalSTI())) + if (isHsaAbiVersion3AndAbove(getGlobalSTI())) return; // HSA emits NT_AMD_HSA_CODE_OBJECT_VERSION for code objects v2. 
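In terms of observable output, the new streamer mainly shows up in the top-level metadata: assuming the VersionMajorV5/VersionMinorV5 constants follow the existing numbering pattern in AMDGPUMetadata.h (a hedged reading, since the constants are not spelled out in this hunk), a code-object v5 binary advertises amdhsa.version [1, 2] where v4 emitted [1, 1], plus the reworked hidden kernel arguments shown in the MetadataStreamerV5 hunks below.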
@@ -259,7 +261,7 @@ void AMDGPUAsmPrinter::emitFunctionBodyEnd() { void AMDGPUAsmPrinter::emitFunctionEntryLabel() { if (TM.getTargetTriple().getOS() == Triple::AMDHSA && - isHsaAbiVersion3Or4(getGlobalSTI())) { + isHsaAbiVersion3AndAbove(getGlobalSTI())) { AsmPrinter::emitFunctionEntryLabel(); return; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp index 3ac7c45b3275..f5018e3a19ac 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -672,15 +672,15 @@ void MetadataStreamerV3::emitKernelAttrs(const Function &Func, Kern[".kind"] = Kern.getDocument()->getNode("fini"); } -void MetadataStreamerV3::emitKernelArgs(const Function &Func, - const GCNSubtarget &ST, +void MetadataStreamerV3::emitKernelArgs(const MachineFunction &MF, msgpack::MapDocNode Kern) { + auto &Func = MF.getFunction(); unsigned Offset = 0; auto Args = HSAMetadataDoc->getArrayNode(); for (auto &Arg : Func.args()) emitKernelArg(Arg, Offset, Args); - emitHiddenKernelArgs(Func, ST, Offset, Args); + emitHiddenKernelArgs(MF, Offset, Args); Kern[".args"] = Args; } @@ -789,10 +789,12 @@ void MetadataStreamerV3::emitKernelArg( Args.push_back(Arg); } -void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func, - const GCNSubtarget &ST, +void MetadataStreamerV3::emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) { + auto &Func = MF.getFunction(); + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); + unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func); if (!HiddenArgNumBytes) return; @@ -910,7 +912,6 @@ void MetadataStreamerV3::emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) { auto &Func = MF.getFunction(); auto Kern = getHSAKernelProps(MF, ProgramInfo); - const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL || Func.getCallingConv() == CallingConv::SPIR_KERNEL); @@ -924,7 +925,7 @@ void MetadataStreamerV3::emitKernel(const MachineFunction &MF, (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true); emitKernelLanguage(Func, Kern); emitKernelAttrs(Func, Kern); - emitKernelArgs(Func, ST, Kern); + emitKernelArgs(MF, Kern); } Kernels.push_back(Kern); @@ -954,6 +955,97 @@ void MetadataStreamerV4::begin(const Module &Mod, getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode(); } +//===----------------------------------------------------------------------===// +// HSAMetadataStreamerV5 +//===----------------------------------------------------------------------===// + +void MetadataStreamerV5::emitVersion() { + auto Version = HSAMetadataDoc->getArrayNode(); + Version.push_back(Version.getDocument()->getNode(VersionMajorV5)); + Version.push_back(Version.getDocument()->getNode(VersionMinorV5)); + getRootMetadata("amdhsa.version") = Version; +} + +void MetadataStreamerV5::emitHiddenKernelArgs(const MachineFunction &MF, + unsigned &Offset, + msgpack::ArrayDocNode Args) { + auto &Func = MF.getFunction(); + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); + const Module *M = Func.getParent(); + auto &DL = M->getDataLayout(); + + auto Int64Ty = Type::getInt64Ty(Func.getContext()); + auto Int32Ty = Type::getInt32Ty(Func.getContext()); + auto Int16Ty = Type::getInt16Ty(Func.getContext()); + + emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_x", Offset, Args); + emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_y", 
Offset, Args); + emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_z", Offset, Args); + + emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_x", Offset, Args); + emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_y", Offset, Args); + emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_z", Offset, Args); + + emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_x", Offset, Args); + emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_y", Offset, Args); + emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_z", Offset, Args); + + // Reserved for hidden_tool_correlation_id. + Offset += 8; + + Offset += 8; // Reserved. + + emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset, Args); + emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset, Args); + emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset, Args); + + emitKernelArg(DL, Int16Ty, Align(2), "hidden_grid_dims", Offset, Args); + + Offset += 6; // Reserved. + auto Int8PtrTy = + Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS); + + if (M->getNamedMetadata("llvm.printf.fmts")) { + emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset, + Args); + } else + Offset += 8; // Skipped. + + if (M->getModuleFlag("amdgpu_hostcall")) { + emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset, + Args); + } else + Offset += 8; // Skipped. + + emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset, + Args); + + // Ignore temporarily until it is implemented. + // emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_heap_v1", Offset, Args); + Offset += 8; + + if (Func.hasFnAttribute("calls-enqueue-kernel")) { + emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset, + Args); + emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset, + Args); + } else + Offset += 16; // Skipped. + + Offset += 72; // Reserved. + + // hidden_private_base and hidden_shared_base are only used by GFX8. + if (ST.getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS) { + emitKernelArg(DL, Int32Ty, Align(4), "hidden_private_base", Offset, Args); + emitKernelArg(DL, Int32Ty, Align(4), "hidden_shared_base", Offset, Args); + } else + Offset += 8; // Skipped. + + const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); + if (MFI.hasQueuePtr()) + emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args); +} + } // end namespace HSAMD } // end namespace AMDGPU } // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h index 54ed0afbba6d..bcf7fc449094 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h @@ -53,6 +53,11 @@ public: virtual void emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) = 0; + +protected: + virtual void emitVersion() = 0; + virtual void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset, + msgpack::ArrayDocNode Args) = 0; }; // TODO: Rename MetadataStreamerV3 -> MetadataStreamerMsgPackV3. 
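Flattened out, the v5 hidden-argument block that emitHiddenKernelArgs walks above corresponds to a layout like the following (a reconstruction from the emitKernelArg calls and Offset adjustments in this diff; the struct and its field names are illustrative, taken from the metadata strings, with buffer pointers shown as uint64_t, and conditionally skipped fields still occupy their slots):

#include <cstdint>

struct HiddenArgsV5 {                                          // byte offsets
  uint32_t block_count_x, block_count_y, block_count_z;        // 0..11
  uint16_t group_size_x, group_size_y, group_size_z;           // 12..17
  uint16_t remainder_x, remainder_y, remainder_z;              // 18..23
  uint64_t reserved_tool_correlation_id;                       // 24..31
  uint64_t reserved0;                                          // 32..39
  uint64_t global_offset_x, global_offset_y, global_offset_z;  // 40..63
  uint16_t grid_dims;                                          // 64..65
  uint8_t  reserved1[6];                                       // 66..71
  uint64_t printf_buffer;                                      // 72..79
  uint64_t hostcall_buffer;                                    // 80..87
  uint64_t multigrid_sync_arg;                                 // 88..95
  uint64_t heap_v1;           // 96..103, reserved but not emitted yet
  uint64_t default_queue;                                      // 104..111
  uint64_t completion_action;                                  // 112..119
  uint8_t  reserved2[72];                                      // 120..191
  uint32_t private_base, shared_base;  // 192..199, GFX8 only (else skipped)
  uint64_t queue_ptr;                  // 200..207, only if the kernel needs it
};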
@@ -79,7 +84,7 @@ protected: msgpack::MapDocNode getHSAKernelProps(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const; - void emitVersion(); + void emitVersion() override; void emitPrintf(const Module &Mod); @@ -87,8 +92,7 @@ protected: void emitKernelAttrs(const Function &Func, msgpack::MapDocNode Kern); - void emitKernelArgs(const Function &Func, const GCNSubtarget &ST, - msgpack::MapDocNode Kern); + void emitKernelArgs(const MachineFunction &MF, msgpack::MapDocNode Kern); void emitKernelArg(const Argument &Arg, unsigned &Offset, msgpack::ArrayDocNode Args); @@ -100,8 +104,8 @@ protected: StringRef BaseTypeName = "", StringRef AccQual = "", StringRef TypeQual = ""); - void emitHiddenKernelArgs(const Function &Func, const GCNSubtarget &ST, - unsigned &Offset, msgpack::ArrayDocNode Args); + void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset, + msgpack::ArrayDocNode Args) override; msgpack::DocNode &getRootMetadata(StringRef Key) { return HSAMetadataDoc->getRoot().getMap(/*Convert=*/true)[Key]; @@ -127,9 +131,9 @@ public: }; // TODO: Rename MetadataStreamerV4 -> MetadataStreamerMsgPackV4. -class MetadataStreamerV4 final : public MetadataStreamerV3 { - void emitVersion(); - +class MetadataStreamerV4 : public MetadataStreamerV3 { +protected: + void emitVersion() override; void emitTargetID(const IsaInfo::AMDGPUTargetID &TargetID); public: @@ -140,6 +144,18 @@ public: const IsaInfo::AMDGPUTargetID &TargetID) override; }; +// TODO: Rename MetadataStreamerV5 -> MetadataStreamerMsgPackV5. +class MetadataStreamerV5 final : public MetadataStreamerV4 { +protected: + void emitVersion() override; + void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset, + msgpack::ArrayDocNode Args) override; + +public: + MetadataStreamerV5() = default; + ~MetadataStreamerV5() = default; +}; + // TODO: Rename MetadataStreamerV2 -> MetadataStreamerYamlV2. class MetadataStreamerV2 final : public MetadataStreamer { private: @@ -167,8 +183,6 @@ private: const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const; - void emitVersion(); - void emitPrintf(const Module &Mod); void emitKernelLanguage(const Function &Func); @@ -191,6 +205,13 @@ private: return HSAMetadata; } +protected: + void emitVersion() override; + void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset, + msgpack::ArrayDocNode Args) override { + llvm_unreachable("Dummy override should not be invoked!"); + } + public: MetadataStreamerV2() = default; ~MetadataStreamerV2() = default; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 04c6f67ed339..645d05aa9238 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -4778,6 +4778,7 @@ bool AMDGPULegalizerInfo::legalizeTrapIntrinsic(MachineInstr &MI, case ELF::ELFABIVERSION_AMDGPU_HSA_V3: return legalizeTrapHsaQueuePtr(MI, MRI, B); case ELF::ELFABIVERSION_AMDGPU_HSA_V4: + case ELF::ELFABIVERSION_AMDGPU_HSA_V5: return ST.supportsGetDoorbellID() ? 
legalizeTrapHsa(MI, MRI, B) : legalizeTrapHsaQueuePtr(MI, MRI, B); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp index c28427758ac7..bbbadfdfd444 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp @@ -16,8 +16,9 @@ #include "GCNSubtarget.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Loads.h" -#include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/InitializePasses.h" #include "llvm/Target/TargetMachine.h" diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index 2d8126a49327..99b7ffb33884 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -13,15 +13,16 @@ #include "AMDGPU.h" #include "GCNSubtarget.h" +#include "Utils/AMDGPUBaseInfo.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsR600.h" #include "llvm/Pass.h" #include "llvm/Target/TargetMachine.h" -#include "Utils/AMDGPUBaseInfo.h" #define DEBUG_TYPE "amdgpu-promote-alloca" diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index c1c88d9a7462..ffe626513d47 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1129,7 +1129,8 @@ class KernelScopeInfo { if (i >= SgprIndexUnusedMin) { SgprIndexUnusedMin = ++i; if (Ctx) { - MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); + MCSymbol* const Sym = + Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); } } @@ -1139,7 +1140,8 @@ class KernelScopeInfo { if (i >= VgprIndexUnusedMin) { VgprIndexUnusedMin = ++i; if (Ctx) { - MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); + MCSymbol* const Sym = + Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); } } @@ -1296,7 +1298,7 @@ public: // AsmParser::parseDirectiveSet() cannot be specialized for specific target. 
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); MCContext &Ctx = getContext(); - if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) { + if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) { MCSymbol *Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); @@ -1313,7 +1315,7 @@ public: Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); } - if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) { + if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) { initializeGprCountSymbol(IS_VGPR); initializeGprCountSymbol(IS_SGPR); } else @@ -2747,7 +2749,7 @@ AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { return nullptr; } - if (isHsaAbiVersion3Or4(&getSTI())) { + if (isHsaAbiVersion3AndAbove(&getSTI())) { if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) return nullptr; } else @@ -5099,7 +5101,7 @@ bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { const char *AssemblerDirectiveBegin; const char *AssemblerDirectiveEnd; std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = - isHsaAbiVersion3Or4(&getSTI()) + isHsaAbiVersion3AndAbove(&getSTI()) ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, HSAMD::V3::AssemblerDirectiveEnd) : std::make_tuple(HSAMD::AssemblerDirectiveBegin, @@ -5116,7 +5118,7 @@ bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { HSAMetadataString)) return true; - if (isHsaAbiVersion3Or4(&getSTI())) { + if (isHsaAbiVersion3AndAbove(&getSTI())) { if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) return Error(getLoc(), "invalid HSA metadata"); } else { @@ -5266,7 +5268,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getString(); - if (isHsaAbiVersion3Or4(&getSTI())) { + if (isHsaAbiVersion3AndAbove(&getSTI())) { if (IDVal == ".amdhsa_kernel") return ParseDirectiveAMDHSAKernel(); @@ -7440,7 +7442,7 @@ void AMDGPUAsmParser::onBeginOfFile() { if (!getTargetStreamer().getTargetID()) getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString()); - if (isHsaAbiVersion3Or4(&getSTI())) + if (isHsaAbiVersion3AndAbove(&getSTI())) getTargetStreamer().EmitDirectiveAMDGCNTarget(); } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 9578bdb0bad0..7aa5f1abf65b 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -396,6 +396,7 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( break; case ELF::ELFABIVERSION_AMDGPU_HSA_V3: case ELF::ELFABIVERSION_AMDGPU_HSA_V4: + case ELF::ELFABIVERSION_AMDGPU_HSA_V5: if (getTargetID()->isXnackSupported()) OS << "\t\t.amdhsa_reserve_xnack_mask " << getTargetID()->isXnackOnOrAny() << '\n'; break; @@ -578,6 +579,7 @@ unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() { case ELF::ELFABIVERSION_AMDGPU_HSA_V3: return getEFlagsV3(); case ELF::ELFABIVERSION_AMDGPU_HSA_V4: + case ELF::ELFABIVERSION_AMDGPU_HSA_V5: return getEFlagsV4(); } } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 561866b5a398..e2f4a0896bc3 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -5423,6 
+5423,7 @@ SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const { case ELF::ELFABIVERSION_AMDGPU_HSA_V3: return lowerTrapHsaQueuePtr(Op, DAG); case ELF::ELFABIVERSION_AMDGPU_HSA_V4: + case ELF::ELFABIVERSION_AMDGPU_HSA_V5: return Subtarget->supportsGetDoorbellID() ? lowerTrapHsa(Op, DAG) : lowerTrapHsaQueuePtr(Op, DAG); } diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index c18637bdbc43..44bdbe37dec0 100644 --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -938,12 +938,6 @@ bool SILoadStoreOptimizer::checkAndPrepareMerge( // 2. It is safe to move MBBI down past the instruction that I will // be merged into. - if (MBBI->hasUnmodeledSideEffects()) { - // We can't re-order this instruction with respect to other memory - // operations, so we fail both conditions mentioned above. - return false; - } - if (MBBI->mayLoadOrStore() && (!memAccessesCanBeReordered(*CI.I, *MBBI, AA) || !canMoveInstsAcrossMemOp(*MBBI, InstsToMove, AA))) { @@ -1977,10 +1971,10 @@ SILoadStoreOptimizer::collectMergeableInsts( if (promoteConstantOffsetToImm(MI, Visited, AnchorList)) Modified = true; - // Don't combine if volatile. We also won't be able to merge across this, so - // break the search. We can look after this barrier for separate merges. - if (MI.hasOrderedMemoryRef()) { - LLVM_DEBUG(dbgs() << "Breaking search on memory fence: " << MI); + // Treat volatile accesses, ordered accesses and unmodeled side effects as + // barriers. We can look after this barrier for separate merges. + if (MI.hasOrderedMemoryRef() || MI.hasUnmodeledSideEffects()) { + LLVM_DEBUG(dbgs() << "Breaking search on barrier: " << MI); // Search will resume after this instruction in a separate merge list. ++BlockI; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 1e96266eb06c..683be871ff82 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -99,6 +99,8 @@ Optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI) { return ELF::ELFABIVERSION_AMDGPU_HSA_V3; case 4: return ELF::ELFABIVERSION_AMDGPU_HSA_V4; + case 5: + return ELF::ELFABIVERSION_AMDGPU_HSA_V5; default: report_fatal_error(Twine("Unsupported AMDHSA Code Object Version ") + Twine(AmdhsaCodeObjectVersion)); @@ -123,8 +125,15 @@ bool isHsaAbiVersion4(const MCSubtargetInfo *STI) { return false; } -bool isHsaAbiVersion3Or4(const MCSubtargetInfo *STI) { - return isHsaAbiVersion3(STI) || isHsaAbiVersion4(STI); +bool isHsaAbiVersion5(const MCSubtargetInfo *STI) { + if (Optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI)) + return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V5; + return false; +} + +bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI) { + return isHsaAbiVersion3(STI) || isHsaAbiVersion4(STI) || + isHsaAbiVersion5(STI); } #define GET_MIMGBaseOpcodesTable_IMPL @@ -495,6 +504,7 @@ std::string AMDGPUTargetID::toString() const { Features += "+sram-ecc"; break; case ELF::ELFABIVERSION_AMDGPU_HSA_V4: + case ELF::ELFABIVERSION_AMDGPU_HSA_V5: // sramecc. 
if (getSramEccSetting() == TargetIDSetting::Off) Features += ":sramecc-"; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 89f928eb8b92..4516b511f3c8 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -47,9 +47,12 @@ bool isHsaAbiVersion3(const MCSubtargetInfo *STI); /// \returns True if HSA OS ABI Version identification is 4, /// false otherwise. bool isHsaAbiVersion4(const MCSubtargetInfo *STI); +/// \returns True if HSA OS ABI Version identification is 5, +/// false otherwise. +bool isHsaAbiVersion5(const MCSubtargetInfo *STI); /// \returns True if HSA OS ABI Version identification is 3 or 4, /// false otherwise. -bool isHsaAbiVersion3Or4(const MCSubtargetInfo *STI); +bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI); struct GcnBufferFormatInfo { unsigned Format; diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td index 4efbdbb2abc8..27edf69b4abf 100644 --- a/llvm/lib/Target/ARM/ARM.td +++ b/llvm/lib/Target/ARM/ARM.td @@ -656,6 +656,8 @@ def ProcA710 : SubtargetFeature<"cortex-a710", "ARMProcFamily", "CortexA710", "Cortex-A710 ARM processors", []>; def ProcX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1", "Cortex-X1 ARM processors", []>; +def ProcX1C : SubtargetFeature<"cortex-x1c", "ARMProcFamily", "CortexX1C", + "Cortex-X1C ARM processors", []>; def ProcV1 : SubtargetFeature<"neoverse-v1", "ARMProcFamily", "NeoverseV1", "Neoverse-V1 ARM processors", []>; @@ -1443,6 +1445,14 @@ def : ProcNoItin<"cortex-x1", [ARMv82a, ProcX1, FeatureFullFP16, FeatureDotProd]>; +def : ProcNoItin<"cortex-x1c", [ARMv82a, ProcX1C, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC, + FeatureFullFP16, + FeatureDotProd]>; + def : ProcNoItin<"neoverse-v1", [ARMv84a, FeatureHWDivThumb, FeatureHWDivARM, diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index cde715880376..5b0bae4d9274 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -752,23 +752,17 @@ unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo(); const MCInstrDesc &MCID = MI.getDesc(); - if (MCID.getSize()) - return MCID.getSize(); switch (MI.getOpcode()) { default: - // pseudo-instruction sizes are zero. - return 0; + // Return the size specified in .td file. If there's none, return 0, as we + // can't define a default size (Thumb1 instructions are 2 bytes, Thumb2 + // instructions are 2-4 bytes, and ARM instructions are 4 bytes), in + // contrast to AArch64 instructions which have a default size of 4 bytes for + // example. + return MCID.getSize(); case TargetOpcode::BUNDLE: return getInstBundleLength(MI); - case ARM::MOVi16_ga_pcrel: - case ARM::MOVTi16_ga_pcrel: - case ARM::t2MOVi16_ga_pcrel: - case ARM::t2MOVTi16_ga_pcrel: - return 4; - case ARM::MOVi32imm: - case ARM::t2MOVi32imm: - return 8; case ARM::CONSTPOOL_ENTRY: case ARM::JUMPTABLE_INSTS: case ARM::JUMPTABLE_ADDRS: @@ -777,19 +771,6 @@ unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { // If this machine instr is a constant pool entry, its size is recorded as // operand #2. 
return MI.getOperand(2).getImm(); - case ARM::Int_eh_sjlj_longjmp: - return 16; - case ARM::tInt_eh_sjlj_longjmp: - return 10; - case ARM::tInt_WIN_eh_sjlj_longjmp: - return 12; - case ARM::Int_eh_sjlj_setjmp: - case ARM::Int_eh_sjlj_setjmp_nofp: - return 20; - case ARM::tInt_eh_sjlj_setjmp: - case ARM::t2Int_eh_sjlj_setjmp: - case ARM::t2Int_eh_sjlj_setjmp_nofp: - return 12; case ARM::SPACE: return MI.getOperand(1).getImm(); case ARM::INLINEASM: @@ -800,14 +781,6 @@ unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { Size = alignTo(Size, 4); return Size; } - case ARM::SpeculationBarrierISBDSBEndBB: - case ARM::t2SpeculationBarrierISBDSBEndBB: - // This gets lowered to 2 4-byte instructions. - return 8; - case ARM::SpeculationBarrierSBEndBB: - case ARM::t2SpeculationBarrierSBEndBB: - // This gets lowered to 1 4-byte instructions. - return 4; } } diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index fe4e6b24367a..1b41427a1cab 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -14527,7 +14527,7 @@ static SDValue PerformXORCombine(SDNode *N, SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); const TargetLowering *TLI = Subtarget->getTargetLowering(); - if (TLI->isConstTrueVal(N1.getNode()) && + if (TLI->isConstTrueVal(N1) && (N0->getOpcode() == ARMISD::VCMP || N0->getOpcode() == ARMISD::VCMPZ)) { if (CanInvertMVEVCMP(N0)) { SDLoc DL(N0); diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index 1c1db473f866..32a3911d3369 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -3657,6 +3657,8 @@ def : InstAlias<"mov${p} $Rd, $imm", (MOVi16 GPR:$Rd, imm0_65535_expr:$imm, pred:$p), 0>, Requires<[IsARM, HasV6T2]>; +// This gets lowered to a single 4-byte instruction let Size = 4 in def MOVi16_ga_pcrel : PseudoInst<(outs GPR:$Rd), (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>, Sched<[WriteALU]>; @@ -3680,6 +3682,8 @@ def MOVTi16 : AI1<0b1010, (outs GPRnopc:$Rd), let DecoderMethod = "DecodeArmMOVTWInstruction"; } +// This gets lowered to a single 4-byte instruction let Size = 4 in def MOVTi16_ga_pcrel : PseudoInst<(outs GPR:$Rd), (ins GPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>, Sched<[WriteALU]>; @@ -5895,27 +5899,30 @@ def : ARMPat<(ARMthread_pointer), (MRC 15, 0, 13, 0, 3)>, // // These are pseudo-instructions and are lowered to individual MC-insts, so // no encoding information is necessary.
+// This gets lowered to an instruction sequence of 20 bytes let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR, Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15 ], - hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { + hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1, Size = 20 in { def Int_eh_sjlj_setjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$val), NoItinerary, [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>, Requires<[IsARM, HasVFP2]>; } +// This gets lowered to an instruction sequence of 20 bytes let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR ], - hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { + hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1, Size = 20 in { def Int_eh_sjlj_setjmp_nofp : PseudoInst<(outs), (ins GPR:$src, GPR:$val), NoItinerary, [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>, Requires<[IsARM, NoVFP]>; } +// This gets lowered to an instruction sequence of 16 bytes // FIXME: Non-IOS version(s) -let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, +let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, Size = 16, Defs = [ R7, LR, SP ] in { def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch), NoItinerary, @@ -5958,7 +5965,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in // This is a single pseudo instruction; the benefit is that it can be remat'd // as a single unit instead of having to handle reg inputs. // FIXME: Remove this when we can do generalized remat. -let isReMaterializable = 1, isMoveImm = 1 in +let isReMaterializable = 1, isMoveImm = 1, Size = 8 in def MOVi32imm : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVix2, [(set GPR:$dst, (arm_i32imm:$src))]>, Requires<[IsARM]>; @@ -6419,8 +6426,12 @@ def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn), // SpeculationBarrierEndBB must only be used after an unconditional control // flow, i.e. after a terminator for which isBarrier is True. let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in { + // This gets lowered to a pair of 4-byte instructions + let Size = 8 in def SpeculationBarrierISBDSBEndBB : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>; + // This gets lowered to a single 4-byte instruction + let Size = 4 in def SpeculationBarrierSBEndBB : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>; } diff --git a/llvm/lib/Target/ARM/ARMInstrThumb.td b/llvm/lib/Target/ARM/ARMInstrThumb.td index f09ad8167600..71527ae1ab11 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb.td @@ -1537,25 +1537,28 @@ def tTPsoft : tPseudoInst<(outs), (ins), 4, IIC_Br, // Defs. By doing so, we also cause the prologue/epilogue code to actively // preserve all of the callee-saved registers, which is exactly what we want. // $val is a scratch register for our use.
+// This gets lowered to an instruction sequence of 12 bytes let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R12, CPSR ], - hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1, + hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1, Size = 12, usesCustomInserter = 1 in def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val), AddrModeNone, 0, NoItinerary, "","", [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>; +// This gets lowered to an instruction sequence of 10 bytes // FIXME: Non-IOS version(s) let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, isCodeGenOnly = 1, - Defs = [ R7, LR, SP ] in + Size = 10, Defs = [ R7, LR, SP ] in def tInt_eh_sjlj_longjmp : XI<(outs), (ins tGPR:$src, tGPR:$scratch), AddrModeNone, 0, IndexModeNone, Pseudo, NoItinerary, "", "", [(ARMeh_sjlj_longjmp tGPR:$src, tGPR:$scratch)]>, Requires<[IsThumb,IsNotWindows]>; +// This gets lowered to an instruction sequence of 12 bytes // (Windows is Thumb2-only) let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, isCodeGenOnly = 1, - Defs = [ R11, LR, SP ] in + Size = 12, Defs = [ R11, LR, SP ] in def tInt_WIN_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch), AddrModeNone, 0, IndexModeNone, Pseudo, NoItinerary, "", "", [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>, diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td index 6e8e61ca2b8e..f80b9a5053f7 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -2194,6 +2194,8 @@ def : InstAlias<"mov${p} $Rd, $imm", (t2MOVi16 rGPR:$Rd, imm256_65535_expr:$imm, pred:$p), 0>, Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteALU]>; +// This gets lowered to a single 4-byte instruction +let Size = 4 in def t2MOVi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd), (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>, Sched<[WriteALU]>; @@ -2223,6 +2225,8 @@ def t2MOVTi16 : T2I<(outs rGPR:$Rd), let DecoderMethod = "DecodeT2MOVTWInstruction"; } +// This gets lowered to a single 4-byte instruction +let Size = 4 in def t2MOVTi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd), (ins rGPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>, Sched<[WriteALU]>, Requires<[IsThumb, HasV8MBaseline]>; @@ -3814,10 +3818,11 @@ def : T2Pat<(stlex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr), // doing so, we also cause the prologue/epilogue code to actively preserve // all of the callee-saved registers, which is exactly what we want. // $val is a scratch register for our use. +// This gets lowered to an instruction sequence of 12 bytes let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR, Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15], - hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1, + hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1, Size = 12, usesCustomInserter = 1 in { def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val), AddrModeNone, 0, NoItinerary, "", "", @@ -3825,9 +3830,10 @@ let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR ], - hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1, + hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1, Size = 12, usesCustomInserter = 1 in { def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val), AddrModeNone, 0, NoItinerary, "", "", @@ -4224,7 +4230,7 @@ def : t2InstAlias<"hvc\t$imm16", (t2HVC imm0_65535:$imm16)>; // 32-bit immediate using movw + movt.
// This is a single pseudo instruction to make it re-materializable. // FIXME: Remove this when we can do generalized remat. -let isReMaterializable = 1, isMoveImm = 1 in +let isReMaterializable = 1, isMoveImm = 1, Size = 8 in def t2MOVi32imm : PseudoInst<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVix2, [(set rGPR:$dst, (i32 imm:$src))]>, Requires<[IsThumb, UseMovt]>; @@ -5006,8 +5012,12 @@ def : InstAlias<"dfb${p}", (t2DSB 0xc, pred:$p), 1>, Requires<[HasDFB]>; // SpeculationBarrierEndBB must only be used after an unconditional control // flow, i.e. after a terminator for which isBarrier is True. let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in { + // This gets lowered to a pair of 4-byte instructions + let Size = 8 in def t2SpeculationBarrierISBDSBEndBB : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>; + // This gets lowered to a single 4-byte instruction + let Size = 4 in def t2SpeculationBarrierSBEndBB : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>; } diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp index 2dd25234dc50..32160b109343 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -304,6 +304,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { case CortexM7: case CortexR52: case CortexX1: + case CortexX1C: break; case Exynos: LdStMultipleTiming = SingleIssuePlusExtras; diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h index 1c2b7ee6ba35..7cbdc014299f 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -77,6 +77,7 @@ protected: CortexR52, CortexR7, CortexX1, + CortexX1C, Exynos, Krait, Kryo, diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index e0750a9945d2..d9d563ead260 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -2109,9 +2109,6 @@ static bool canTailPredicateLoop(Loop *L, LoopInfo *LI, ScalarEvolution &SE, } Type *T = I.getType(); - if (T->isPointerTy()) - T = T->getPointerElementType(); - if (T->getScalarSizeInBits() > 32) { LLVM_DEBUG(dbgs() << "Unsupported Type: "; T->dump()); return false; } diff --git a/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp index ea6a7498e27f..311e43d77210 100644 --- a/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp +++ b/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp @@ -313,12 +313,18 @@ bool HexagonOptAddrMode::isSafeToExtLR(NodeAddr<StmtNode *> SN, return false; } + // If the register is undefined (for example if it's a reserved register), + // it may still be possible to extend the range, but it's safer to be + // conservative and just punt. + if (LRExtRegRD == 0) + return false; + MachineInstr *UseMI = NodeAddr<StmtNode *>(IA).Addr->getCode(); NodeAddr<DefNode *> LRExtRegDN = DFG->addr<DefNode *>(LRExtRegRD); // Reaching Def to LRExtReg can't be a phi.
if ((LRExtRegDN.Addr->getFlags() & NodeAttrs::PhiRef) && MI->getParent() != UseMI->getParent()) - return false; + return false; } return true; } diff --git a/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.cpp b/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.cpp index 860c0ce29326..79e9ad4dd1d2 100644 --- a/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.cpp +++ b/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.cpp @@ -21,13 +21,32 @@ using namespace llvm; M68kLegalizerInfo::M68kLegalizerInfo(const M68kSubtarget &ST) { using namespace TargetOpcode; - const LLT S32 = LLT::scalar(32); - const LLT P0 = LLT::pointer(0, 32); - getActionDefinitionsBuilder(G_LOAD).legalFor({S32}); - getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({P0}); - getActionDefinitionsBuilder(G_ADD).legalFor({S32}); - getActionDefinitionsBuilder(G_SUB).legalFor({S32}); - getActionDefinitionsBuilder(G_MUL).legalFor({S32}); - getActionDefinitionsBuilder(G_UDIV).legalFor({S32}); + const LLT s8 = LLT::scalar(8); + const LLT s16 = LLT::scalar(16); + const LLT s32 = LLT::scalar(32); + const LLT p0 = LLT::pointer(0, 32); + + getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_UDIV, G_AND}) + .legalFor({s8, s16, s32}) + .clampScalar(0, s8, s32) + .widenScalarToNextPow2(0, 8); + + getActionDefinitionsBuilder(G_CONSTANT) + .legalFor({s32, p0}) + .clampScalar(0, s32, s32); + + getActionDefinitionsBuilder({G_FRAME_INDEX, G_GLOBAL_VALUE}).legalFor({p0}); + + getActionDefinitionsBuilder({G_STORE, G_LOAD}) + .legalForTypesWithMemDesc({{s32, p0, s32, 4}, + {s32, p0, s16, 4}, + {s32, p0, s8, 4}, + {s16, p0, s16, 2}, + {s8, p0, s8, 1}, + {p0, p0, s32, 4}}) + .clampScalar(0, s8, s32); + + getActionDefinitionsBuilder(G_PTR_ADD).legalFor({{p0, s32}}); + getLegacyLegalizerInfo().computeTables(); } diff --git a/llvm/lib/Target/M68k/M68kInstrBits.td b/llvm/lib/Target/M68k/M68kInstrBits.td index d610bce5c277..0d1278102378 100644 --- a/llvm/lib/Target/M68k/M68kInstrBits.td +++ b/llvm/lib/Target/M68k/M68kInstrBits.td @@ -79,6 +79,10 @@ def BTST32di : MxBTST_RI<MxType32d>; // Memory BTST limited to 8 bits only def BTST8jd : MxBTST_MR<MxType8d, MxType8.JOp, MxType8.JPat, MxEncEAj_0, MxExtEmpty>; +def BTST8od : MxBTST_MR<MxType8d, MxType8.OOp, MxType8.OPat, + MxEncEAo_0, MxExtEmpty>; +def BTST8ed : MxBTST_MR<MxType8d, MxType8.EOp, MxType8.EPat, + MxEncEAe_0, MxExtEmpty>; def BTST8pd : MxBTST_MR<MxType8d, MxType8.POp, MxType8.PPat, MxEncEAp_0, MxExtI16_0>; def BTST8fd : MxBTST_MR<MxType8d, MxType8.FOp, MxType8.FPat, @@ -90,6 +94,10 @@ def BTST8kd : MxBTST_MR<MxType8d, MxType8.KOp, MxType8.KPat, def BTST8ji : MxBTST_MI<MxType8d, MxType8.JOp, MxType8.JPat, MxEncEAj_0, MxExtEmpty>; +def BTST8oi : MxBTST_MI<MxType8d, MxType8.OOp, MxType8.OPat, + MxEncEAo_0, MxExtEmpty>; +def BTST8ei : MxBTST_MI<MxType8d, MxType8.EOp, MxType8.EPat, + MxEncEAe_0, MxExtEmpty>; def BTST8pi : MxBTST_MI<MxType8d, MxType8.POp, MxType8.PPat, MxEncEAp_0, MxExtI16_0>; def BTST8fi : MxBTST_MI<MxType8d, MxType8.FOp, MxType8.FPat, diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index eac237bb27bb..7b5248906b56 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -574,7 +574,6 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, for (const auto &Op : {ISD::FMINIMUM, ISD::FMAXIMUM}) { setFP16OperationAction(Op, MVT::f16, GetMinMaxAction(Expand), Expand); setOperationAction(Op, MVT::f32, GetMinMaxAction(Expand)); - setOperationAction(Op, MVT::f64, 
GetMinMaxAction(Expand)); setFP16OperationAction(Op, MVT::v2f16, GetMinMaxAction(Expand), Expand); } diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 22e200e77831..22084cddc092 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -896,6 +896,7 @@ defm FMUL : F3_fma_component<"mul", fmul>; defm FMIN : F3<"min", fminnum>; defm FMAX : F3<"max", fmaxnum>; +// Note: min.NaN.f64 and max.NaN.f64 do not actually exist. defm FMINNAN : F3<"min.NaN", fminimum>; defm FMAXNAN : F3<"max.NaN", fmaximum>; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 25cc34badda0..cbeae0ab03b8 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1252,7 +1252,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Legal); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Legal); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal); } else { setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom); @@ -9093,22 +9092,30 @@ bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) { static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, unsigned &Opcode) { - const SDNode *InputNode = Op.getOperand(0).getNode(); - if (!InputNode || !ISD::isUNINDEXEDLoad(InputNode)) - return false; - - if (!Subtarget.hasVSX()) + LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0)); + if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode)) return false; EVT Ty = Op->getValueType(0); - if (Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32 || - Ty == MVT::v8i16 || Ty == MVT::v16i8) + // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending + // as we cannot handle extending loads for these types. + if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) && + ISD::isNON_EXTLoad(InputNode)) + return true; + + EVT MemVT = InputNode->getMemoryVT(); + // For v8i16 and v16i8 types, extending loads can be handled as long as the + // memory VT is the same vector element VT type. + // The loads feeding into the v8i16 and v16i8 types will be extending because + // scalar i8/i16 are not legal types. + if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) && + (MemVT == Ty.getVectorElementType())) return true; if (Ty == MVT::v2i64) { // Check the extend type, when the input type is i32, and the output vector // type is v2i64. - if (cast<LoadSDNode>(Op.getOperand(0))->getMemoryVT() == MVT::i32) { + if (MemVT == MVT::i32) { if (ISD::isZEXTLoad(InputNode)) Opcode = PPCISD::ZEXT_LD_SPLAT; if (ISD::isSEXTLoad(InputNode)) @@ -10755,6 +10762,26 @@ SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, if (VT == MVT::v2f64 && C) return Op; + if (Subtarget.hasP9Vector()) { + // A f32 load feeding into a v4f32 insert_vector_elt is handled in this way + // because on P10, it allows this specific insert_vector_elt load pattern to + // utilize the refactored load and store infrastructure in order to exploit + // prefixed loads. 
+ // On targets with inexpensive direct moves (Power9 and up), a + // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer + // load since a single precision load will involve conversion to double + // precision on the load followed by another conversion to single precision. + if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) && + (isa<LoadSDNode>(V2))) { + SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1); + SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2); + SDValue InsVecElt = + DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector, + BitcastLoad, Op.getOperand(2)); + return DAG.getBitcast(MVT::v4f32, InsVecElt); + } + } + if (Subtarget.isISA3_1()) { if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64()) return SDValue(); diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td index fe354208533b..ff43426dd1ef 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td +++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td @@ -2816,32 +2816,20 @@ let Predicates = [IsISA3_1, HasVSX, IsLittleEndian] in { def : Pat<(v4f32 (insertelt v4f32:$vDi, f32:$rA, i64:$rB)), (VINSWVRX $vDi, InsertEltShift.Sub32Left2, (XSCVDPSPN $rA))>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i64:$rB)), - (VINSWRX $vDi, InsertEltShift.Sub32Left2, (LWZ memri:$rA))>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i64:$rB)), - (VINSWRX $vDi, InsertEltShift.Sub32Left2, (PLWZ memri34:$rA))>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i64:$rB)), - (VINSWRX $vDi, InsertEltShift.Sub32Left2, (LWZX memrr:$rA))>; def : Pat<(v2f64 (insertelt v2f64:$vDi, f64:$A, i64:$rB)), (VINSDRX $vDi, InsertEltShift.Left3, Bitcast.DblToLong)>; - def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX4:$rA)), i64:$rB)), + def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load DSForm:$rA)), i64:$rB)), (VINSDRX $vDi, InsertEltShift.Left3, (LD memrix:$rA))>; - def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX34:$rA)), i64:$rB)), + def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load PDForm:$rA)), i64:$rB)), (VINSDRX $vDi, InsertEltShift.Left3, (PLD memri34:$rA))>; - def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load xaddrX4:$rA)), i64:$rB)), + def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load XForm:$rA)), i64:$rB)), (VINSDRX $vDi, InsertEltShift.Left3, (LDX memrr:$rA))>; let AddedComplexity = 400 in { // Immediate vector insert element foreach Idx = [0, 1, 2, 3] in { def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, Idx)), (VINSW $vDi, !mul(!sub(3, Idx), 4), $rA)>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), Idx)), - (VINSW $vDi, !mul(!sub(3, Idx), 4), (LWZ memri:$rA))>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), Idx)), - (VINSW $vDi, !mul(!sub(3, Idx), 4), (PLWZ memri34:$rA))>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), Idx)), - (VINSW $vDi, !mul(!sub(3, Idx), 4), (LWZX memrr:$rA))>; } foreach i = [0, 1] in def : Pat<(v2i64 (insertelt v2i64:$vDi, i64:$rA, (i64 i))), @@ -2860,12 +2848,6 @@ let Predicates = [IsISA3_1, HasVSX, IsBigEndian, IsPPC32] in { def : Pat<(v4f32 (insertelt v4f32:$vDi, f32:$rA, i32:$rB)), (VINSWVLX $vDi, InsertEltShift.Left2, (XSCVDPSPN $rA))>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i32:$rB)), - (VINSWLX v4f32:$vDi, InsertEltShift.Left2, (LWZ memri:$rA))>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i32:$rB)), - (VINSWLX v4f32:$vDi, 
InsertEltShift.Left2, (PLWZ memri34:$rA))>; - def: Pat<(v4f32(insertelt v4f32 : $vDi, (f32(load xaddr : $rA)), i32 : $rB)), - (VINSWLX v4f32 : $vDi, InsertEltShift.Left2, (LWZX memrr : $rA))>; } let Predicates = [IsISA3_1, HasVSX, IsBigEndian, IsPPC64] in { @@ -2881,20 +2863,14 @@ let Predicates = [IsISA3_1, HasVSX, IsBigEndian, IsPPC64] in { def : Pat<(v4f32 (insertelt v4f32:$vDi, f32:$rA, i64:$rB)), (VINSWVLX $vDi, InsertEltShift.Sub32Left2, (XSCVDPSPN $rA))>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i64:$rB)), - (VINSWLX $vDi, InsertEltShift.Sub32Left2, (LWZ memri:$rA))>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i64:$rB)), - (VINSWLX $vDi, InsertEltShift.Sub32Left2, (PLWZ memri34:$rA))>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i64:$rB)), - (VINSWLX $vDi, InsertEltShift.Sub32Left2, (LWZX memrr:$rA))>; def : Pat<(v2f64 (insertelt v2f64:$vDi, f64:$A, i64:$rB)), (VINSDLX $vDi, InsertEltShift.Left3, Bitcast.DblToLong)>; - def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX4:$rA)), i64:$rB)), + def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load DSForm:$rA)), i64:$rB)), (VINSDLX $vDi, InsertEltShift.Left3, (LD memrix:$rA))>; - def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX34:$rA)), i64:$rB)), + def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load PDForm:$rA)), i64:$rB)), (VINSDLX $vDi, InsertEltShift.Left3, (PLD memri34:$rA))>; - def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load xaddrX4:$rA)), i64:$rB)), + def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load XForm:$rA)), i64:$rB)), (VINSDLX $vDi, InsertEltShift.Left3, (LDX memrr:$rA))>; } @@ -2904,15 +2880,6 @@ let AddedComplexity = 400, Predicates = [IsISA3_1, HasVSX, IsBigEndian] in { foreach Idx = [0, 1, 2, 3] in { def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, (Ty Idx))), (VINSW $vDi, !mul(Idx, 4), $rA)>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), - (Ty Idx))), - (VINSW $vDi, !mul(Idx, 4), (LWZ memri:$rA))>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), - (Ty Idx))), - (VINSW $vDi, !mul(Idx, 4), (PLWZ memri34:$rA))>; - def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), - (Ty Idx))), - (VINSW $vDi, !mul(Idx, 4), (LWZX memrr:$rA))>; } } diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index a2ea34fe11c7..01f36e6dcdd2 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -2266,8 +2266,8 @@ void RISCVAsmParser::emitLoadImm(MCRegister DestReg, int64_t Value, if (Inst.Opc == RISCV::LUI) { emitToStreamer( Out, MCInstBuilder(RISCV::LUI).addReg(DestReg).addImm(Inst.Imm)); - } else if (Inst.Opc == RISCV::ADDUW) { - emitToStreamer(Out, MCInstBuilder(RISCV::ADDUW) + } else if (Inst.Opc == RISCV::ADD_UW) { + emitToStreamer(Out, MCInstBuilder(RISCV::ADD_UW) .addReg(DestReg) .addReg(SrcReg) .addReg(RISCV::X0)); diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp index 14d0191a505f..1078403a3fd2 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp @@ -197,9 +197,9 @@ void RISCVMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, // Get byte count of instruction. 
unsigned Size = Desc.getSize(); - // RISCVInstrInfo::getInstSizeInBytes hard-codes the number of expanded - // instructions for each pseudo, and must be updated when adding new pseudos - // or changing existing ones. + // RISCVInstrInfo::getInstSizeInBytes expects that the total size of the + // expanded instructions for each pseudo is correct in the Size field of the + // tablegen definition for the pseudo. if (MI.getOpcode() == RISCV::PseudoCALLReg || MI.getOpcode() == RISCV::PseudoCALL || MI.getOpcode() == RISCV::PseudoTAIL || diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp index 18858209aa9b..e935179e5f9b 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp @@ -31,7 +31,7 @@ static int getInstSeqCost(RISCVMatInt::InstSeq &Res, bool HasRVC) { case RISCV::LUI: Compressed = isInt<6>(Instr.Imm); break; - case RISCV::ADDUW: + case RISCV::ADD_UW: Compressed = false; break; } @@ -123,10 +123,11 @@ static void generateInstSeqImpl(int64_t Val, } } - // Try to use SLLIUW for Hi52 when it is uint32 but not int32. + // Try to use SLLI_UW for Hi52 when it is uint32 but not int32. if (isUInt<32>((uint64_t)Hi52) && !isInt<32>((uint64_t)Hi52) && ActiveFeatures[RISCV::FeatureStdExtZba]) { - // Use LUI+ADDI or LUI to compose, then clear the upper 32 bits with SLLIUW. + // Use LUI+ADDI or LUI to compose, then clear the upper 32 bits with + // SLLI_UW. Hi52 = ((uint64_t)Hi52) | (0xffffffffull << 32); Unsigned = true; } @@ -134,7 +135,7 @@ static void generateInstSeqImpl(int64_t Val, generateInstSeqImpl(Hi52, ActiveFeatures, Res); if (Unsigned) - Res.push_back(RISCVMatInt::Inst(RISCV::SLLIUW, ShiftAmount)); + Res.push_back(RISCVMatInt::Inst(RISCV::SLLI_UW, ShiftAmount)); else Res.push_back(RISCVMatInt::Inst(RISCV::SLLI, ShiftAmount)); if (Lo12) @@ -210,7 +211,7 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) { uint64_t LeadingOnesVal = Val | maskLeadingOnes<uint64_t>(LeadingZeros); TmpSeq.clear(); generateInstSeqImpl(LeadingOnesVal, ActiveFeatures, TmpSeq); - TmpSeq.push_back(RISCVMatInt::Inst(RISCV::ADDUW, 0)); + TmpSeq.push_back(RISCVMatInt::Inst(RISCV::ADD_UW, 0)); // Keep the new sequence if it is an improvement. 
if (TmpSeq.size() < Res.size()) { diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td index 5b0f27c5e937..e32a8fb010de 100644 --- a/llvm/lib/Target/RISCV/RISCV.td +++ b/llvm/lib/Target/RISCV/RISCV.td @@ -52,11 +52,17 @@ def HasStdExtZfhmin : Predicate<"Subtarget->hasStdExtZfhmin()">, def FeatureStdExtZfh : SubtargetFeature<"zfh", "HasStdExtZfh", "true", "'Zfh' (Half-Precision Floating-Point)", - [FeatureStdExtZfhmin, FeatureStdExtF]>; + [FeatureStdExtF]>; def HasStdExtZfh : Predicate<"Subtarget->hasStdExtZfh()">, AssemblerPredicate<(all_of FeatureStdExtZfh), "'Zfh' (Half-Precision Floating-Point)">; +def HasStdExtZfhOrZfhmin + : Predicate<"Subtarget->hasStdExtZfh() || Subtarget->hasStdExtZfhmin()">, + AssemblerPredicate<(any_of FeatureStdExtZfh, FeatureStdExtZfhmin), + "'Zfh' (Half-Precision Floating-Point) or " + "'Zfhmin' (Half-Precision Floating-Point Minimal)">; + def FeatureStdExtC : SubtargetFeature<"c", "HasStdExtC", "true", "'C' (Compressed Instructions)">; diff --git a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp index 26ce16486bd9..40ee7ca6bc1c 100644 --- a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp @@ -86,9 +86,9 @@ bool RISCVExpandAtomicPseudo::expandMBB(MachineBasicBlock &MBB) { bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator &NextMBBI) { - // RISCVInstrInfo::getInstSizeInBytes hard-codes the number of expanded - // instructions for each pseudo, and must be updated when adding new pseudos - // or changing existing ones. + // RISCVInstrInfo::getInstSizeInBytes expects that the total size of the + // expanded instructions for each pseudo is correct in the Size field of the + // tablegen definition for the pseudo. switch (MBBI->getOpcode()) { case RISCV::PseudoAtomicLoadNand32: return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32, diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp index 80340ee81509..0c5c13db7112 100644 --- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp @@ -92,9 +92,9 @@ bool RISCVExpandPseudo::expandMBB(MachineBasicBlock &MBB) { bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator &NextMBBI) { - // RISCVInstrInfo::getInstSizeInBytes hard-codes the number of expanded - // instructions for each pseudo, and must be updated when adding new pseudos - // or changing existing ones. + // RISCVInstrInfo::getInstSizeInBytes expects that the total size of the + // expanded instructions for each pseudo is correct in the Size field of the + // tablegen definition for the pseudo. 
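The comment rewritten above (in three places) documents a single refactor: the byte count of each pseudo instruction now comes from the Size field of its TableGen record instead of a hand-maintained switch in RISCVInstrInfo::getInstSizeInBytes. A minimal sketch of the resulting lookup shape, with hypothetical names (InstDesc, DescTable) standing in for the generated tables, not the actual LLVM API:

    #include <cassert>

    // Hypothetical stand-in for the TableGen-generated descriptor table; in
    // the real backend the Size below comes from `let Size = N` on the
    // pseudo's definition.
    struct InstDesc { unsigned Opcode; unsigned Size; };
    static const InstDesc DescTable[] = {
        {/*PseudoCALL*/ 0, 8},              // auipc + jalr
        {/*PseudoAtomicLoadNand32*/ 1, 20}, // five 4-byte instructions
    };

    // Because the size travels with the descriptor, adding or changing a
    // pseudo no longer requires touching a parallel switch statement.
    unsigned getInstSizeInBytes(unsigned Opcode) {
      for (const InstDesc &D : DescTable)
        if (D.Opcode == Opcode)
          return D.Size;
      assert(false && "unknown opcode");
      return 0;
    }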
switch (MBBI->getOpcode()) { case RISCV::PseudoLLA: return expandLoadLocalAddress(MBB, MBBI, NextMBBI); diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 5870502d74d5..6f77428ae721 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -166,8 +166,8 @@ static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, XLenVT); if (Inst.Opc == RISCV::LUI) Result = CurDAG->getMachineNode(RISCV::LUI, DL, XLenVT, SDImm); - else if (Inst.Opc == RISCV::ADDUW) - Result = CurDAG->getMachineNode(RISCV::ADDUW, DL, XLenVT, SrcReg, + else if (Inst.Opc == RISCV::ADD_UW) + Result = CurDAG->getMachineNode(RISCV::ADD_UW, DL, XLenVT, SrcReg, CurDAG->getRegister(RISCV::X0, XLenVT)); else if (Inst.Opc == RISCV::SH1ADD || Inst.Opc == RISCV::SH2ADD || Inst.Opc == RISCV::SH3ADD) @@ -775,10 +775,10 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + C3)) << C2)) { // Use slli.uw when possible. if ((XLen - (C2 + C3)) == 32 && Subtarget->hasStdExtZba()) { - SDNode *SLLIUW = - CurDAG->getMachineNode(RISCV::SLLIUW, DL, XLenVT, X, + SDNode *SLLI_UW = + CurDAG->getMachineNode(RISCV::SLLI_UW, DL, XLenVT, X, CurDAG->getTargetConstant(C2, DL, XLenVT)); - ReplaceNode(Node, SLLIUW); + ReplaceNode(Node, SLLI_UW); return; } @@ -1811,7 +1811,7 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const { case RISCV::CLZW: case RISCV::CTZW: case RISCV::CPOPW: - case RISCV::SLLIUW: + case RISCV::SLLI_UW: case RISCV::FCVT_H_W: case RISCV::FCVT_H_WU: case RISCV::FCVT_S_W: @@ -1830,20 +1830,20 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const { if (Bits < (64 - countLeadingZeros(User->getConstantOperandVal(1)))) return false; break; - case RISCV::SEXTB: + case RISCV::SEXT_B: if (Bits < 8) return false; break; - case RISCV::SEXTH: - case RISCV::ZEXTH_RV32: - case RISCV::ZEXTH_RV64: + case RISCV::SEXT_H: + case RISCV::ZEXT_H_RV32: + case RISCV::ZEXT_H_RV64: if (Bits < 16) return false; break; - case RISCV::ADDUW: - case RISCV::SH1ADDUW: - case RISCV::SH2ADDUW: - case RISCV::SH3ADDUW: + case RISCV::ADD_UW: + case RISCV::SH1ADD_UW: + case RISCV::SH2ADD_UW: + case RISCV::SH3ADD_UW: // The first operand to add.uw/shXadd.uw is implicitly zero extended from // 32 bits. if (UI.getOperandNo() != 0 || Bits < 32) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 5cc3aa35d4d2..97d24c8e9c0b 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -282,6 +282,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) ? Legal : Expand); + // Zbkb can use rev8+brev8 to implement bitreverse. + setOperationAction(ISD::BITREVERSE, XLenVT, + Subtarget.hasStdExtZbkb() ? 
Custom : Expand); } if (Subtarget.hasStdExtZbb()) { @@ -1082,6 +1085,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::SHL); setTargetDAGCombine(ISD::STORE); } + + setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2"); + setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2"); } EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, @@ -1115,17 +1121,15 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, case Intrinsic::riscv_masked_atomicrmw_min_i32: case Intrinsic::riscv_masked_atomicrmw_umax_i32: case Intrinsic::riscv_masked_atomicrmw_umin_i32: - case Intrinsic::riscv_masked_cmpxchg_i32: { - PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType()); + case Intrinsic::riscv_masked_cmpxchg_i32: Info.opc = ISD::INTRINSIC_W_CHAIN; - Info.memVT = MVT::getVT(PtrTy->getPointerElementType()); + Info.memVT = MVT::i32; Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.align = Align(4); Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore | MachineMemOperand::MOVolatile; return true; - } case Intrinsic::riscv_masked_strided_load: Info.opc = ISD::INTRINSIC_W_CHAIN; Info.ptrVal = I.getArgOperand(1); @@ -2952,17 +2956,26 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, return LowerINTRINSIC_VOID(Op, DAG); case ISD::BSWAP: case ISD::BITREVERSE: { - // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining. - assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation"); MVT VT = Op.getSimpleValueType(); SDLoc DL(Op); - // Start with the maximum immediate value which is the bitwidth - 1. - unsigned Imm = VT.getSizeInBits() - 1; - // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits. - if (Op.getOpcode() == ISD::BSWAP) - Imm &= ~0x7U; - return DAG.getNode(RISCVISD::GREV, DL, VT, Op.getOperand(0), - DAG.getConstant(Imm, DL, VT)); + if (Subtarget.hasStdExtZbp()) { + // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining. + // Start with the maximum immediate value which is the bitwidth - 1. + unsigned Imm = VT.getSizeInBits() - 1; + // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits. + if (Op.getOpcode() == ISD::BSWAP) + Imm &= ~0x7U; + return DAG.getNode(RISCVISD::GREV, DL, VT, Op.getOperand(0), + DAG.getConstant(Imm, DL, VT)); + } + assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization"); + assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode"); + // Expand bitreverse to a bswap(rev8) followed by brev8. + SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0)); + // We use the Zbp grevi encoding for rev.b/brev8 which will be recognized + // as brev8 by an isel pattern. + return DAG.getNode(RISCVISD::GREV, DL, VT, BSwap, + DAG.getConstant(7, DL, VT)); } case ISD::FSHL: case ISD::FSHR: { @@ -3063,6 +3076,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate // vscale as VLENB / 8. static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!"); + if (Subtarget.getMinVLen() < RISCV::RVVBitsPerBlock) + report_fatal_error("Support for VLEN==32 is incomplete."); if (isa<ConstantSDNode>(Op.getOperand(0))) { // We assume VLENB is a multiple of 8. We manually choose the best shift // here because SimplifyDemandedBits isn't always able to simplify it. 
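The Zbkb path added above decomposes a full bit reversal into a byte swap (rev8) followed by a bit reversal within each byte (brev8, emitted here as GREVI with immediate 7). A self-contained C++ model of that decomposition, plain bit-twiddling rather than the lowering itself:

    #include <cstdint>
    #include <cstdio>

    // brev8: reverse the bit order inside each byte (grevi with immediate 7).
    static uint32_t brev8(uint32_t x) {
      x = ((x & 0x55555555u) << 1) | ((x >> 1) & 0x55555555u);
      x = ((x & 0x33333333u) << 2) | ((x >> 2) & 0x33333333u);
      x = ((x & 0x0F0F0F0Fu) << 4) | ((x >> 4) & 0x0F0F0F0Fu);
      return x;
    }

    // rev8 (bswap): reverse the byte order of the word.
    static uint32_t bswap32(uint32_t x) {
      return (x << 24) | ((x & 0xFF00u) << 8) | ((x >> 8) & 0xFF00u) | (x >> 24);
    }

    // Reversing the bytes and then the bits within each byte reverses every
    // bit, which is exactly the bswap + brev8 expansion above.
    static uint32_t bitreverse32(uint32_t x) { return brev8(bswap32(x)); }

    int main() { printf("%08x\n", bitreverse32(1)); } // prints 80000000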
@@ -4288,8 +4303,47 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, MVT XLenVT = Subtarget.getXLenVT(); if (VecVT.getVectorElementType() == MVT::i1) { - // FIXME: For now we just promote to an i8 vector and extract from that, - // but this is probably not optimal. + if (VecVT.isFixedLengthVector()) { + unsigned NumElts = VecVT.getVectorNumElements(); + if (NumElts >= 8) { + MVT WideEltVT; + unsigned WidenVecLen; + SDValue ExtractElementIdx; + SDValue ExtractBitIdx; + unsigned MaxEEW = Subtarget.getMaxELENForFixedLengthVectors(); + MVT LargestEltVT = MVT::getIntegerVT( + std::min(MaxEEW, unsigned(XLenVT.getSizeInBits()))); + if (NumElts <= LargestEltVT.getSizeInBits()) { + assert(isPowerOf2_32(NumElts) && + "the number of elements should be power of 2"); + WideEltVT = MVT::getIntegerVT(NumElts); + WidenVecLen = 1; + ExtractElementIdx = DAG.getConstant(0, DL, XLenVT); + ExtractBitIdx = Idx; + } else { + WideEltVT = LargestEltVT; + WidenVecLen = NumElts / WideEltVT.getSizeInBits(); + // extract element index = index / element width + ExtractElementIdx = DAG.getNode( + ISD::SRL, DL, XLenVT, Idx, + DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT)); + // mask bit index = index % element width + ExtractBitIdx = DAG.getNode( + ISD::AND, DL, XLenVT, Idx, + DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT)); + } + MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen); + Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec); + SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT, + Vec, ExtractElementIdx); + // Extract the bit from GPR. + SDValue ShiftRight = + DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx); + return DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight, + DAG.getConstant(1, DL, XLenVT)); + } + } + // Otherwise, promote to an i8 vector and extract from that. MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount()); Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx); @@ -4411,15 +4465,30 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getRegister(RISCV::X4, PtrVT); } case Intrinsic::riscv_orc_b: - // Lower to the GORCI encoding for orc.b. - return DAG.getNode(RISCVISD::GORC, DL, XLenVT, Op.getOperand(1), + case Intrinsic::riscv_brev8: { + // Lower to the GORCI encoding for orc.b or the GREVI encoding for brev8. + unsigned Opc = + IntNo == Intrinsic::riscv_brev8 ? RISCVISD::GREV : RISCVISD::GORC; + return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), DAG.getConstant(7, DL, XLenVT)); + } case Intrinsic::riscv_grev: case Intrinsic::riscv_gorc: { unsigned Opc = IntNo == Intrinsic::riscv_grev ? RISCVISD::GREV : RISCVISD::GORC; return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2)); } + case Intrinsic::riscv_zip: + case Intrinsic::riscv_unzip: { + // Lower to the SHFLI encoding for zip or the UNSHFLI encoding for unzip. + // For i32 the immediate is 15. For i64 the immediate is 31. + unsigned Opc = + IntNo == Intrinsic::riscv_zip ? 
RISCVISD::SHFL : RISCVISD::UNSHFL; + unsigned BitWidth = Op.getValueSizeInBits(); + assert(isPowerOf2_32(BitWidth) && BitWidth >= 2 && "Unexpected bit width"); + return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), + DAG.getConstant((BitWidth / 2) - 1, DL, XLenVT)); + } case Intrinsic::riscv_shfl: case Intrinsic::riscv_unshfl: { unsigned Opc = @@ -5829,14 +5898,17 @@ SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op, } } - if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) { - IndexVT = IndexVT.changeVectorElementType(XLenVT); - Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index); - } - if (!VL) VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; + if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) { + IndexVT = IndexVT.changeVectorElementType(XLenVT); + SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, Mask.getValueType(), + VL); + Index = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, IndexVT, Index, + TrueMask, VL); + } + unsigned IntID = IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask; SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)}; @@ -5937,14 +6009,17 @@ SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op, } } - if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) { - IndexVT = IndexVT.changeVectorElementType(XLenVT); - Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index); - } - if (!VL) VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; + if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) { + IndexVT = IndexVT.changeVectorElementType(XLenVT); + SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, Mask.getValueType(), + VL); + Index = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, IndexVT, Index, + TrueMask, VL); + } + unsigned IntID = IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask; SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)}; @@ -6568,7 +6643,11 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); unsigned Opc = IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFLW : RISCVISD::UNSHFLW; - if (isa<ConstantSDNode>(N->getOperand(2))) { + // There is no (UN)SHFLIW. If the control word is a constant, we can use + // (UN)SHFLI with bit 4 of the control word cleared. The upper 32 bit half + // will be shuffled the same way as the lower 32 bit half, but the two + // halves won't cross. + if (isa<ConstantSDNode>(NewOp2)) { NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2, DAG.getConstant(0xf, DL, MVT::i64)); Opc = @@ -7284,8 +7363,8 @@ static SDValue performANY_EXTENDCombine(SDNode *N, return SDValue(N, 0); } -// Try to form VWMUL or VWMULU. -// FIXME: Support VWMULSU. +// Try to form VWMUL, VWMULU or VWMULSU. +// TODO: Support VWMULSU.vx with a sign extend Op and a splat of scalar Op. 
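The combine below distinguishes three widening multiplies that differ only in how the narrow operands are extended before the double-width multiply. A scalar model for e16 elements, illustrative only (the real code operates on VL nodes):

    #include <cstdint>

    // vwmul: both operands sign-extended.
    int32_t vwmul_e16(int16_t a, int16_t b) { return int32_t(a) * int32_t(b); }

    // vwmulu: both operands zero-extended.
    uint32_t vwmulu_e16(uint16_t a, uint16_t b) {
      return uint32_t(a) * uint32_t(b);
    }

    // vwmulsu: first operand sign-extended, second zero-extended; this is the
    // (VSEXT_VL a) * (VZEXT_VL b) shape the combine now recognizes.
    int32_t vwmulsu_e16(int16_t a, uint16_t b) {
      return int32_t(a) * int32_t(uint32_t(b));
    }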
static SDValue combineMUL_VLToVWMUL_VL(SDNode *N, SelectionDAG &DAG, bool Commute) { assert(N->getOpcode() == RISCVISD::MUL_VL && "Unexpected opcode"); @@ -7296,6 +7375,7 @@ static SDValue combineMUL_VLToVWMUL_VL(SDNode *N, SelectionDAG &DAG, bool IsSignExt = Op0.getOpcode() == RISCVISD::VSEXT_VL; bool IsZeroExt = Op0.getOpcode() == RISCVISD::VZEXT_VL; + bool IsVWMULSU = IsSignExt && Op1.getOpcode() == RISCVISD::VZEXT_VL; if ((!IsSignExt && !IsZeroExt) || !Op0.hasOneUse()) return SDValue(); @@ -7316,7 +7396,7 @@ static SDValue combineMUL_VLToVWMUL_VL(SDNode *N, SelectionDAG &DAG, SDLoc DL(N); // See if the other operand is the same opcode. - if (Op0.getOpcode() == Op1.getOpcode()) { + if (IsVWMULSU || Op0.getOpcode() == Op1.getOpcode()) { if (!Op1.hasOneUse()) return SDValue(); @@ -7366,7 +7446,9 @@ static SDValue combineMUL_VLToVWMUL_VL(SDNode *N, SelectionDAG &DAG, if (Op1.getValueType() != NarrowVT) Op1 = DAG.getNode(ExtOpc, DL, NarrowVT, Op1, Mask, VL); - unsigned WMulOpc = IsSignExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL; + unsigned WMulOpc = RISCVISD::VWMULSU_VL; + if (!IsVWMULSU) + WMulOpc = IsSignExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL; return DAG.getNode(WMulOpc, DL, VT, Op0, Op1, Mask, VL); } @@ -8194,12 +8276,17 @@ void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, } break; } - case RISCVISD::READ_VLENB: - // We assume VLENB is at least 16 bytes. - Known.Zero.setLowBits(4); + case RISCVISD::READ_VLENB: { + // If we know the minimum VLen from Zvl extensions, we can use that to + // determine the trailing zeros of VLENB. + // FIXME: Limit to 128 bit vectors until we have more testing. + unsigned MinVLenB = std::min(128U, Subtarget.getMinVLen()) / 8; + if (MinVLenB > 0) + Known.Zero.setLowBits(Log2_32(MinVLenB)); // We assume VLENB is no more than 65536 / 8 bytes. Known.Zero.setBitsFrom(14); break; + } case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_WO_CHAIN: { unsigned IntNo = @@ -8230,9 +8317,11 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( default: break; case RISCVISD::SELECT_CC: { - unsigned Tmp = DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1); + unsigned Tmp = + DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1); if (Tmp == 1) return 1; // Early out. - unsigned Tmp2 = DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1); + unsigned Tmp2 = + DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1); return std::min(Tmp, Tmp2); } case RISCVISD::SLLW: @@ -8275,15 +8364,18 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( } break; } - case RISCVISD::VMV_X_S: + case RISCVISD::VMV_X_S: { // The number of sign bits of the scalar result is computed by obtaining the // element type of the input vector operand, subtracting its width from the // XLEN, and then adding one (sign bit within the element type). If the // element type is wider than XLen, the least-significant XLEN bits are // taken. 
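The rewritten VMV_X_S case below implements exactly the rule this comment states. A small model of the arithmetic with two worked values (vmvXSSignBits is a hypothetical helper name):

    #include <cassert>

    // An EltBits-wide lane moved to an XLEN register is sign-extended, so
    // XLEN - EltBits + 1 sign bits are known; a lane wider than XLEN is
    // truncated and only the trivial single sign bit is guaranteed.
    unsigned vmvXSSignBits(unsigned XLen, unsigned EltBits) {
      return EltBits <= XLen ? XLen - EltBits + 1 : 1;
    }

    int main() {
      assert(vmvXSSignBits(64, 16) == 49); // i16 element on RV64
      assert(vmvXSSignBits(32, 64) == 1);  // i64 element on RV32
    }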
- if (Op.getOperand(0).getScalarValueSizeInBits() > Subtarget.getXLen()) - return 1; - return Subtarget.getXLen() - Op.getOperand(0).getScalarValueSizeInBits() + 1; + unsigned XLen = Subtarget.getXLen(); + unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits(); + if (EltBits <= XLen) + return XLen - EltBits + 1; + break; + } } return 1; @@ -10129,6 +10221,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(FP_ROUND_VL) NODE_NAME_CASE(VWMUL_VL) NODE_NAME_CASE(VWMULU_VL) + NODE_NAME_CASE(VWMULSU_VL) NODE_NAME_CASE(VWADDU_VL) NODE_NAME_CASE(SETCC_VL) NODE_NAME_CASE(VSELECT_VL) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 58b7ec89f875..840a821870a7 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -245,6 +245,7 @@ enum NodeType : unsigned { // Widening instructions VWMUL_VL, VWMULU_VL, + VWMULSU_VL, VWADDU_VL, // Vector compare producing a mask. Fourth operand is input mask. Fifth diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index d39e0805a79c..649eb57b325b 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -999,6 +999,12 @@ bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require, void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { VSETVLIInfo CurInfo; + // BBLocalInfo tracks the VL/VTYPE state the same way BBInfo.Change was + // calculated in computeIncomingVLVTYPE. We need this to apply + // canSkipVSETVLIForLoadStore the same way computeIncomingVLVTYPE did. We + // can't include predecessor information in that decision to avoid disagreeing + // with the global analysis. + VSETVLIInfo BBLocalInfo; // Only be set if current VSETVLIInfo is from an explicit VSET(I)VLI. MachineInstr *PrevVSETVLIMI = nullptr; @@ -1014,6 +1020,7 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { MI.getOperand(3).setIsDead(false); MI.getOperand(4).setIsDead(false); CurInfo = getInfoForVSETVLI(MI); + BBLocalInfo = getInfoForVSETVLI(MI); PrevVSETVLIMI = &MI; continue; } @@ -1043,12 +1050,22 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { // use the predecessor information. assert(BlockInfo[MBB.getNumber()].Pred.isValid() && "Expected a valid predecessor state."); - if (needVSETVLI(NewInfo, BlockInfo[MBB.getNumber()].Pred) && + // Don't use predecessor information if there was an earlier instruction + // in this block that allowed a vsetvli to be skipped for load/store. + if (!(BBLocalInfo.isValid() && + canSkipVSETVLIForLoadStore(MI, NewInfo, BBLocalInfo)) && + needVSETVLI(NewInfo, BlockInfo[MBB.getNumber()].Pred) && needVSETVLIPHI(NewInfo, MBB)) { insertVSETVLI(MBB, MI, NewInfo, BlockInfo[MBB.getNumber()].Pred); CurInfo = NewInfo; + BBLocalInfo = NewInfo; } + + // We must update BBLocalInfo for every vector instruction. + if (!BBLocalInfo.isValid()) + BBLocalInfo = NewInfo; } else { + assert(BBLocalInfo.isValid()); // If this instruction isn't compatible with the previous VL/VTYPE // we need to insert a VSETVLI. 
// If this is a unit-stride or strided load/store, we may be able to use @@ -1084,6 +1101,7 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { if (NeedInsertVSETVLI) insertVSETVLI(MBB, MI, NewInfo, CurInfo); CurInfo = NewInfo; + BBLocalInfo = NewInfo; } } PrevVSETVLIMI = nullptr; @@ -1094,6 +1112,7 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) || MI.modifiesRegister(RISCV::VTYPE)) { CurInfo = VSETVLIInfo::getUnknown(); + BBLocalInfo = VSETVLIInfo::getUnknown(); PrevVSETVLIMI = nullptr; } } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 7baed2793e4e..55f4a19b79eb 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -654,8 +654,8 @@ void RISCVInstrInfo::movImm(MachineBasicBlock &MBB, BuildMI(MBB, MBBI, DL, get(RISCV::LUI), Result) .addImm(Inst.Imm) .setMIFlag(Flag); - } else if (Inst.Opc == RISCV::ADDUW) { - BuildMI(MBB, MBBI, DL, get(RISCV::ADDUW), Result) + } else if (Inst.Opc == RISCV::ADD_UW) { + BuildMI(MBB, MBBI, DL, get(RISCV::ADD_UW), Result) .addReg(SrcReg, RegState::Kill) .addReg(RISCV::X0) .setMIFlag(Flag); @@ -965,93 +965,29 @@ bool RISCVInstrInfo::isBranchOffsetInRange(unsigned BranchOp, } unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { + if (MI.isMetaInstruction()) + return 0; + unsigned Opcode = MI.getOpcode(); - switch (Opcode) { - default: { - if (MI.getParent() && MI.getParent()->getParent()) { - const auto MF = MI.getMF(); - const auto &TM = static_cast<const RISCVTargetMachine &>(MF->getTarget()); - const MCRegisterInfo &MRI = *TM.getMCRegisterInfo(); - const MCSubtargetInfo &STI = *TM.getMCSubtargetInfo(); - const RISCVSubtarget &ST = MF->getSubtarget<RISCVSubtarget>(); - if (isCompressibleInst(MI, &ST, MRI, STI)) - return 2; - } - return get(Opcode).getSize(); - } - case TargetOpcode::EH_LABEL: - case TargetOpcode::IMPLICIT_DEF: - case TargetOpcode::KILL: - case TargetOpcode::DBG_VALUE: - return 0; - // These values are determined based on RISCVExpandAtomicPseudoInsts, - // RISCVExpandPseudoInsts and RISCVMCCodeEmitter, depending on where the - // pseudos are expanded. 
- case RISCV::PseudoCALLReg: - case RISCV::PseudoCALL: - case RISCV::PseudoJump: - case RISCV::PseudoTAIL: - case RISCV::PseudoLLA: - case RISCV::PseudoLA: - case RISCV::PseudoLA_TLS_IE: - case RISCV::PseudoLA_TLS_GD: - return 8; - case RISCV::PseudoAtomicLoadNand32: - case RISCV::PseudoAtomicLoadNand64: - return 20; - case RISCV::PseudoMaskedAtomicSwap32: - case RISCV::PseudoMaskedAtomicLoadAdd32: - case RISCV::PseudoMaskedAtomicLoadSub32: - return 28; - case RISCV::PseudoMaskedAtomicLoadNand32: - return 32; - case RISCV::PseudoMaskedAtomicLoadMax32: - case RISCV::PseudoMaskedAtomicLoadMin32: - return 44; - case RISCV::PseudoMaskedAtomicLoadUMax32: - case RISCV::PseudoMaskedAtomicLoadUMin32: - return 36; - case RISCV::PseudoCmpXchg32: - case RISCV::PseudoCmpXchg64: - return 16; - case RISCV::PseudoMaskedCmpXchg32: - return 32; - case TargetOpcode::INLINEASM: - case TargetOpcode::INLINEASM_BR: { + if (Opcode == TargetOpcode::INLINEASM || + Opcode == TargetOpcode::INLINEASM_BR) { const MachineFunction &MF = *MI.getParent()->getParent(); const auto &TM = static_cast<const RISCVTargetMachine &>(MF.getTarget()); return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *TM.getMCAsmInfo()); } - case RISCV::PseudoVSPILL2_M1: - case RISCV::PseudoVSPILL2_M2: - case RISCV::PseudoVSPILL2_M4: - case RISCV::PseudoVSPILL3_M1: - case RISCV::PseudoVSPILL3_M2: - case RISCV::PseudoVSPILL4_M1: - case RISCV::PseudoVSPILL4_M2: - case RISCV::PseudoVSPILL5_M1: - case RISCV::PseudoVSPILL6_M1: - case RISCV::PseudoVSPILL7_M1: - case RISCV::PseudoVSPILL8_M1: - case RISCV::PseudoVRELOAD2_M1: - case RISCV::PseudoVRELOAD2_M2: - case RISCV::PseudoVRELOAD2_M4: - case RISCV::PseudoVRELOAD3_M1: - case RISCV::PseudoVRELOAD3_M2: - case RISCV::PseudoVRELOAD4_M1: - case RISCV::PseudoVRELOAD4_M2: - case RISCV::PseudoVRELOAD5_M1: - case RISCV::PseudoVRELOAD6_M1: - case RISCV::PseudoVRELOAD7_M1: - case RISCV::PseudoVRELOAD8_M1: { - // The values are determined based on expandVSPILL and expandVRELOAD that - // expand the pseudos depending on NF. - unsigned NF = isRVVSpillForZvlsseg(Opcode)->first; - return 4 * (2 * NF - 1); - } + + if (MI.getParent() && MI.getParent()->getParent()) { + const auto MF = MI.getMF(); + const auto &TM = static_cast<const RISCVTargetMachine &>(MF->getTarget()); + const MCRegisterInfo &MRI = *TM.getMCRegisterInfo(); + const MCSubtargetInfo &STI = *TM.getMCSubtargetInfo(); + const RISCVSubtarget &ST = MF->getSubtarget<RISCVSubtarget>(); + if (isCompressibleInst(MI, &ST, MRI, STI)) + return 2; } + return get(Opcode).getSize(); } bool RISCVInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 64cd89cda06a..ee6a74b7f14f 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -1183,7 +1183,7 @@ def : Pat<(brind (add GPRJALR:$rs1, simm12:$imm12)), // destination. // Define AsmString to print "call" when compile with -S flag. // Define isCodeGenOnly = 0 to support parsing assembly "call" instruction. -let isCall = 1, isBarrier = 1, isCodeGenOnly = 0, hasSideEffects = 0, +let isCall = 1, isBarrier = 1, isCodeGenOnly = 0, Size = 8, hasSideEffects = 0, mayStore = 0, mayLoad = 0 in def PseudoCALLReg : Pseudo<(outs GPR:$rd), (ins call_symbol:$func), []> { let AsmString = "call\t$rd, $func"; @@ -1195,7 +1195,7 @@ def PseudoCALLReg : Pseudo<(outs GPR:$rd), (ins call_symbol:$func), []> { // if the offset fits in a signed 21-bit immediate. 
// Define AsmString to print "call" when compile with -S flag. // Define isCodeGenOnly = 0 to support parsing assembly "call" instruction. -let isCall = 1, Defs = [X1], isCodeGenOnly = 0 in +let isCall = 1, Defs = [X1], isCodeGenOnly = 0, Size = 8 in def PseudoCALL : Pseudo<(outs), (ins call_symbol:$func), []> { let AsmString = "call\t$func"; } @@ -1220,7 +1220,7 @@ def PseudoRET : Pseudo<(outs), (ins), [(riscv_ret_flag)]>, // expand to auipc and jalr while encoding. // Define AsmString to print "tail" when compile with -S flag. let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [X2], - isCodeGenOnly = 0 in + Size = 8, isCodeGenOnly = 0 in def PseudoTAIL : Pseudo<(outs), (ins call_symbol:$dst), []> { let AsmString = "tail\t$dst"; } @@ -1235,28 +1235,28 @@ def : Pat<(riscv_tail (iPTR tglobaladdr:$dst)), def : Pat<(riscv_tail (iPTR texternalsym:$dst)), (PseudoTAIL texternalsym:$dst)>; -let isCall = 0, isBarrier = 1, isBranch = 1, isTerminator = 1, +let isCall = 0, isBarrier = 1, isBranch = 1, isTerminator = 1, Size = 8, isCodeGenOnly = 0, hasSideEffects = 0, mayStore = 0, mayLoad = 0 in def PseudoJump : Pseudo<(outs GPR:$rd), (ins pseudo_jump_symbol:$target), []> { let AsmString = "jump\t$target, $rd"; } -let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 0, +let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 8, isCodeGenOnly = 0, isAsmParserOnly = 1 in def PseudoLLA : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], "lla", "$dst, $src">; -let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 0, +let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 0, isAsmParserOnly = 1 in def PseudoLA : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], "la", "$dst, $src">; -let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 0, +let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 0, isAsmParserOnly = 1 in def PseudoLA_TLS_IE : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], "la.tls.ie", "$dst, $src">; -let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 0, +let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 0, isAsmParserOnly = 1 in def PseudoLA_TLS_GD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], "la.tls.gd", "$dst, $src">; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td index ee10c3a54b2f..7d23dafb0346 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td @@ -188,6 +188,7 @@ class PseudoAMO : Pseudo<(outs GPR:$res, GPR:$scratch), let hasSideEffects = 0; } +let Size = 20 in def PseudoAtomicLoadNand32 : PseudoAMO; // Ordering constants must be kept in sync with the AtomicOrdering enum in // AtomicOrdering.h. 
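The call-like pseudos above all receive Size = 8 because each expands to auipc plus one more 4-byte instruction (jalr, ld, or addi). Those pairs rely on the standard RISC-V hi/lo split, whose +0x800 rounding lets them reach any 32-bit PC-relative offset; a standalone sketch of that arithmetic:

    #include <cassert>
    #include <cstdint>

    // auipc contributes (hi20 << 12) relative to PC; the paired instruction
    // adds a signed 12-bit lo part. Rounding by +0x800 before taking the high
    // bits compensates for the sign extension of the low part.
    uint32_t hi20(int32_t off) { return (uint32_t(off) + 0x800u) >> 12; }
    int32_t lo12(int32_t off) { return int32_t(uint32_t(off) << 20) >> 20; }

    int main() {
      for (int32_t off : {0, 1, 0x7ff, 0x800, -1, -0x800, 0x12345678})
        assert(int32_t(hi20(off) << 12) + lo12(off) == off);
    }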
@@ -242,27 +243,35 @@ class PseudoMaskedAMOMinMaxPat<Intrinsic intrin, Pseudo AMOInst> (AMOInst GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt, timm:$ordering)>; +let Size = 28 in def PseudoMaskedAtomicSwap32 : PseudoMaskedAMO; def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_xchg_i32, PseudoMaskedAtomicSwap32>; +let Size = 28 in def PseudoMaskedAtomicLoadAdd32 : PseudoMaskedAMO; def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_add_i32, PseudoMaskedAtomicLoadAdd32>; +let Size = 28 in def PseudoMaskedAtomicLoadSub32 : PseudoMaskedAMO; def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_sub_i32, PseudoMaskedAtomicLoadSub32>; +let Size = 32 in def PseudoMaskedAtomicLoadNand32 : PseudoMaskedAMO; def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_nand_i32, PseudoMaskedAtomicLoadNand32>; +let Size = 44 in def PseudoMaskedAtomicLoadMax32 : PseudoMaskedAMOMinMax; def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_max_i32, PseudoMaskedAtomicLoadMax32>; +let Size = 44 in def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMOMinMax; def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_min_i32, PseudoMaskedAtomicLoadMin32>; +let Size = 36 in def PseudoMaskedAtomicLoadUMax32 : PseudoMaskedAMOUMinUMax; def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umax_i32, PseudoMaskedAtomicLoadUMax32>; +let Size = 36 in def PseudoMaskedAtomicLoadUMin32 : PseudoMaskedAMOUMinUMax; def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin_i32, PseudoMaskedAtomicLoadUMin32>; @@ -276,6 +285,7 @@ class PseudoCmpXchg let mayLoad = 1; let mayStore = 1; let hasSideEffects = 0; + let Size = 16; } // Ordering constants must be kept in sync with the AtomicOrdering enum in @@ -304,6 +314,7 @@ def PseudoMaskedCmpXchg32 let mayLoad = 1; let mayStore = 1; let hasSideEffects = 0; + let Size = 32; } def : Pat<(int_riscv_masked_cmpxchg_i32 @@ -347,6 +358,7 @@ def : Pat<(i64 (atomic_load_sub_64_seq_cst GPR:$addr, GPR:$incr)), /// 64-bit pseudo AMOs +let Size = 20 in def PseudoAtomicLoadNand64 : PseudoAMO; // Ordering constants must be kept in sync with the AtomicOrdering enum in // AtomicOrdering.h. 
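The masked pseudos in this file exist because LR/SC only operates on whole words: a byte or halfword atomic is emulated on the containing aligned 32-bit word, with a mask selecting the bits that may change (the Size fields above count the 4-byte instructions in each fixed expansion). A scalar model of the merge step performed inside the LR/SC loop, illustrative only:

    #include <cstdint>

    // One iteration of a masked swap: only the masked field is replaced, so a
    // sub-word exchange can reuse a word-sized sc.w.
    uint32_t maskedSwapWord(uint32_t loaded, uint32_t incr, uint32_t mask) {
      return (loaded & ~mask) | (incr & mask);
    }

    // The nand flavor (PseudoMaskedAtomicLoadNand32) applies ~(loaded & incr)
    // to the field before the same merge.
    uint32_t maskedNandWord(uint32_t loaded, uint32_t incr, uint32_t mask) {
      return (loaded & ~mask) | (~(loaded & incr) & mask);
    }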
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 4e7e251bc412..9087ed50f9fc 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -3836,7 +3836,7 @@ multiclass VPatConversionVF_WF <string intrinsic, string instruction> { } multiclass VPatCompare_VI<string intrinsic, string inst, - ImmLeaf ImmType = simm5_plus1> { + ImmLeaf ImmType> { foreach vti = AllIntegerVectors in { defvar Intr = !cast<Intrinsic>(intrinsic); defvar Pseudo = !cast<Instruction>(inst#"_VI_"#vti.LMul.MX); @@ -3899,11 +3899,13 @@ let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 1 in { foreach lmul = MxList in { foreach nf = NFSet<lmul>.L in { defvar vreg = SegRegClass<lmul, nf>.RC; - let hasSideEffects = 0, mayLoad = 0, mayStore = 1, isCodeGenOnly = 1 in { + let hasSideEffects = 0, mayLoad = 0, mayStore = 1, isCodeGenOnly = 1, + Size = !mul(4, !sub(!mul(nf, 2), 1)) in { def "PseudoVSPILL" # nf # "_" # lmul.MX : Pseudo<(outs), (ins vreg:$rs1, GPR:$rs2, GPR:$vlenb), []>; } - let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 1 in { + let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 1, + Size = !mul(4, !sub(!mul(nf, 2), 1)) in { def "PseudoVRELOAD" # nf # "_" # lmul.MX : Pseudo<(outs vreg:$rs1), (ins GPR:$rs2, GPR:$vlenb), []>; } @@ -4657,13 +4659,15 @@ defm : VPatBinarySwappedM_VV<"int_riscv_vmsgt", "PseudoVMSLT", AllIntegerVectors defm : VPatBinarySwappedM_VV<"int_riscv_vmsgeu", "PseudoVMSLEU", AllIntegerVectors>; defm : VPatBinarySwappedM_VV<"int_riscv_vmsge", "PseudoVMSLE", AllIntegerVectors>; -// Match vmslt(u).vx intrinsics to vmsle(u).vi if the scalar is -15 to 16. This -// avoids the user needing to know that there is no vmslt(u).vi instruction. -// Similar for vmsge(u).vx intrinsics using vmslt(u).vi. -defm : VPatCompare_VI<"int_riscv_vmslt", "PseudoVMSLE">; +// Match vmslt(u).vx intrinsics to vmsle(u).vi if the scalar is -15 to 16 and +// non-zero. Zero can be .vx with x0. This avoids the user needing to know that +// there is no vmslt(u).vi instruction. Similar for vmsge(u).vx intrinsics +// using vmslt(u).vi. +defm : VPatCompare_VI<"int_riscv_vmslt", "PseudoVMSLE", simm5_plus1_nonzero>; defm : VPatCompare_VI<"int_riscv_vmsltu", "PseudoVMSLEU", simm5_plus1_nonzero>; -defm : VPatCompare_VI<"int_riscv_vmsge", "PseudoVMSGT">; +// We need to handle 0 for vmsge.vi using vmslt.vi because there is no vmsge.vx. 
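The pattern rewrites above and below rest on the identity x < imm <=> x <= imm - 1, which turns a vmslt(u).vx against a small constant into a vmsle(u).vi with immediate imm - 1; that is where the simm5_plus1 range of -15 to 16 comes from, and zero is excluded where noted because x < 0 is already expressible as vmslt.vx against x0. A scalar check of the identity:

    #include <cassert>

    bool ltViaLe(int x, int imm) { return x <= imm - 1; }

    int main() {
      // Exhaustively confirm the rewrite over the immediate range used above.
      for (int x = -64; x <= 64; ++x)
        for (int imm = -15; imm <= 16; ++imm)
          if (imm != 0)
            assert((x < imm) == ltViaLe(x, imm));
    }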
+defm : VPatCompare_VI<"int_riscv_vmsge", "PseudoVMSGT", simm5_plus1>; defm : VPatCompare_VI<"int_riscv_vmsgeu", "PseudoVMSGTU", simm5_plus1_nonzero>; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td index e452a84a9a6f..2b920d29ab81 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -539,7 +539,7 @@ defm : VPatIntegerSetCCSDNode_VV_VX_VI<SETNE, "PseudoVMSNE">; defm : VPatIntegerSetCCSDNode_VV_VX<SETLT, "PseudoVMSLT">; defm : VPatIntegerSetCCSDNode_VV_VX<SETULT, "PseudoVMSLTU">; defm : VPatIntegerSetCCSDNode_VIPlus1<SETLT, "PseudoVMSLE", - SplatPat_simm5_plus1>; + SplatPat_simm5_plus1_nonzero>; defm : VPatIntegerSetCCSDNode_VIPlus1<SETULT, "PseudoVMSLEU", SplatPat_simm5_plus1_nonzero>; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index 964f0fa54512..e71c498fd5f4 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -228,6 +228,7 @@ def SDT_RISCVVWBinOp_VL : SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisVT<4, XLenVT>]>; def riscv_vwmul_vl : SDNode<"RISCVISD::VWMUL_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>; def riscv_vwmulu_vl : SDNode<"RISCVISD::VWMULU_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>; +def riscv_vwmulsu_vl : SDNode<"RISCVISD::VWMULSU_VL", SDT_RISCVVWBinOp_VL>; def riscv_vwaddu_vl : SDNode<"RISCVISD::VWADDU_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>; def SDTRVVVecReduce : SDTypeProfile<1, 5, [ @@ -832,7 +833,7 @@ foreach vti = AllIntegerVectors in { defm : VPatIntegerSetCCVL_VI_Swappable<vti, "PseudoVMSGTU", SETUGT, SETULT>; defm : VPatIntegerSetCCVL_VIPlus1<vti, "PseudoVMSLE", SETLT, - SplatPat_simm5_plus1>; + SplatPat_simm5_plus1_nonzero>; defm : VPatIntegerSetCCVL_VIPlus1<vti, "PseudoVMSLEU", SETULT, SplatPat_simm5_plus1_nonzero>; defm : VPatIntegerSetCCVL_VIPlus1<vti, "PseudoVMSGT", SETGE, @@ -861,6 +862,7 @@ defm : VPatBinaryVL_VV_VX<riscv_srem_vl, "PseudoVREM">; // 12.12. 
Vector Widening Integer Multiply Instructions defm : VPatBinaryWVL_VV_VX<riscv_vwmul_vl, "PseudoVWMUL">; defm : VPatBinaryWVL_VV_VX<riscv_vwmulu_vl, "PseudoVWMULU">; +defm : VPatBinaryWVL_VV_VX<riscv_vwmulsu_vl, "PseudoVWMULSU">; // 12.13 Vector Single-Width Integer Multiply-Add Instructions foreach vti = AllIntegerVectors in { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td index db3f5851879a..07884d35f63c 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td @@ -337,13 +337,39 @@ def SH3ADD : ALU_rr<0b0010000, 0b110, "sh3add">, Sched<[WriteSHXADD, ReadSHXADD, ReadSHXADD]>; } // Predicates = [HasStdExtZba] +let Predicates = [HasStdExtZba, IsRV64] in { +def SLLI_UW : RVBShift_ri<0b00001, 0b001, OPC_OP_IMM_32, "slli.uw">, + Sched<[WriteShiftImm32, ReadShiftImm32]>; +def ADD_UW : ALUW_rr<0b0000100, 0b000, "add.uw">, + Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>; +def SH1ADD_UW : ALUW_rr<0b0010000, 0b010, "sh1add.uw">, + Sched<[WriteSHXADD32, ReadSHXADD32, ReadSHXADD32]>; +def SH2ADD_UW : ALUW_rr<0b0010000, 0b100, "sh2add.uw">, + Sched<[WriteSHXADD32, ReadSHXADD32, ReadSHXADD32]>; +def SH3ADD_UW : ALUW_rr<0b0010000, 0b110, "sh3add.uw">, + Sched<[WriteSHXADD32, ReadSHXADD32, ReadSHXADD32]>; +} // Predicates = [HasStdExtZbb, IsRV64] + let Predicates = [HasStdExtZbbOrZbpOrZbkb] in { def ROL : ALU_rr<0b0110000, 0b001, "rol">, Sched<[WriteRotateReg, ReadRotateReg, ReadRotateReg]>; def ROR : ALU_rr<0b0110000, 0b101, "ror">, Sched<[WriteRotateReg, ReadRotateReg, ReadRotateReg]>; + +def RORI : RVBShift_ri<0b01100, 0b101, OPC_OP_IMM, "rori">, + Sched<[WriteRotateImm, ReadRotateImm]>; } // Predicates = [HasStdExtZbbOrZbpOrZbkb] +let Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64] in { +def ROLW : ALUW_rr<0b0110000, 0b001, "rolw">, + Sched<[WriteRotateReg32, ReadRotateReg32, ReadRotateReg32]>; +def RORW : ALUW_rr<0b0110000, 0b101, "rorw">, + Sched<[WriteRotateReg32, ReadRotateReg32, ReadRotateReg32]>; + +def RORIW : RVBShiftW_ri<0b0110000, 0b101, OPC_OP_IMM_32, "roriw">, + Sched<[WriteRotateImm32, ReadRotateImm32]>; +} // Predicates = [HasStdExtZbbOrZbp, IsRV64] + let Predicates = [HasStdExtZbs] in { def BCLR : ALU_rr<0b0100100, 0b001, "bclr">, Sched<[WriteSingleBit, ReadSingleBit, ReadSingleBit]>; @@ -353,27 +379,7 @@ def BINV : ALU_rr<0b0110100, 0b001, "binv">, Sched<[WriteSingleBit, ReadSingleBit, ReadSingleBit]>; def BEXT : ALU_rr<0b0100100, 0b101, "bext">, Sched<[WriteSingleBit, ReadSingleBit, ReadSingleBit]>; -} // Predicates = [HasStdExtZbs] - -let Predicates = [HasStdExtZbp] in { -def GORC : ALU_rr<0b0010100, 0b101, "gorc">, Sched<[]>; -def GREV : ALU_rr<0b0110100, 0b101, "grev">, Sched<[]>; -} // Predicates = [HasStdExtZbp] -let Predicates = [HasStdExtZbpOrZbkx] in { -def XPERMN : ALU_rr<0b0010100, 0b010, "xperm4">, Sched<[]>; -def XPERMB : ALU_rr<0b0010100, 0b100, "xperm8">, Sched<[]>; -} // Predicates = [HasStdExtZbpOrZbkx] - -let Predicates = [HasStdExtZbp] in { -def XPERMH : ALU_rr<0b0010100, 0b110, "xperm.h">, Sched<[]>; -} // Predicates = [HasStdExtZbp] - -let Predicates = [HasStdExtZbbOrZbpOrZbkb] in -def RORI : RVBShift_ri<0b01100, 0b101, OPC_OP_IMM, "rori">, - Sched<[WriteRotateImm, ReadRotateImm]>; - -let Predicates = [HasStdExtZbs] in { def BCLRI : RVBShift_ri<0b01001, 0b001, OPC_OP_IMM, "bclri">, Sched<[WriteSingleBitImm, ReadSingleBitImm]>; def BSETI : RVBShift_ri<0b00101, 0b001, OPC_OP_IMM, "bseti">, @@ -385,10 +391,42 @@ def BEXTI : RVBShift_ri<0b01001, 0b101, OPC_OP_IMM, "bexti">, 
} // Predicates = [HasStdExtZbs] let Predicates = [HasStdExtZbp] in { +def GORC : ALU_rr<0b0010100, 0b101, "gorc">, Sched<[]>; +def GREV : ALU_rr<0b0110100, 0b101, "grev">, Sched<[]>; + def GREVI : RVBShift_ri<0b01101, 0b101, OPC_OP_IMM, "grevi">, Sched<[]>; def GORCI : RVBShift_ri<0b00101, 0b101, OPC_OP_IMM, "gorci">, Sched<[]>; + +def SHFL : ALU_rr<0b0000100, 0b001, "shfl">, Sched<[]>; +def UNSHFL : ALU_rr<0b0000100, 0b101, "unshfl">, Sched<[]>; + +def SHFLI : RVBShfl_ri<0b0000100, 0b001, OPC_OP_IMM, "shfli">, Sched<[]>; +def UNSHFLI : RVBShfl_ri<0b0000100, 0b101, OPC_OP_IMM, "unshfli">, Sched<[]>; + +def XPERM_H : ALU_rr<0b0010100, 0b110, "xperm.h">, Sched<[]>; } // Predicates = [HasStdExtZbp] +let Predicates = [HasStdExtZbp, IsRV64] in { +def GORCW : ALUW_rr<0b0010100, 0b101, "gorcw">, Sched<[]>; +def GREVW : ALUW_rr<0b0110100, 0b101, "grevw">, Sched<[]>; + +def GORCIW : RVBShiftW_ri<0b0010100, 0b101, OPC_OP_IMM_32, "gorciw">, Sched<[]>; +def GREVIW : RVBShiftW_ri<0b0110100, 0b101, OPC_OP_IMM_32, "greviw">, Sched<[]>; + +def SHFLW : ALUW_rr<0b0000100, 0b001, "shflw">, Sched<[]>; +def UNSHFLW : ALUW_rr<0b0000100, 0b101, "unshflw">, Sched<[]>; + +def XPERM_W : ALU_rr<0b0010100, 0b000, "xperm.w">, Sched<[]>; +} // Predicates = [HasStdExtZbp, IsRV64] + +// These instructions were named xperm.n and xperm.b in the last version of +// the draft bit manipulation specification they were included in. However, we +// use the mnemonics given to them in the ratified Zbkx extension. +let Predicates = [HasStdExtZbpOrZbkx] in { +def XPERM4 : ALU_rr<0b0010100, 0b010, "xperm4">, Sched<[]>; +def XPERM8 : ALU_rr<0b0010100, 0b100, "xperm8">, Sched<[]>; +} // Predicates = [HasStdExtZbpOrZbkx] + let Predicates = [HasStdExtZbt] in { def CMIX : RVBTernaryR<0b11, 0b001, OPC_OP, "cmix", "$rd, $rs2, $rs1, $rs3">, Sched<[]>; @@ -402,6 +440,15 @@ def FSRI : RVBTernaryImm6<0b101, OPC_OP_IMM, "fsri", "$rd, $rs1, $rs3, $shamt">, Sched<[]>; } // Predicates = [HasStdExtZbt] +let Predicates = [HasStdExtZbt, IsRV64] in { +def FSLW : RVBTernaryR<0b10, 0b001, OPC_OP_32, + "fslw", "$rd, $rs1, $rs3, $rs2">, Sched<[]>; +def FSRW : RVBTernaryR<0b10, 0b101, OPC_OP_32, "fsrw", + "$rd, $rs1, $rs3, $rs2">, Sched<[]>; +def FSRIW : RVBTernaryImm5<0b10, 0b101, OPC_OP_IMM_32, + "fsriw", "$rd, $rs1, $rs3, $shamt">, Sched<[]>; +} // Predicates = [HasStdExtZbt, IsRV64] + let Predicates = [HasStdExtZbb] in { def CLZ : RVBUnary<0b0110000, 0b00000, 0b001, OPC_OP_IMM, "clz">, Sched<[WriteCLZ, ReadCLZ]>; @@ -411,42 +458,45 @@ def CPOP : RVBUnary<0b0110000, 0b00010, 0b001, OPC_OP_IMM, "cpop">, Sched<[WriteCPOP, ReadCPOP]>; } // Predicates = [HasStdExtZbb] -let Predicates = [HasStdExtZbm, IsRV64] in -def BMATFLIP : RVBUnary<0b0110000, 0b00011, 0b001, OPC_OP_IMM, "bmatflip">, - Sched<[]>; +let Predicates = [HasStdExtZbb, IsRV64] in { +def CLZW : RVBUnary<0b0110000, 0b00000, 0b001, OPC_OP_IMM_32, "clzw">, + Sched<[WriteCLZ32, ReadCLZ32]>; +def CTZW : RVBUnary<0b0110000, 0b00001, 0b001, OPC_OP_IMM_32, "ctzw">, + Sched<[WriteCTZ32, ReadCTZ32]>; +def CPOPW : RVBUnary<0b0110000, 0b00010, 0b001, OPC_OP_IMM_32, "cpopw">, + Sched<[WriteCPOP32, ReadCPOP32]>; +} // Predicates = [HasStdExtZbb, IsRV64] let Predicates = [HasStdExtZbb] in { -def SEXTB : RVBUnary<0b0110000, 0b00100, 0b001, OPC_OP_IMM, "sext.b">, - Sched<[WriteIALU, ReadIALU]>; -def SEXTH : RVBUnary<0b0110000, 0b00101, 0b001, OPC_OP_IMM, "sext.h">, - Sched<[WriteIALU, ReadIALU]>; +def SEXT_B : RVBUnary<0b0110000, 0b00100, 0b001, OPC_OP_IMM, "sext.b">, + Sched<[WriteIALU, ReadIALU]>; +def SEXT_H 
: RVBUnary<0b0110000, 0b00101, 0b001, OPC_OP_IMM, "sext.h">, + Sched<[WriteIALU, ReadIALU]>; } // Predicates = [HasStdExtZbb] let Predicates = [HasStdExtZbr] in { -def CRC32B : RVBUnary<0b0110000, 0b10000, 0b001, OPC_OP_IMM, "crc32.b">, - Sched<[]>; -def CRC32H : RVBUnary<0b0110000, 0b10001, 0b001, OPC_OP_IMM, "crc32.h">, - Sched<[]>; -def CRC32W : RVBUnary<0b0110000, 0b10010, 0b001, OPC_OP_IMM, "crc32.w">, - Sched<[]>; -} // Predicates = [HasStdExtZbr] - -let Predicates = [HasStdExtZbr, IsRV64] in -def CRC32D : RVBUnary<0b0110000, 0b10011, 0b001, OPC_OP_IMM, "crc32.d">, +def CRC32_B : RVBUnary<0b0110000, 0b10000, 0b001, OPC_OP_IMM, "crc32.b">, Sched<[]>; - -let Predicates = [HasStdExtZbr] in { -def CRC32CB : RVBUnary<0b0110000, 0b11000, 0b001, OPC_OP_IMM, "crc32c.b">, +def CRC32_H : RVBUnary<0b0110000, 0b10001, 0b001, OPC_OP_IMM, "crc32.h">, Sched<[]>; -def CRC32CH : RVBUnary<0b0110000, 0b11001, 0b001, OPC_OP_IMM, "crc32c.h">, - Sched<[]>; -def CRC32CW : RVBUnary<0b0110000, 0b11010, 0b001, OPC_OP_IMM, "crc32c.w">, +def CRC32_W : RVBUnary<0b0110000, 0b10010, 0b001, OPC_OP_IMM, "crc32.w">, Sched<[]>; + +def CRC32C_B : RVBUnary<0b0110000, 0b11000, 0b001, OPC_OP_IMM, "crc32c.b">, + Sched<[]>; +def CRC32C_H : RVBUnary<0b0110000, 0b11001, 0b001, OPC_OP_IMM, "crc32c.h">, + Sched<[]>; +def CRC32C_W : RVBUnary<0b0110000, 0b11010, 0b001, OPC_OP_IMM, "crc32c.w">, + Sched<[]>; } // Predicates = [HasStdExtZbr] -let Predicates = [HasStdExtZbr, IsRV64] in -def CRC32CD : RVBUnary<0b0110000, 0b11011, 0b001, OPC_OP_IMM, "crc32c.d">, - Sched<[]>; +let Predicates = [HasStdExtZbr, IsRV64] in { +def CRC32_D : RVBUnary<0b0110000, 0b10011, 0b001, OPC_OP_IMM, "crc32.d">, + Sched<[]>; + +def CRC32C_D : RVBUnary<0b0110000, 0b11011, 0b001, OPC_OP_IMM, "crc32c.d">, + Sched<[]>; +} // Predicates = [HasStdExtZbr, IsRV64] let Predicates = [HasStdExtZbc] in { def CLMULR : ALU_rr<0b0000101, 0b010, "clmulr">, @@ -472,8 +522,6 @@ def MAXU : ALU_rr<0b0000101, 0b111, "maxu">, } // Predicates = [HasStdExtZbb] let Predicates = [HasStdExtZbp] in { -def SHFL : ALU_rr<0b0000100, 0b001, "shfl">, Sched<[]>; -def UNSHFL : ALU_rr<0b0000100, 0b101, "unshfl">, Sched<[]>; } // Predicates = [HasStdExtZbp] let Predicates = [HasStdExtZbe] in { @@ -483,15 +531,31 @@ def BDECOMPRESS : ALU_rr<0b0100100, 0b110, "bdecompress">, Sched<[]>; def BCOMPRESS : ALU_rr<0b0000100, 0b110, "bcompress">, Sched<[]>; } // Predicates = [HasStdExtZbe] +let Predicates = [HasStdExtZbe, IsRV64] in { +// NOTE: These mnemonics are from the 0.94 spec. There is a name conflict with +// bextw in the 0.93 spec. 
+def BDECOMPRESSW : ALUW_rr<0b0100100, 0b110, "bdecompressw">, Sched<[]>; +def BCOMPRESSW : ALUW_rr<0b0000100, 0b110, "bcompressw">, Sched<[]>; +} // Predicates = [HasStdExtZbe, IsRV64] + let Predicates = [HasStdExtZbpOrZbkb] in { def PACK : ALU_rr<0b0000100, 0b100, "pack">, Sched<[]>; def PACKH : ALU_rr<0b0000100, 0b111, "packh">, Sched<[]>; } // Predicates = [HasStdExtZbpOrZbkb] +let Predicates = [HasStdExtZbpOrZbkb, IsRV64] in +def PACKW : ALUW_rr<0b0000100, 0b100, "packw">, Sched<[]>; + let Predicates = [HasStdExtZbp] in def PACKU : ALU_rr<0b0100100, 0b100, "packu">, Sched<[]>; +let Predicates = [HasStdExtZbp, IsRV64] in +def PACKUW : ALUW_rr<0b0100100, 0b100, "packuw">, Sched<[]>; + let Predicates = [HasStdExtZbm, IsRV64] in { +def BMATFLIP : RVBUnary<0b0110000, 0b00011, 0b001, OPC_OP_IMM, "bmatflip">, + Sched<[]>; + def BMATOR : ALU_rr<0b0000100, 0b011, "bmator">, Sched<[]>; def BMATXOR : ALU_rr<0b0100100, 0b011, "bmatxor">, Sched<[]>; } // Predicates = [HasStdExtZbm, IsRV64] @@ -500,105 +564,18 @@ let Predicates = [HasStdExtZbf] in def BFP : ALU_rr<0b0100100, 0b111, "bfp">, Sched<[WriteBFP, ReadBFP, ReadBFP]>; -let Predicates = [HasStdExtZbp] in { -def SHFLI : RVBShfl_ri<0b0000100, 0b001, OPC_OP_IMM, "shfli">, Sched<[]>; -def UNSHFLI : RVBShfl_ri<0b0000100, 0b101, OPC_OP_IMM, "unshfli">, Sched<[]>; -} // Predicates = [HasStdExtZbp] - -let Predicates = [HasStdExtZba, IsRV64] in { -def SLLIUW : RVBShift_ri<0b00001, 0b001, OPC_OP_IMM_32, "slli.uw">, - Sched<[WriteShiftImm32, ReadShiftImm32]>; -def ADDUW : ALUW_rr<0b0000100, 0b000, "add.uw">, - Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>; -def SH1ADDUW : ALUW_rr<0b0010000, 0b010, "sh1add.uw">, - Sched<[WriteSHXADD32, ReadSHXADD32, ReadSHXADD32]>; -def SH2ADDUW : ALUW_rr<0b0010000, 0b100, "sh2add.uw">, - Sched<[WriteSHXADD32, ReadSHXADD32, ReadSHXADD32]>; -def SH3ADDUW : ALUW_rr<0b0010000, 0b110, "sh3add.uw">, - Sched<[WriteSHXADD32, ReadSHXADD32, ReadSHXADD32]>; -} // Predicates = [HasStdExtZbb, IsRV64] - -let Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64] in { -def ROLW : ALUW_rr<0b0110000, 0b001, "rolw">, - Sched<[WriteRotateReg32, ReadRotateReg32, ReadRotateReg32]>; -def RORW : ALUW_rr<0b0110000, 0b101, "rorw">, - Sched<[WriteRotateReg32, ReadRotateReg32, ReadRotateReg32]>; -} // Predicates = [HasStdExtZbbOrZbp, IsRV64] - -let Predicates = [HasStdExtZbp, IsRV64] in { -def GORCW : ALUW_rr<0b0010100, 0b101, "gorcw">, Sched<[]>; -def GREVW : ALUW_rr<0b0110100, 0b101, "grevw">, Sched<[]>; -} // Predicates = [HasStdExtZbp, IsRV64] - -let Predicates = [HasStdExtZbp, IsRV64] in { -def XPERMW : ALU_rr<0b0010100, 0b000, "xperm.w">, Sched<[]>; -} // Predicates = [HasStdExtZbp, IsRV64] - -let Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64] in -def RORIW : RVBShiftW_ri<0b0110000, 0b101, OPC_OP_IMM_32, "roriw">, - Sched<[WriteRotateImm32, ReadRotateImm32]>; - -let Predicates = [HasStdExtZbp, IsRV64] in { -def GORCIW : RVBShiftW_ri<0b0010100, 0b101, OPC_OP_IMM_32, "gorciw">, Sched<[]>; -def GREVIW : RVBShiftW_ri<0b0110100, 0b101, OPC_OP_IMM_32, "greviw">, Sched<[]>; -} // Predicates = [HasStdExtZbp, IsRV64] - -let Predicates = [HasStdExtZbt, IsRV64] in { -def FSLW : RVBTernaryR<0b10, 0b001, OPC_OP_32, - "fslw", "$rd, $rs1, $rs3, $rs2">, Sched<[]>; -def FSRW : RVBTernaryR<0b10, 0b101, OPC_OP_32, "fsrw", - "$rd, $rs1, $rs3, $rs2">, Sched<[]>; -def FSRIW : RVBTernaryImm5<0b10, 0b101, OPC_OP_IMM_32, - "fsriw", "$rd, $rs1, $rs3, $shamt">, Sched<[]>; -} // Predicates = [HasStdExtZbt, IsRV64] - -let Predicates = [HasStdExtZbb, IsRV64] in { -def CLZW : 
RVBUnary<0b0110000, 0b00000, 0b001, OPC_OP_IMM_32, "clzw">, - Sched<[WriteCLZ32, ReadCLZ32]>; -def CTZW : RVBUnary<0b0110000, 0b00001, 0b001, OPC_OP_IMM_32, "ctzw">, - Sched<[WriteCTZ32, ReadCTZ32]>; -def CPOPW : RVBUnary<0b0110000, 0b00010, 0b001, OPC_OP_IMM_32, "cpopw">, - Sched<[WriteCPOP32, ReadCPOP32]>; -} // Predicates = [HasStdExtZbb, IsRV64] - -let Predicates = [HasStdExtZbp, IsRV64] in { -def SHFLW : ALUW_rr<0b0000100, 0b001, "shflw">, Sched<[]>; -def UNSHFLW : ALUW_rr<0b0000100, 0b101, "unshflw">, Sched<[]>; -} // Predicates = [HasStdExtZbp, IsRV64] - -let Predicates = [HasStdExtZbe, IsRV64] in { -// NOTE: These mnemonics are from the 0.94 spec. There is a name conflict with -// bextw in the 0.93 spec. -def BDECOMPRESSW : ALUW_rr<0b0100100, 0b110, "bdecompressw">, Sched<[]>; -def BCOMPRESSW : ALUW_rr<0b0000100, 0b110, "bcompressw">, Sched<[]>; -} // Predicates = [HasStdExtZbe, IsRV64] - -let Predicates = [HasStdExtZbpOrZbkb, IsRV64] in -def PACKW : ALUW_rr<0b0000100, 0b100, "packw">, Sched<[]>; - -let Predicates = [HasStdExtZbp, IsRV64] in -def PACKUW : ALUW_rr<0b0100100, 0b100, "packuw">, Sched<[]>; - let Predicates = [HasStdExtZbf, IsRV64] in def BFPW : ALUW_rr<0b0100100, 0b111, "bfpw">, Sched<[WriteBFP32, ReadBFP32, ReadBFP32]>; let Predicates = [HasStdExtZbbOrZbp, IsRV32] in { -let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -def ZEXTH_RV32 : RVInstR<0b0000100, 0b100, OPC_OP, (outs GPR:$rd), - (ins GPR:$rs1), "zext.h", "$rd, $rs1">, - Sched<[WriteIALU, ReadIALU]> { - let rs2 = 0b00000; -} +def ZEXT_H_RV32 : RVBUnary<0b0000100, 0b00000, 0b100, OPC_OP, "zext.h">, + Sched<[WriteIALU, ReadIALU]>; } // Predicates = [HasStdExtZbbOrZbp, IsRV32] let Predicates = [HasStdExtZbbOrZbp, IsRV64] in { -let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -def ZEXTH_RV64 : RVInstR<0b0000100, 0b100, OPC_OP_32, (outs GPR:$rd), - (ins GPR:$rs1), "zext.h", "$rd, $rs1">, - Sched<[WriteIALU, ReadIALU]> { - let rs2 = 0b00000; -} +def ZEXT_H_RV64 : RVBUnary<0b0000100, 0b00000, 0b100, OPC_OP_32, "zext.h">, + Sched<[WriteIALU, ReadIALU]>; } // Predicates = [HasStdExtZbbOrZbp, IsRV64] // We treat rev8 and orc.b as standalone instructions even though they use a @@ -619,8 +596,8 @@ def REV8_RV64 : RVBUnary<0b0110101, 0b11000, 0b101, OPC_OP_IMM, "rev8">, } // Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64] let Predicates = [HasStdExtZbbOrZbp] in { -def ORCB : RVBUnary<0b0010100, 0b00111, 0b101, OPC_OP_IMM, "orc.b">, - Sched<[WriteORCB, ReadORCB]>; +def ORC_B : RVBUnary<0b0010100, 0b00111, 0b101, OPC_OP_IMM, "orc.b">, + Sched<[WriteORCB, ReadORCB]>; } // Predicates = [HasStdExtZbbOrZbp] let Predicates = [HasStdExtZbpOrZbkb] in @@ -637,7 +614,7 @@ def UNZIP_RV32 : RVBUnary<0b0000100, 0b01111, 0b101, OPC_OP_IMM, "unzip">; //===----------------------------------------------------------------------===// let Predicates = [HasStdExtZba, IsRV64] in { -def : InstAlias<"zext.w $rd, $rs", (ADDUW GPR:$rd, GPR:$rs, X0)>; +def : InstAlias<"zext.w $rd, $rs", (ADD_UW GPR:$rd, GPR:$rs, X0)>; } let Predicates = [HasStdExtZbp] in { @@ -775,8 +752,10 @@ def : InstAlias<"gorcw $rd, $rs1, $shamt", // Zbp is unratified and that it would likely adopt the already ratified Zbkx names. // Thus current Zbp instructions are defined as aliases for Zbkx instructions. 
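// (In the Zbkx names the numeric suffix is the element width in bits:
// xperm4 permutes nibbles, xperm8 permutes bytes.)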
let Predicates = [HasStdExtZbp] in { - def : InstAlias<"xperm.b $rd, $rs1, $rs2", (XPERMB GPR:$rd, GPR:$rs1, GPR:$rs2)>; - def : InstAlias<"xperm.n $rd, $rs1, $rs2", (XPERMN GPR:$rd, GPR:$rs1, GPR:$rs2)>; + def : InstAlias<"xperm.b $rd, $rs1, $rs2", + (XPERM8 GPR:$rd, GPR:$rs1, GPR:$rs2)>; + def : InstAlias<"xperm.n $rd, $rs1, $rs2", + (XPERM4 GPR:$rd, GPR:$rs1, GPR:$rs2)>; } // Predicates = [HasStdExtZbp] let Predicates = [HasStdExtZbs] in { @@ -803,8 +782,22 @@ def : Pat<(xor GPR:$rs1, (not GPR:$rs2)), (XNOR GPR:$rs1, GPR:$rs2)>; let Predicates = [HasStdExtZbbOrZbpOrZbkb] in { def : PatGprGpr<rotl, ROL>; def : PatGprGpr<rotr, ROR>; + +def : PatGprImm<rotr, RORI, uimmlog2xlen>; +// There's no encoding for roli in the 'B' extension as it can be +// implemented with rori by negating the immediate. +def : Pat<(rotl GPR:$rs1, uimmlog2xlen:$shamt), + (RORI GPR:$rs1, (ImmSubFromXLen uimmlog2xlen:$shamt))>; } // Predicates = [HasStdExtZbbOrZbpOrZbkb] +let Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64] in { +def : PatGprGpr<riscv_rolw, ROLW>; +def : PatGprGpr<riscv_rorw, RORW>; +def : PatGprImm<riscv_rorw, RORIW, uimm5>; +def : Pat<(riscv_rolw GPR:$rs1, uimm5:$rs2), + (RORIW GPR:$rs1, (ImmSubFrom32 uimm5:$rs2))>; +} // Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64] + let Predicates = [HasStdExtZbs] in { def : Pat<(and (not (shiftop<shl> 1, GPR:$rs2)), GPR:$rs1), (BCLR GPR:$rs1, GPR:$rs2)>; @@ -852,48 +845,62 @@ def : Pat<(and GPR:$r, BCLRIANDIMask:$i), (BCLRITwoBitsMaskHigh BCLRIANDIMask:$i))>; } -// There's no encoding for roli in the 'B' extension as it can be -// implemented with rori by negating the immediate. -let Predicates = [HasStdExtZbbOrZbpOrZbkb] in { -def : PatGprImm<rotr, RORI, uimmlog2xlen>; -def : Pat<(rotl GPR:$rs1, uimmlog2xlen:$shamt), - (RORI GPR:$rs1, (ImmSubFromXLen uimmlog2xlen:$shamt))>; - +let Predicates = [HasStdExtZbbOrZbp] in { // We treat orc.b as a separate instruction, so match it directly. We also // lower the Zbb orc.b intrinsic to this. -def : Pat<(riscv_gorc GPR:$rs1, 7), (ORCB GPR:$rs1)>; +def : Pat<(riscv_gorc GPR:$rs1, 7), (ORC_B GPR:$rs1)>; +} + +let Predicates = [HasStdExtZbpOrZbkb] in { +// We treat brev8 as a separate instruction, so match it directly. We also +// use this for brev8 when lowering bitreverse with Zbkb. +def : Pat<(riscv_grev GPR:$rs1, 7), (BREV8 GPR:$rs1)>; + +// We treat zip and unzip as separate instructions, so match it directly. +def : Pat<(i32 (riscv_shfl GPR:$rs1, 15)), (ZIP_RV32 GPR:$rs1)>; +def : Pat<(i32 (riscv_unshfl GPR:$rs1, 15)), (UNZIP_RV32 GPR:$rs1)>; } let Predicates = [HasStdExtZbp] in { def : PatGprGpr<riscv_grev, GREV>; def : PatGprGpr<riscv_gorc, GORC>; +def : PatGprImm<riscv_grev, GREVI, uimmlog2xlen>; +def : PatGprImm<riscv_gorc, GORCI, uimmlog2xlen>; + def : PatGprGpr<riscv_shfl, SHFL>; def : PatGprGpr<riscv_unshfl, UNSHFL>; -def : PatGprGpr<int_riscv_xperm_n, XPERMN>; -def : PatGprGpr<int_riscv_xperm_b, XPERMB>; -def : PatGprGpr<int_riscv_xperm_h, XPERMH>; def : PatGprImm<riscv_shfl, SHFLI, shfl_uimm>; def : PatGprImm<riscv_unshfl, UNSHFLI, shfl_uimm>; -def : PatGprImm<riscv_grev, GREVI, uimmlog2xlen>; -def : PatGprImm<riscv_gorc, GORCI, uimmlog2xlen>; -// We treat brev8 as a separate instruction, so match it directly.
-def : Pat<(riscv_grev GPR:$rs1, 7), (BREV8 GPR:$rs1)>; +def : PatGprGpr<int_riscv_xperm_n, XPERM4>; +def : PatGprGpr<int_riscv_xperm_b, XPERM8>; +def : PatGprGpr<int_riscv_xperm_h, XPERM_H>; } // Predicates = [HasStdExtZbp] +let Predicates = [HasStdExtZbp, IsRV64] in { +def : PatGprGpr<riscv_grevw, GREVW>; +def : PatGprGpr<riscv_gorcw, GORCW>; +def : PatGprImm<riscv_grevw, GREVIW, uimm5>; +def : PatGprImm<riscv_gorcw, GORCIW, uimm5>; + +// FIXME: Move to DAG combine. +def : Pat<(riscv_rorw (riscv_grevw GPR:$rs1, 24), 16), (GREVIW GPR:$rs1, 8)>; +def : Pat<(riscv_rolw (riscv_grevw GPR:$rs1, 24), 16), (GREVIW GPR:$rs1, 8)>; + +def : PatGprGpr<riscv_shflw, SHFLW>; +def : PatGprGpr<riscv_unshflw, UNSHFLW>; +} // Predicates = [HasStdExtZbp, IsRV64] + let Predicates = [HasStdExtZbp, IsRV64] in -def : PatGprGpr<int_riscv_xperm_w, XPERMW>; +def : PatGprGpr<int_riscv_xperm_w, XPERM_W>; let Predicates = [HasStdExtZbp, IsRV32] in { +// FIXME : Move to DAG combine. def : Pat<(i32 (rotr (riscv_grev GPR:$rs1, 24), (i32 16))), (GREVI GPR:$rs1, 8)>; def : Pat<(i32 (rotl (riscv_grev GPR:$rs1, 24), (i32 16))), (GREVI GPR:$rs1, 8)>; // We treat rev8 as a separate instruction, so match it directly. def : Pat<(i32 (riscv_grev GPR:$rs1, 24)), (REV8_RV32 GPR:$rs1)>; - -// We treat zip and unzip as separate instructions, so match it directly. -def : Pat<(i32 (riscv_shfl GPR:$rs1, 15)), (ZIP_RV32 GPR:$rs1)>; -def : Pat<(i32 (riscv_unshfl GPR:$rs1, 15)), (UNZIP_RV32 GPR:$rs1)>; } // Predicates = [HasStdExtZbp, IsRV32] let Predicates = [HasStdExtZbp, IsRV64] in { @@ -942,15 +949,34 @@ def : Pat<(riscv_fsl GPR:$rs3, GPR:$rs1, uimmlog2xlen:$shamt), (FSRI GPR:$rs1, GPR:$rs3, (ImmSubFromXLen uimmlog2xlen:$shamt))>; } // Predicates = [HasStdExtZbt] +let Predicates = [HasStdExtZbt, IsRV64] in { +def : Pat<(riscv_fslw GPR:$rs1, GPR:$rs3, GPR:$rs2), + (FSLW GPR:$rs1, GPR:$rs2, GPR:$rs3)>; +def : Pat<(riscv_fsrw GPR:$rs1, GPR:$rs3, GPR:$rs2), + (FSRW GPR:$rs1, GPR:$rs2, GPR:$rs3)>; +def : Pat<(riscv_fsrw GPR:$rs1, GPR:$rs3, uimm5:$shamt), + (FSRIW GPR:$rs1, GPR:$rs3, uimm5:$shamt)>; +// We can use FSRIW for FSLW by immediate if we subtract the immediate from +// 32 and swap the operands. 
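+// For example, a funnel shift left by 8 selects the same 32-bit window of
+// the concatenated operands as a funnel shift right by 32 - 8 = 24 with the
+// operand pair swapped; the ImmSubFrom32 below computes that 32 - shamt.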
+def : Pat<(riscv_fslw GPR:$rs3, GPR:$rs1, uimm5:$shamt), + (FSRIW GPR:$rs1, GPR:$rs3, (ImmSubFrom32 uimm5:$shamt))>; +} // Predicates = [HasStdExtZbt, IsRV64] + let Predicates = [HasStdExtZbb] in { def : PatGpr<ctlz, CLZ>; def : PatGpr<cttz, CTZ>; def : PatGpr<ctpop, CPOP>; } // Predicates = [HasStdExtZbb] +let Predicates = [HasStdExtZbb, IsRV64] in { +def : PatGpr<riscv_clzw, CLZW>; +def : PatGpr<riscv_ctzw, CTZW>; +def : Pat<(i64 (ctpop (i64 (zexti32 (i64 GPR:$rs1))))), (CPOPW GPR:$rs1)>; +} // Predicates = [HasStdExtZbb, IsRV64] + let Predicates = [HasStdExtZbb] in { -def : Pat<(sext_inreg GPR:$rs1, i8), (SEXTB GPR:$rs1)>; -def : Pat<(sext_inreg GPR:$rs1, i16), (SEXTH GPR:$rs1)>; +def : Pat<(sext_inreg GPR:$rs1, i8), (SEXT_B GPR:$rs1)>; +def : Pat<(sext_inreg GPR:$rs1, i16), (SEXT_H GPR:$rs1)>; } let Predicates = [HasStdExtZbb] in { @@ -968,35 +994,49 @@ let Predicates = [HasStdExtZbbOrZbkb, IsRV64] in { def : Pat<(i64 (bswap GPR:$rs1)), (REV8_RV64 GPR:$rs1)>; } // Predicates = [HasStdExtZbbOrZbkb, IsRV64] +let Predicates = [HasStdExtZbpOrZbkb] in { +def : Pat<(or (and (shl GPR:$rs2, (XLenVT 8)), 0xFFFF), + (and GPR:$rs1, 0x00FF)), + (PACKH GPR:$rs1, GPR:$rs2)>; +def : Pat<(or (shl (and GPR:$rs2, 0x00FF), (XLenVT 8)), + (and GPR:$rs1, 0x00FF)), + (PACKH GPR:$rs1, GPR:$rs2)>; +} // Predicates = [HasStdExtZbpOrZbkb] + let Predicates = [HasStdExtZbpOrZbkb, IsRV32] in def : Pat<(i32 (or (and GPR:$rs1, 0x0000FFFF), (shl GPR:$rs2, (i32 16)))), (PACK GPR:$rs1, GPR:$rs2)>; +let Predicates = [HasStdExtZbpOrZbkb, IsRV64] in { +def : Pat<(i64 (or (and GPR:$rs1, 0x00000000FFFFFFFF), (shl GPR:$rs2, (i64 32)))), + (PACK GPR:$rs1, GPR:$rs2)>; + +def : Pat<(i64 (sext_inreg (or (shl GPR:$rs2, (i64 16)), + (and GPR:$rs1, 0x000000000000FFFF)), + i32)), + (PACKW GPR:$rs1, GPR:$rs2)>; +def : Pat<(i64 (or (sext_inreg (shl GPR:$rs2, (i64 16)), i32), + (and GPR:$rs1, 0x000000000000FFFF))), + (PACKW GPR:$rs1, GPR:$rs2)>; +} + let Predicates = [HasStdExtZbp, IsRV32] in def : Pat<(i32 (or (and GPR:$rs2, 0xFFFF0000), (srl GPR:$rs1, (i32 16)))), (PACKU GPR:$rs1, GPR:$rs2)>; -let Predicates = [HasStdExtZbpOrZbkb, IsRV64] in -def : Pat<(i64 (or (and GPR:$rs1, 0x00000000FFFFFFFF), (shl GPR:$rs2, (i64 32)))), - (PACK GPR:$rs1, GPR:$rs2)>; - -let Predicates = [HasStdExtZbp, IsRV64] in +let Predicates = [HasStdExtZbp, IsRV64] in { def : Pat<(i64 (or (and GPR:$rs2, 0xFFFFFFFF00000000), (srl GPR:$rs1, (i64 32)))), (PACKU GPR:$rs1, GPR:$rs2)>; -let Predicates = [HasStdExtZbpOrZbkb] in { -def : Pat<(or (and (shl GPR:$rs2, (XLenVT 8)), 0xFFFF), - (and GPR:$rs1, 0x00FF)), - (PACKH GPR:$rs1, GPR:$rs2)>; -def : Pat<(or (shl (and GPR:$rs2, 0x00FF), (XLenVT 8)), - (and GPR:$rs1, 0x00FF)), - (PACKH GPR:$rs1, GPR:$rs2)>; -} // Predicates = [HasStdExtZbpOrZbkb] +def : Pat<(i64 (or (and (assertsexti32 GPR:$rs2), 0xFFFFFFFFFFFF0000), + (srl (and GPR:$rs1, 0xFFFFFFFF), (i64 16)))), + (PACKUW GPR:$rs1, GPR:$rs2)>; +} let Predicates = [HasStdExtZbbOrZbp, IsRV32] in -def : Pat<(i32 (and GPR:$rs, 0xFFFF)), (ZEXTH_RV32 GPR:$rs)>; +def : Pat<(i32 (and GPR:$rs, 0xFFFF)), (ZEXT_H_RV32 GPR:$rs)>; let Predicates = [HasStdExtZbbOrZbp, IsRV64] in -def : Pat<(i64 (and GPR:$rs, 0xFFFF)), (ZEXTH_RV64 GPR:$rs)>; +def : Pat<(i64 (and GPR:$rs, 0xFFFF)), (ZEXT_H_RV64 GPR:$rs)>; // Pattern to exclude simm12 immediates from matching. 
def non_imm12 : PatLeaf<(XLenVT GPR:$a), [{ @@ -1074,80 +1114,26 @@ def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 81)), let Predicates = [HasStdExtZba, IsRV64] in { def : Pat<(i64 (shl (and GPR:$rs1, 0xFFFFFFFF), uimm5:$shamt)), - (SLLIUW GPR:$rs1, uimm5:$shamt)>; + (SLLI_UW GPR:$rs1, uimm5:$shamt)>; def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFFF), non_imm12:$rs2)), - (ADDUW GPR:$rs1, GPR:$rs2)>; -def : Pat<(i64 (and GPR:$rs, 0xFFFFFFFF)), (ADDUW GPR:$rs, X0)>; + (ADD_UW GPR:$rs1, GPR:$rs2)>; +def : Pat<(i64 (and GPR:$rs, 0xFFFFFFFF)), (ADD_UW GPR:$rs, X0)>; def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 1)), non_imm12:$rs2)), - (SH1ADDUW GPR:$rs1, GPR:$rs2)>; + (SH1ADD_UW GPR:$rs1, GPR:$rs2)>; def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 2)), non_imm12:$rs2)), - (SH2ADDUW GPR:$rs1, GPR:$rs2)>; + (SH2ADD_UW GPR:$rs1, GPR:$rs2)>; def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 3)), non_imm12:$rs2)), - (SH3ADDUW GPR:$rs1, GPR:$rs2)>; + (SH3ADD_UW GPR:$rs1, GPR:$rs2)>; def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 1)), 0x1FFFFFFFF), non_imm12:$rs2)), - (SH1ADDUW GPR:$rs1, GPR:$rs2)>; + (SH1ADD_UW GPR:$rs1, GPR:$rs2)>; def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 2)), 0x3FFFFFFFF), non_imm12:$rs2)), - (SH2ADDUW GPR:$rs1, GPR:$rs2)>; + (SH2ADD_UW GPR:$rs1, GPR:$rs2)>; def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 3)), 0x7FFFFFFFF), non_imm12:$rs2)), - (SH3ADDUW GPR:$rs1, GPR:$rs2)>; + (SH3ADD_UW GPR:$rs1, GPR:$rs2)>; } // Predicates = [HasStdExtZba, IsRV64] -let Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64] in { -def : PatGprGpr<riscv_rolw, ROLW>; -def : PatGprGpr<riscv_rorw, RORW>; -def : PatGprImm<riscv_rorw, RORIW, uimm5>; -def : Pat<(riscv_rolw GPR:$rs1, uimm5:$rs2), - (RORIW GPR:$rs1, (ImmSubFrom32 uimm5:$rs2))>; -} // Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64] - -let Predicates = [HasStdExtZbp, IsRV64] in { -def : Pat<(riscv_rorw (riscv_grevw GPR:$rs1, 24), 16), (GREVIW GPR:$rs1, 8)>; -def : Pat<(riscv_rolw (riscv_grevw GPR:$rs1, 24), 16), (GREVIW GPR:$rs1, 8)>; -def : PatGprGpr<riscv_grevw, GREVW>; -def : PatGprGpr<riscv_gorcw, GORCW>; -def : PatGprGpr<riscv_shflw, SHFLW>; -def : PatGprGpr<riscv_unshflw, UNSHFLW>; -def : PatGprImm<riscv_grevw, GREVIW, uimm5>; -def : PatGprImm<riscv_gorcw, GORCIW, uimm5>; -} // Predicates = [HasStdExtZbp, IsRV64] - -let Predicates = [HasStdExtZbt, IsRV64] in { -def : Pat<(riscv_fslw GPR:$rs1, GPR:$rs3, GPR:$rs2), - (FSLW GPR:$rs1, GPR:$rs2, GPR:$rs3)>; -def : Pat<(riscv_fsrw GPR:$rs1, GPR:$rs3, GPR:$rs2), - (FSRW GPR:$rs1, GPR:$rs2, GPR:$rs3)>; -def : Pat<(riscv_fsrw GPR:$rs1, GPR:$rs3, uimm5:$shamt), - (FSRIW GPR:$rs1, GPR:$rs3, uimm5:$shamt)>; -// We can use FSRIW for FSLW by immediate if we subtract the immediate from -// 32 and swap the operands. 
-def : Pat<(riscv_fslw GPR:$rs3, GPR:$rs1, uimm5:$shamt), - (FSRIW GPR:$rs1, GPR:$rs3, (ImmSubFrom32 uimm5:$shamt))>; -} // Predicates = [HasStdExtZbt, IsRV64] - -let Predicates = [HasStdExtZbb, IsRV64] in { -def : PatGpr<riscv_clzw, CLZW>; -def : PatGpr<riscv_ctzw, CTZW>; -def : Pat<(i64 (ctpop (i64 (zexti32 (i64 GPR:$rs1))))), (CPOPW GPR:$rs1)>; -} // Predicates = [HasStdExtZbb, IsRV64] - -let Predicates = [HasStdExtZbpOrZbkb, IsRV64] in { -def : Pat<(i64 (sext_inreg (or (shl GPR:$rs2, (i64 16)), - (and GPR:$rs1, 0x000000000000FFFF)), - i32)), - (PACKW GPR:$rs1, GPR:$rs2)>; -def : Pat<(i64 (or (sext_inreg (shl GPR:$rs2, (i64 16)), i32), - (and GPR:$rs1, 0x000000000000FFFF))), - (PACKW GPR:$rs1, GPR:$rs2)>; -} - -let Predicates = [HasStdExtZbp, IsRV64] in -def : Pat<(i64 (or (and (assertsexti32 GPR:$rs2), 0xFFFFFFFFFFFF0000), - (srl (and GPR:$rs1, 0xFFFFFFFF), (i64 16)))), - (PACKUW GPR:$rs1, GPR:$rs2)>; - - let Predicates = [HasStdExtZbcOrZbkc] in { def : PatGprGpr<int_riscv_clmul, CLMUL>; def : PatGprGpr<int_riscv_clmulh, CLMULH>; @@ -1167,17 +1153,17 @@ def : PatGprGpr<riscv_bdecompressw, BDECOMPRESSW>; } // Predicates = [HasStdExtZbe, IsRV64] let Predicates = [HasStdExtZbr] in { -def : PatGpr<int_riscv_crc32_b, CRC32B>; -def : PatGpr<int_riscv_crc32_h, CRC32H>; -def : PatGpr<int_riscv_crc32_w, CRC32W>; -def : PatGpr<int_riscv_crc32c_b, CRC32CB>; -def : PatGpr<int_riscv_crc32c_h, CRC32CH>; -def : PatGpr<int_riscv_crc32c_w, CRC32CW>; +def : PatGpr<int_riscv_crc32_b, CRC32_B>; +def : PatGpr<int_riscv_crc32_h, CRC32_H>; +def : PatGpr<int_riscv_crc32_w, CRC32_W>; +def : PatGpr<int_riscv_crc32c_b, CRC32C_B>; +def : PatGpr<int_riscv_crc32c_h, CRC32C_H>; +def : PatGpr<int_riscv_crc32c_w, CRC32C_W>; } // Predicates = [HasStdExtZbr] let Predicates = [HasStdExtZbr, IsRV64] in { -def : PatGpr<int_riscv_crc32_d, CRC32D>; -def : PatGpr<int_riscv_crc32c_d, CRC32CD>; +def : PatGpr<int_riscv_crc32_d, CRC32_D>; +def : PatGpr<int_riscv_crc32c_d, CRC32C_D>; } // Predicates = [HasStdExtZbr, IsRV64] let Predicates = [HasStdExtZbf] in @@ -1186,16 +1172,7 @@ def : PatGprGpr<riscv_bfp, BFP>; let Predicates = [HasStdExtZbf, IsRV64] in def : PatGprGpr<riscv_bfpw, BFPW>; -let Predicates = [HasStdExtZbkb] in { -def : PatGpr<int_riscv_brev8, BREV8>; -} // Predicates = [HasStdExtZbkb] - -let Predicates = [HasStdExtZbkb, IsRV32] in { -def : PatGpr<int_riscv_zip, ZIP_RV32>; -def : PatGpr<int_riscv_unzip, UNZIP_RV32>; -} // Predicates = [HasStdExtZbkb, IsRV32] - let Predicates = [HasStdExtZbkx] in { -def : PatGprGpr<int_riscv_xperm4, XPERMN>; -def : PatGprGpr<int_riscv_xperm8, XPERMB>; +def : PatGprGpr<int_riscv_xperm4, XPERM4>; +def : PatGprGpr<int_riscv_xperm8, XPERM8>; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td index dfd0c74ee26c..a2753c132354 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td @@ -29,14 +29,14 @@ def riscv_fmv_x_anyexth // Instructions //===----------------------------------------------------------------------===// -let Predicates = [HasStdExtZfhmin] in { +let Predicates = [HasStdExtZfhOrZfhmin] in { def FLH : FPLoad_r<0b001, "flh", FPR16, WriteFLD16>; // Operands for stores are in the order srcreg, base, offset rather than // reflecting the order these fields are specified in the instruction // encoding. 
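// For example, the alias "fsh $rs2, (${rs1})" further down expands to
// (FSH FPR16:$rs2, GPR:$rs1, 0): source register first, then base, then
// offset.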
def FSH : FPStore_r<0b001, "fsh", FPR16, WriteFST16>; -} // Predicates = [HasStdExtZfhmin] +} // Predicates = [HasStdExtZfhOrZfhmin] let Predicates = [HasStdExtZfh] in { let SchedRW = [WriteFMA16, ReadFMA16, ReadFMA16, ReadFMA16] in { @@ -98,7 +98,7 @@ def FCVT_H_WU : FPUnaryOp_r_frm<0b1101010, 0b00001, FPR16, GPR, "fcvt.h.wu">, def : FPUnaryOpDynFrmAlias<FCVT_H_WU, "fcvt.h.wu", FPR16, GPR>; } // Predicates = [HasStdExtZfh] -let Predicates = [HasStdExtZfhmin] in { +let Predicates = [HasStdExtZfhOrZfhmin] in { def FCVT_H_S : FPUnaryOp_r_frm<0b0100010, 0b00000, FPR16, FPR32, "fcvt.h.s">, Sched<[WriteFCvtF32ToF16, ReadFCvtF32ToF16]>; def : FPUnaryOpDynFrmAlias<FCVT_H_S, "fcvt.h.s", FPR16, FPR32>; @@ -113,7 +113,7 @@ def FMV_X_H : FPUnaryOp_r<0b1110010, 0b00000, 0b000, GPR, FPR16, "fmv.x.h">, let mayRaiseFPException = 0 in def FMV_H_X : FPUnaryOp_r<0b1111010, 0b00000, 0b000, FPR16, GPR, "fmv.h.x">, Sched<[WriteFMovI16ToF16, ReadFMovI16ToF16]>; -} // Predicates = [HasStdExtZfhmin] +} // Predicates = [HasStdExtZfhOrZfhmin] let Predicates = [HasStdExtZfh] in { @@ -146,23 +146,23 @@ def FCVT_H_LU : FPUnaryOp_r_frm<0b1101010, 0b00011, FPR16, GPR, "fcvt.h.lu">, def : FPUnaryOpDynFrmAlias<FCVT_H_LU, "fcvt.h.lu", FPR16, GPR>; } // Predicates = [HasStdExtZfh, IsRV64] -let Predicates = [HasStdExtZfhmin, HasStdExtD] in { +let Predicates = [HasStdExtZfhOrZfhmin, HasStdExtD] in { def FCVT_H_D : FPUnaryOp_r_frm<0b0100010, 0b00001, FPR16, FPR64, "fcvt.h.d">, Sched<[WriteFCvtF64ToF16, ReadFCvtF64ToF16]>; def : FPUnaryOpDynFrmAlias<FCVT_H_D, "fcvt.h.d", FPR16, FPR64>; def FCVT_D_H : FPUnaryOp_r<0b0100001, 0b00010, 0b000, FPR64, FPR16, "fcvt.d.h">, Sched<[WriteFCvtF16ToF64, ReadFCvtF16ToF64]>; -} // Predicates = [HasStdExtZfhmin, HasStdExtD] +} // Predicates = [HasStdExtZfhOrZfhmin, HasStdExtD] //===----------------------------------------------------------------------===// // Assembler Pseudo Instructions (User-Level ISA, Version 2.2, Chapter 20) //===----------------------------------------------------------------------===// -let Predicates = [HasStdExtZfhmin] in { +let Predicates = [HasStdExtZfhOrZfhmin] in { def : InstAlias<"flh $rd, (${rs1})", (FLH FPR16:$rd, GPR:$rs1, 0), 0>; def : InstAlias<"fsh $rs2, (${rs1})", (FSH FPR16:$rs2, GPR:$rs1, 0), 0>; -} // Predicates = [HasStdExtZfhmin] +} // Predicates = [HasStdExtZfhOrZfhmin] let Predicates = [HasStdExtZfh] in { def : InstAlias<"fmv.h $rd, $rs", (FSGNJ_H FPR16:$rd, FPR16:$rs, FPR16:$rs)>; @@ -177,14 +177,14 @@ def : InstAlias<"fge.h $rd, $rs, $rt", (FLE_H GPR:$rd, FPR16:$rt, FPR16:$rs), 0>; } // Predicates = [HasStdExtZfh] -let Predicates = [HasStdExtZfhmin] in { +let Predicates = [HasStdExtZfhOrZfhmin] in { def PseudoFLH : PseudoFloatLoad<"flh", FPR16>; def PseudoFSH : PseudoStore<"fsh", FPR16>; let usesCustomInserter = 1 in { def PseudoQuietFLE_H : PseudoQuietFCMP<FPR16>; def PseudoQuietFLT_H : PseudoQuietFCMP<FPR16>; } -} // Predicates = [HasStdExtZfhmin] +} // Predicates = [HasStdExtZfhOrZfhmin] //===----------------------------------------------------------------------===// // Pseudo-instructions and codegen patterns @@ -281,7 +281,7 @@ def : PatSetCC<FPR16, any_fsetccs, SETOLE, FLE_H>; def Select_FPR16_Using_CC_GPR : SelectCC_rrirr<FPR16, GPR>; } // Predicates = [HasStdExtZfh] -let Predicates = [HasStdExtZfhmin] in { +let Predicates = [HasStdExtZfhOrZfhmin] in { /// Loads defm : LdPat<load, FLH, f16>; @@ -299,7 +299,7 @@ def : Pat<(any_fpextend FPR16:$rs1), (FCVT_S_H FPR16:$rs1)>; // Moves (no conversion) def : Pat<(riscv_fmv_h_x GPR:$src), (FMV_H_X 
GPR:$src)>; def : Pat<(riscv_fmv_x_anyexth FPR16:$src), (FMV_X_H FPR16:$src)>; -} // Predicates = [HasStdExtZfhmin] +} // Predicates = [HasStdExtZfhOrZfhmin] let Predicates = [HasStdExtZfh, IsRV32] in { // half->[u]int. Round-to-zero must be used. @@ -351,7 +351,7 @@ def : Pat<(any_sint_to_fp (i64 GPR:$rs1)), (FCVT_H_L $rs1, 0b111)>; def : Pat<(any_uint_to_fp (i64 GPR:$rs1)), (FCVT_H_LU $rs1, 0b111)>; } // Predicates = [HasStdExtZfh, IsRV64] -let Predicates = [HasStdExtZfhmin, HasStdExtD] in { +let Predicates = [HasStdExtZfhOrZfhmin, HasStdExtD] in { /// Float conversion operations // f64 -> f16, f16 -> f64 def : Pat<(any_fpround FPR64:$rs1), (FCVT_H_D FPR64:$rs1, 0b111)>; @@ -361,4 +361,4 @@ def : Pat<(any_fpextend FPR16:$rs1), (FCVT_D_H FPR16:$rs1)>; def : Pat<(fcopysign FPR16:$rs1, FPR64:$rs2), (FSGNJ_H $rs1, (FCVT_H_D $rs2, 0b111))>; def : Pat<(fcopysign FPR64:$rs1, FPR16:$rs2), (FSGNJ_D $rs1, (FCVT_D_H $rs2))>; -} // Predicates = [HasStdExtZfhmin, HasStdExtD] +} // Predicates = [HasStdExtZfhOrZfhmin, HasStdExtD] diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td index 4a41cddedc71..e4e07f4789a6 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td @@ -1,4 +1,4 @@ -//===- RISCVInstrInfoZk.td - RISC-V Scalar Crypto instructions - tablegen -*===// +//===- RISCVInstrInfoZk.td - RISC-V 'Zk' instructions ------*- tablegen -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp b/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp index 12ec52925798..715d92b036e3 100644 --- a/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp +++ b/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp @@ -99,9 +99,9 @@ static bool isSignExtendingOpW(const MachineInstr &MI) { case RISCV::SLTI: case RISCV::SLTU: case RISCV::SLTIU: - case RISCV::SEXTB: - case RISCV::SEXTH: - case RISCV::ZEXTH_RV64: + case RISCV::SEXT_B: + case RISCV::SEXT_H: + case RISCV::ZEXT_H_RV64: return true; // shifting right sufficiently makes the value 32-bit sign-extended case RISCV::SRAI: diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index 044dda0a1ccc..34c6e8e684ac 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -195,6 +195,7 @@ public: return 0; } + unsigned getMinVLen() const { return ZvlLen; } RISCVABI::ABI getTargetABI() const { return TargetABI; } bool isRegisterReservedByUser(Register i) const { assert(i < RISCV::NUM_TARGET_REGS && "Register out of range"); diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp index e950f9582f09..4d69040a4508 100644 --- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp +++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp @@ -8,6 +8,7 @@ #include "MCTargetDesc/SparcFixupKinds.h" #include "MCTargetDesc/SparcMCTargetDesc.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" @@ -131,6 +132,23 @@ namespace { return Sparc::NumTargetFixupKinds; } + Optional<MCFixupKind> getFixupKind(StringRef Name) const override { + unsigned Type; + Type = llvm::StringSwitch<unsigned>(Name) +#define ELF_RELOC(X, Y) .Case(#X, Y) +#include "llvm/BinaryFormat/ELFRelocs/Sparc.def" +#undef ELF_RELOC + .Case("BFD_RELOC_NONE", 
ELF::R_SPARC_NONE) + .Case("BFD_RELOC_8", ELF::R_SPARC_8) + .Case("BFD_RELOC_16", ELF::R_SPARC_16) + .Case("BFD_RELOC_32", ELF::R_SPARC_32) + .Case("BFD_RELOC_64", ELF::R_SPARC_64) + .Default(-1u); + if (Type == -1u) + return None; + return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type); + } + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override { const static MCFixupKindInfo InfosBE[Sparc::NumTargetFixupKinds] = { // name offset bits flags @@ -216,6 +234,11 @@ namespace { { "fixup_sparc_tls_le_lox10", 0, 0, 0 } }; + // Fixup kinds from .reloc directive are like R_SPARC_NONE. They do + // not require any extra processing. + if (Kind >= FirstLiteralRelocationKind) + return MCAsmBackend::getFixupKindInfo(FK_NONE); + if (Kind < FirstTargetFixupKind) return MCAsmBackend::getFixupKindInfo(Kind); @@ -229,6 +252,8 @@ namespace { bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target) override { + if (Fixup.getKind() >= FirstLiteralRelocationKind) + return true; switch ((Sparc::Fixups)Fixup.getKind()) { default: return false; @@ -299,6 +324,8 @@ namespace { uint64_t Value, bool IsResolved, const MCSubtargetInfo *STI) const override { + if (Fixup.getKind() >= FirstLiteralRelocationKind) + return; Value = adjustFixupValue(Fixup.getKind(), Value); if (!Value) return; // Doesn't change encoding. diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp index bc508b45c3bd..02261dc5c4cd 100644 --- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp +++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp @@ -42,6 +42,9 @@ unsigned SparcELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const { + MCFixupKind Kind = Fixup.getKind(); + if (Kind >= FirstLiteralRelocationKind) + return Kind - FirstLiteralRelocationKind; if (const SparcMCExpr *SExpr = dyn_cast<SparcMCExpr>(Fixup.getValue())) { if (SExpr->getKind() == SparcMCExpr::VK_Sparc_R_DISP32) @@ -68,6 +71,7 @@ unsigned SparcELFObjectWriter::getRelocType(MCContext &Ctx, switch(Fixup.getTargetKind()) { default: llvm_unreachable("Unimplemented fixup -> relocation"); + case FK_NONE: return ELF::R_SPARC_NONE; case FK_Data_1: return ELF::R_SPARC_8; case FK_Data_2: return ((Fixup.getOffset() % 2) ? ELF::R_SPARC_UA16 diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp index ccc7d0737f53..610627e7e3f0 100644 --- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -80,6 +80,88 @@ MachineBasicBlock::iterator SystemZFrameLowering::eliminateCallFramePseudoInstr( } } +namespace { +struct SZFrameSortingObj { + bool IsValid = false; // True if we care about this Object. + uint32_t ObjectIndex = 0; // Index of Object into MFI list. + uint64_t ObjectSize = 0; // Size of Object in bytes. + uint32_t D12Count = 0; // 12-bit displacement only. + uint32_t DPairCount = 0; // 12 or 20 bit displacement. +}; +typedef std::vector<SZFrameSortingObj> SZFrameObjVec; +} // namespace + +// TODO: Move to base class. 
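+// The comparator below ranks objects by use density (Count / ObjectSize)
+// without floating-point division: A.Count / A.Size < B.Count / B.Size is
+// evaluated as A.Count * B.Size < B.Count * A.Size. For example, 3 uses of
+// an 8-byte object vs. 2 uses of a 4-byte object gives 3 * 4 = 12 < 2 * 8 = 16,
+// so the lower-density 8-byte object is laid out first.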
+void SystemZELFFrameLowering::orderFrameObjects( + const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const SystemZInstrInfo *TII = + static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo()); + + // Make a vector of sorting objects to track all MFI objects and mark those + // to be sorted as valid. + if (ObjectsToAllocate.size() <= 1) + return; + SZFrameObjVec SortingObjects(MFI.getObjectIndexEnd()); + for (auto &Obj : ObjectsToAllocate) { + SortingObjects[Obj].IsValid = true; + SortingObjects[Obj].ObjectIndex = Obj; + SortingObjects[Obj].ObjectSize = MFI.getObjectSize(Obj); + } + + // Examine uses for each object and record short (12-bit) and "pair" + // displacement types. + for (auto &MBB : MF) + for (auto &MI : MBB) { + if (MI.isDebugInstr()) + continue; + for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { + const MachineOperand &MO = MI.getOperand(I); + if (!MO.isFI()) + continue; + int Index = MO.getIndex(); + if (Index >= 0 && Index < MFI.getObjectIndexEnd() && + SortingObjects[Index].IsValid) { + if (TII->hasDisplacementPairInsn(MI.getOpcode())) + SortingObjects[Index].DPairCount++; + else if (!(MI.getDesc().TSFlags & SystemZII::Has20BitOffset)) + SortingObjects[Index].D12Count++; + } + } + } + + // Sort all objects for short/paired displacements, which should be + // sufficient as it seems like all frame objects typically are within the + // long displacement range. Sorting works by computing the "density" as + // Count / ObjectSize. The comparisons of two such fractions are refactored + // by multiplying both sides with A.ObjectSize * B.ObjectSize, in order to + // eliminate the (fp) divisions. A higher density object needs to go after + // in the list in order for it to end up lower on the stack. + auto CmpD12 = [](const SZFrameSortingObj &A, const SZFrameSortingObj &B) { + // Put all invalid and variable sized objects at the end. + if (!A.IsValid || !B.IsValid) + return A.IsValid; + if (!A.ObjectSize || !B.ObjectSize) + return A.ObjectSize > 0; + uint64_t ADensityCmp = A.D12Count * B.ObjectSize; + uint64_t BDensityCmp = B.D12Count * A.ObjectSize; + if (ADensityCmp != BDensityCmp) + return ADensityCmp < BDensityCmp; + return A.DPairCount * B.ObjectSize < B.DPairCount * A.ObjectSize; + }; + std::stable_sort(SortingObjects.begin(), SortingObjects.end(), CmpD12); + + // Now modify the original list to represent the final order that + // we want. + unsigned Idx = 0; + for (auto &Obj : SortingObjects) { + // All invalid items are sorted at the end, so it's safe to stop. + if (!Obj.IsValid) + break; + ObjectsToAllocate[Idx++] = Obj.ObjectIndex; + } +} + bool SystemZFrameLowering::hasReservedCallFrame( const MachineFunction &MF) const { // The ELF ABI requires us to allocate 160 bytes of stack space for the diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h index 3a1af888d8f9..2b3d7efed53b 100644 --- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h @@ -77,6 +77,9 @@ public: bool hasFP(const MachineFunction &MF) const override; StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override; + void + orderFrameObjects(const MachineFunction &MF, + SmallVectorImpl<int> &ObjectsToAllocate) const override; // Return the byte offset from the incoming stack pointer of Reg's // ABI-defined save slot. Return 0 if no slot is defined for Reg. 
Adjust diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h index a8ddb8c62d18..de446f33f5f1 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -443,6 +443,11 @@ public: EVT VT) const override; bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override; + bool ShouldShrinkFPConstant(EVT VT) const override { + // Do not shrink 64-bit FP constpool entries since LDEB is slower than + // LD, and having the full constant in memory enables reg/mem opcodes. + return VT != MVT::f64; + } bool hasInlineStackProbe(MachineFunction &MF) const override; bool isLegalICmpImmediate(int64_t Imm) const override; bool isLegalAddImmediate(int64_t Imm) const override; diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp index 6db9bf3056b7..4b6aa60f5d55 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -1652,6 +1652,13 @@ unsigned SystemZInstrInfo::getOpcodeForOffset(unsigned Opcode, return 0; } +bool SystemZInstrInfo::hasDisplacementPairInsn(unsigned Opcode) const { + const MCInstrDesc &MCID = get(Opcode); + if (MCID.TSFlags & SystemZII::Has20BitOffset) + return SystemZ::getDisp12Opcode(Opcode) >= 0; + return SystemZ::getDisp20Opcode(Opcode) >= 0; +} + unsigned SystemZInstrInfo::getLoadAndTest(unsigned Opcode) const { switch (Opcode) { case SystemZ::L: return SystemZ::LT; diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h index 396f56c7f59c..9e5b2729a707 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h @@ -312,6 +312,9 @@ public: // exists. unsigned getOpcodeForOffset(unsigned Opcode, int64_t Offset) const; + // Return true if Opcode has a mapping in 12 <-> 20 bit displacements. + bool hasDisplacementPairInsn(unsigned Opcode) const; + // If Opcode is a load instruction that has a LOAD AND TEST form, // return the opcode for the testing form, otherwise return 0. unsigned getLoadAndTest(unsigned Opcode) const; diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp index 0412e524f800..0f1655718481 100644 --- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp +++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp @@ -167,3 +167,41 @@ wasm::ValType WebAssembly::regClassToValType(unsigned RC) { llvm_unreachable("unexpected type"); } } + +void WebAssembly::wasmSymbolSetType(MCSymbolWasm *Sym, const Type *GlobalVT, + const SmallVector<MVT, 1> &VTs) { + assert(!Sym->getType()); + + // Tables are represented as Arrays in LLVM IR therefore + // they reach this point as aggregate Array types with an element type + // that is a reference type. 
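+ // For example (hypothetical IR, not taken from this patch), a funcref
+ // table global looks like
+ //   @tbl = global [0 x i8 addrspace(20)*] undef
+ // where address space 20 marks funcref and 10 marks externref (see
+ // WasmAddressSpace).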
+ wasm::ValType Type; + bool IsTable = false; + if (GlobalVT->isArrayTy() && + WebAssembly::isRefType(GlobalVT->getArrayElementType())) { + MVT VT; + IsTable = true; + switch (GlobalVT->getArrayElementType()->getPointerAddressSpace()) { + case WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF: + VT = MVT::funcref; + break; + case WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF: + VT = MVT::externref; + break; + default: + report_fatal_error("unhandled address space type"); + } + Type = WebAssembly::toValType(VT); + } else if (VTs.size() == 1) { + Type = WebAssembly::toValType(VTs[0]); + } else + report_fatal_error("Aggregate globals not yet implemented"); + + if (IsTable) { + Sym->setType(wasm::WASM_SYMBOL_TYPE_TABLE); + Sym->setTableType(Type); + } else { + Sym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL); + Sym->setGlobalType(wasm::WasmGlobalType{uint8_t(Type), /*Mutable=*/true}); + } +} diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h index 042d51c7d6cb..cdb95d48398d 100644 --- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h +++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h @@ -17,6 +17,8 @@ #include "llvm/ADT/Optional.h" #include "llvm/BinaryFormat/Wasm.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/MC/MCSymbolWasm.h" #include "llvm/Support/MachineValueType.h" namespace llvm { @@ -41,6 +43,43 @@ enum class BlockType : unsigned { Multivalue = 0xffff, }; +enum WasmAddressSpace : unsigned { + // Default address space, for pointers to linear memory (stack, heap, data). + WASM_ADDRESS_SPACE_DEFAULT = 0, + // A non-integral address space for pointers to named objects outside of + // linear memory: WebAssembly globals or WebAssembly locals. Loads and stores + // to these pointers are lowered to global.get / global.set or local.get / + // local.set, as appropriate. + WASM_ADDRESS_SPACE_VAR = 1, + // A non-integral address space for externref values + WASM_ADDRESS_SPACE_EXTERNREF = 10, + // A non-integral address space for funcref values + WASM_ADDRESS_SPACE_FUNCREF = 20, +}; + +inline bool isDefaultAddressSpace(unsigned AS) { + return AS == WASM_ADDRESS_SPACE_DEFAULT; +} +inline bool isWasmVarAddressSpace(unsigned AS) { + return AS == WASM_ADDRESS_SPACE_VAR; +} +inline bool isValidAddressSpace(unsigned AS) { + return isDefaultAddressSpace(AS) || isWasmVarAddressSpace(AS); +} +inline bool isFuncrefType(const Type *Ty) { + return isa<PointerType>(Ty) && + Ty->getPointerAddressSpace() == + WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF; +} +inline bool isExternrefType(const Type *Ty) { + return isa<PointerType>(Ty) && + Ty->getPointerAddressSpace() == + WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF; +} +inline bool isRefType(const Type *Ty) { + return isFuncrefType(Ty) || isExternrefType(Ty); +} + // Convert StringRef to ValType / HeapType / BlockType Optional<wasm::ValType> parseType(StringRef Type); @@ -68,6 +107,10 @@ wasm::ValType toValType(MVT Type); // Convert a register class to a wasm ValType. wasm::ValType regClassToValType(unsigned RC); +/// Sets a Wasm Symbol Type.
+void wasmSymbolSetType(MCSymbolWasm *Sym, const Type *GlobalVT, + const SmallVector<MVT, 1> &VTs); + } // end namespace WebAssembly } // end namespace llvm diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h index 57e40f6cd8d7..cdfc758db7ac 100644 --- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h +++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h @@ -15,7 +15,6 @@ #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_UTILS_WEBASSEMBLYUTILITIES_H #define LLVM_LIB_TARGET_WEBASSEMBLY_UTILS_WEBASSEMBLYUTILITIES_H -#include "llvm/IR/DerivedTypes.h" #include "llvm/Support/CommandLine.h" namespace llvm { @@ -30,43 +29,6 @@ class WebAssemblySubtarget; namespace WebAssembly { -enum WasmAddressSpace : unsigned { - // Default address space, for pointers to linear memory (stack, heap, data). - WASM_ADDRESS_SPACE_DEFAULT = 0, - // A non-integral address space for pointers to named objects outside of - // linear memory: WebAssembly globals or WebAssembly locals. Loads and stores - // to these pointers are lowered to global.get / global.set or local.get / - // local.set, as appropriate. - WASM_ADDRESS_SPACE_VAR = 1, - // A non-integral address space for externref values - WASM_ADDRESS_SPACE_EXTERNREF = 10, - // A non-integral address space for funcref values - WASM_ADDRESS_SPACE_FUNCREF = 20, -}; - -inline bool isDefaultAddressSpace(unsigned AS) { - return AS == WASM_ADDRESS_SPACE_DEFAULT; -} -inline bool isWasmVarAddressSpace(unsigned AS) { - return AS == WASM_ADDRESS_SPACE_VAR; -} -inline bool isValidAddressSpace(unsigned AS) { - return isDefaultAddressSpace(AS) || isWasmVarAddressSpace(AS); -} -inline bool isFuncrefType(const Type *Ty) { - return isa<PointerType>(Ty) && - Ty->getPointerAddressSpace() == - WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF; -} -inline bool isExternrefType(const Type *Ty) { - return isa<PointerType>(Ty) && - Ty->getPointerAddressSpace() == - WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF; -} -inline bool isRefType(const Type *Ty) { - return isFuncrefType(Ty) || isExternrefType(Ty); -} - bool isChild(const MachineInstr &MI, const WebAssemblyFunctionInfo &MFI); bool mayThrow(const MachineInstr &MI); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp index e3af6b2662ef..bf326e5106be 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -181,17 +181,11 @@ void WebAssemblyAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) { if (!Sym->getType()) { const WebAssemblyTargetLowering &TLI = *Subtarget->getTargetLowering(); - SmallVector<EVT, 1> VTs; - ComputeValueVTs(TLI, GV->getParent()->getDataLayout(), GV->getValueType(), - VTs); - if (VTs.size() != 1 || - TLI.getNumRegisters(GV->getParent()->getContext(), VTs[0]) != 1) - report_fatal_error("Aggregate globals not yet implemented"); - MVT VT = TLI.getRegisterType(GV->getParent()->getContext(), VTs[0]); - bool Mutable = true; - wasm::ValType Type = WebAssembly::toValType(VT); - Sym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL); - Sym->setGlobalType(wasm::WasmGlobalType{uint8_t(Type), Mutable}); + SmallVector<MVT, 1> VTs; + Type *GlobalVT = GV->getValueType(); + computeLegalValueVTs(TLI, GV->getParent()->getContext(), + GV->getParent()->getDataLayout(), GlobalVT, VTs); + WebAssembly::wasmSymbolSetType(Sym, GlobalVT, VTs); } // If the GlobalVariable refers to a table, we handle it here instead of diff --git 
a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp index 406edef8ff3f..8ddd414b043a 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp @@ -16,6 +16,7 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "Utils/WebAssemblyTypeUtilities.h" #include "Utils/WebAssemblyUtilities.h" #include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp index c45f7d7176b5..01baa3d9389d 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp @@ -19,7 +19,7 @@ #include "WebAssemblyFrameLowering.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" -#include "Utils/WebAssemblyUtilities.h" +#include "Utils/WebAssemblyTypeUtilities.h" #include "WebAssembly.h" #include "WebAssemblyInstrInfo.h" #include "WebAssemblyMachineFunctionInfo.h" diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp index fe656753889f..b6c43be03aba 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp @@ -560,6 +560,9 @@ Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallBase *CI) { NEltArg = NEltArg.getValue() + 1; FnAttrs.addAllocSizeAttr(SizeArg, NEltArg); } + // In case the callee has 'noreturn' attribute, we need to remove it, because + // we expect invoke wrappers to return. + FnAttrs.removeAttribute(Attribute::NoReturn); // Reconstruct the AttributesList based on the vector we constructed. AttributeList NewCallAL = AttributeList::get( @@ -630,9 +633,9 @@ static bool canLongjmp(const Value *Callee) { // Exception-catching related functions // - // We intentionally excluded __cxa_end_catch here even though it surely cannot - // longjmp, in order to maintain the unwind relationship from all existing - // catchpads (and calls within them) to catch.dispatch.longjmp. + // We intentionally treat __cxa_end_catch as longjmpable in Wasm SjLj even though + // it surely cannot longjmp, in order to maintain the unwind relationship from + // all existing catchpads (and calls within them) to catch.dispatch.longjmp. // // In Wasm EH + Wasm SjLj, we // 1. Make all catchswitch and cleanuppad that unwind to caller unwind to @@ -663,6 +666,8 @@ static bool canLongjmp(const Value *Callee) { // // The comment block in findWasmUnwindDestinations() in // SelectionDAGBuilder.cpp is addressing a similar problem. + if (CalleeName == "__cxa_end_catch") + return WebAssembly::WasmEnableSjLj; if (CalleeName == "__cxa_begin_catch" || CalleeName == "__cxa_allocate_exception" || CalleeName == "__cxa_throw" || CalleeName == "__clang_call_terminate") @@ -869,15 +874,17 @@ static void nullifySetjmp(Function *F) { Function *SetjmpF = M.getFunction("setjmp"); SmallVector<Instruction *, 1> ToErase; - for (User *U : SetjmpF->users()) { - auto *CI = dyn_cast<CallInst>(U); - // FIXME 'invoke' to setjmp can happen when we use Wasm EH + Wasm SjLj, but - // we don't support two being used together yet.
- if (!CI) - report_fatal_error("Wasm EH + Wasm SjLj is not fully supported yet"); - BasicBlock *BB = CI->getParent(); + for (User *U : make_early_inc_range(SetjmpF->users())) { + auto *CB = cast<CallBase>(U); + BasicBlock *BB = CB->getParent(); if (BB->getParent() != F) // in other function continue; + CallInst *CI = nullptr; + // setjmp cannot throw. So if it is an invoke, lower it to a call + if (auto *II = dyn_cast<InvokeInst>(CB)) + CI = llvm::changeToCall(II); + else + CI = cast<CallInst>(CB); ToErase.push_back(CI); CI->replaceAllUsesWith(IRB.getInt32(0)); } @@ -1313,10 +1320,13 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) { SmallVector<PHINode *, 4> SetjmpRetPHIs; Function *SetjmpF = M.getFunction("setjmp"); for (auto *U : make_early_inc_range(SetjmpF->users())) { - auto *CB = dyn_cast<CallBase>(U); + auto *CB = cast<CallBase>(U); BasicBlock *BB = CB->getParent(); if (BB->getParent() != &F) // in other function continue; + if (CB->getOperandBundle(LLVMContext::OB_funclet)) + report_fatal_error( + "setjmp within a catch clause is not supported in Wasm EH"); CallInst *CI = nullptr; // setjmp cannot throw. So if it is an invoke, lower it to a call @@ -1815,10 +1825,10 @@ void WebAssemblyLowerEmscriptenEHSjLj::handleLongjmpableCallsForWasmSjLj( BasicBlock *UnwindDest = nullptr; if (auto Bundle = CI->getOperandBundle(LLVMContext::OB_funclet)) { Instruction *FromPad = cast<Instruction>(Bundle->Inputs[0]); - while (!UnwindDest && FromPad) { + while (!UnwindDest) { if (auto *CPI = dyn_cast<CatchPadInst>(FromPad)) { UnwindDest = CPI->getCatchSwitch()->getUnwindDest(); - FromPad = nullptr; // stop searching + break; } else if (auto *CPI = dyn_cast<CleanupPadInst>(FromPad)) { // getCleanupRetUnwindDest() can return nullptr when // 1. This cleanuppad's matching cleanupret uwninds to caller @@ -1826,7 +1836,10 @@ void WebAssemblyLowerEmscriptenEHSjLj::handleLongjmpableCallsForWasmSjLj( // unreachable. // In case of 2, we need to traverse the parent pad chain. UnwindDest = getCleanupRetUnwindDest(CPI); - FromPad = cast<Instruction>(CPI->getParentPad()); + Value *ParentPad = CPI->getParentPad(); + if (isa<ConstantTokenNone>(ParentPad)) + break; + FromPad = cast<Instruction>(ParentPad); } } } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerRefTypesIntPtrConv.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerRefTypesIntPtrConv.cpp index 8ff916c28c4e..6fd87f10150d 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerRefTypesIntPtrConv.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerRefTypesIntPtrConv.cpp @@ -14,7 +14,7 @@ /// //===----------------------------------------------------------------------===// -#include "Utils/WebAssemblyUtilities.h" +#include "Utils/WebAssemblyTypeUtilities.h" #include "WebAssembly.h" #include "WebAssemblySubtarget.h" #include "llvm/IR/InstIterator.h" diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp index 09bccef17ab0..2e6027a5605c 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp @@ -59,39 +59,7 @@ WebAssemblyMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const { SmallVector<MVT, 1> VTs; computeLegalValueVTs(CurrentFunc, TM, GlobalVT, VTs); - // Tables are represented as Arrays in LLVM IR therefore - // they reach this point as aggregate Array types with an element type - // that is a reference type. 
- wasm::ValType Type; - bool IsTable = false; - if (GlobalVT->isArrayTy() && - WebAssembly::isRefType(GlobalVT->getArrayElementType())) { - MVT VT; - IsTable = true; - switch (GlobalVT->getArrayElementType()->getPointerAddressSpace()) { - case WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF: - VT = MVT::funcref; - break; - case WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF: - VT = MVT::externref; - break; - default: - report_fatal_error("unhandled address space type"); - } - Type = WebAssembly::toValType(VT); - } else if (VTs.size() == 1) { - Type = WebAssembly::toValType(VTs[0]); - } else - report_fatal_error("Aggregate globals not yet implemented"); - - if (IsTable) { - WasmSym->setType(wasm::WASM_SYMBOL_TYPE_TABLE); - WasmSym->setTableType(Type); - } else { - WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL); - WasmSym->setGlobalType( - wasm::WasmGlobalType{uint8_t(Type), /*Mutable=*/true}); - } + WebAssembly::wasmSymbolSetType(WasmSym, GlobalVT, VTs); } return WasmSym; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp index 00b11321fdb2..ea80e96d50de 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp @@ -30,22 +30,28 @@ void WebAssemblyFunctionInfo::initWARegs(MachineRegisterInfo &MRI) { WARegs.resize(MRI.getNumVirtRegs(), Reg); } -void llvm::computeLegalValueVTs(const Function &F, const TargetMachine &TM, +void llvm::computeLegalValueVTs(const WebAssemblyTargetLowering &TLI, + LLVMContext &Ctx, const DataLayout &DL, Type *Ty, SmallVectorImpl<MVT> &ValueVTs) { - const DataLayout &DL(F.getParent()->getDataLayout()); - const WebAssemblyTargetLowering &TLI = - *TM.getSubtarget<WebAssemblySubtarget>(F).getTargetLowering(); SmallVector<EVT, 4> VTs; ComputeValueVTs(TLI, DL, Ty, VTs); for (EVT VT : VTs) { - unsigned NumRegs = TLI.getNumRegisters(F.getContext(), VT); - MVT RegisterVT = TLI.getRegisterType(F.getContext(), VT); + unsigned NumRegs = TLI.getNumRegisters(Ctx, VT); + MVT RegisterVT = TLI.getRegisterType(Ctx, VT); for (unsigned I = 0; I != NumRegs; ++I) ValueVTs.push_back(RegisterVT); } } +void llvm::computeLegalValueVTs(const Function &F, const TargetMachine &TM, + Type *Ty, SmallVectorImpl<MVT> &ValueVTs) { + const DataLayout &DL(F.getParent()->getDataLayout()); + const WebAssemblyTargetLowering &TLI = + *TM.getSubtarget<WebAssemblySubtarget>(F).getTargetLowering(); + computeLegalValueVTs(TLI, F.getContext(), DL, Ty, ValueVTs); +} + void llvm::computeSignatureVTs(const FunctionType *Ty, const Function *TargetFunc, const Function &ContextFunc, diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h index 3fa2d0c8a2f2..413d0d1dc554 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h @@ -166,6 +166,10 @@ public: void setWasmEHFuncInfo(WasmEHFuncInfo *Info) { WasmEHInfo = Info; } }; +void computeLegalValueVTs(const WebAssemblyTargetLowering &TLI, + LLVMContext &Ctx, const DataLayout &DL, Type *Ty, + SmallVectorImpl<MVT> &ValueVTs); + void computeLegalValueVTs(const Function &F, const TargetMachine &TM, Type *Ty, SmallVectorImpl<MVT> &ValueVTs); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index aff72452af6c..90753b5b4d33 100644 --- 
a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -805,8 +805,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FMAXNUM, MVT::f80, Expand); // Some FP actions are always expanded for vector types. - for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32, - MVT::v2f64, MVT::v4f64, MVT::v8f64 }) { + for (auto VT : { MVT::v8f16, MVT::v16f16, MVT::v32f16, + MVT::v4f32, MVT::v8f32, MVT::v16f32, + MVT::v2f64, MVT::v4f64, MVT::v8f64 }) { setOperationAction(ISD::FSIN, VT, Expand); setOperationAction(ISD::FSINCOS, VT, Expand); setOperationAction(ISD::FCOS, VT, Expand); @@ -1094,13 +1095,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, if (VT == MVT::v2i64) continue; setOperationAction(ISD::ROTL, VT, Custom); setOperationAction(ISD::ROTR, VT, Custom); + setOperationAction(ISD::FSHL, VT, Custom); + setOperationAction(ISD::FSHR, VT, Custom); } - setOperationAction(ISD::FSHL, MVT::v16i8, Custom); - setOperationAction(ISD::FSHR, MVT::v16i8, Custom); - setOperationAction(ISD::FSHL, MVT::v4i32, Custom); - setOperationAction(ISD::FSHR, MVT::v4i32, Custom); - setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal); @@ -1958,6 +1956,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // AVX512_FP16 scalar operations setGroup(MVT::f16); addRegisterClass(MVT::f16, &X86::FR16XRegClass); + setOperationAction(ISD::FREM, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FREM, MVT::f16, Promote); setOperationAction(ISD::SELECT_CC, MVT::f16, Expand); setOperationAction(ISD::BR_CC, MVT::f16, Expand); setOperationAction(ISD::SETCC, MVT::f16, Custom); @@ -12571,6 +12571,8 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, if (ForceV2Zero) V2 = getZeroVector(VT, Subtarget, DAG, DL); + unsigned NumElts = VT.getVectorNumElements(); + switch (VT.SimpleTy) { case MVT::v4i64: case MVT::v8i32: @@ -12629,8 +12631,7 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, return Masked; if (Subtarget.hasBWI() && Subtarget.hasVLX()) { - MVT IntegerType = - MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8)); + MVT IntegerType = MVT::getIntegerVT(std::max<unsigned>(NumElts, 8)); SDValue MaskNode = DAG.getConstant(BlendMask, DL, IntegerType); return getVectorMaskingNode(V2, MaskNode, V1, Subtarget, DAG); } @@ -12699,8 +12700,7 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, // Otherwise load an immediate into a GPR, cast to k-register, and use a // masked move. - MVT IntegerType = - MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8)); + MVT IntegerType = MVT::getIntegerVT(std::max<unsigned>(NumElts, 8)); SDValue MaskNode = DAG.getConstant(BlendMask, DL, IntegerType); return getVectorMaskingNode(V2, MaskNode, V1, Subtarget, DAG); } @@ -29843,7 +29843,8 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget, {Op0, Op1, Amt}, DAG, Subtarget); } assert((VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8 || - VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) && + VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v8i32 || + VT == MVT::v16i32) && "Unexpected funnel shift type!"); // fshl(x,y,z) -> unpack(y,x) << (z & (bw-1))) >> bw. 
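The one-line recipe above is easiest to see in scalar form. A minimal C++ sketch of the unpack-and-shift trick for 16-bit elements (illustrative only; fshl16 is not an LLVM function):

#include <cstdint>

// fshl(x, y, z): concatenate x (high half) and y (low half), shift left by
// z & (bw - 1), then keep the high half -- exactly the
// unpack(y, x) << (z & (bw - 1)) >> bw recipe from the comment above.
static uint16_t fshl16(uint16_t x, uint16_t y, unsigned z) {
  uint32_t Unpacked = (uint32_t(x) << 16) | y;
  return uint16_t((Unpacked << (z & 15)) >> 16);
}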
@@ -29855,6 +29856,10 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget, SDValue AmtMod = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask); bool IsCst = ISD::isBuildVectorOfConstantSDNodes(AmtMod.getNode()); + // Constant vXi16 funnel shifts can be efficiently handled by default. + if (IsCst && EltSizeInBits == 16) + return SDValue(); + unsigned ShiftOpc = IsFSHR ? ISD::SRL : ISD::SHL; unsigned NumElts = VT.getVectorNumElements(); MVT ExtSVT = MVT::getIntegerVT(2 * EltSizeInBits); @@ -29874,6 +29879,10 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget, // Attempt to fold scalar shift as unpack(y,x) << zext(splat(z)) if (supportedVectorShiftWithBaseAmnt(ExtVT, Subtarget, ShiftOpc)) { if (SDValue ScalarAmt = DAG.getSplatValue(AmtMod)) { + // Uniform vXi16 funnel shifts can be efficiently handled by default. + if (EltSizeInBits == 16) + return SDValue(); + SDValue Lo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, Op1, Op0)); SDValue Hi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, Op1, Op0)); ScalarAmt = DAG.getZExtOrTrunc(ScalarAmt, DL, MVT::i32); @@ -29912,7 +29921,7 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget, } // Attempt to fold per-element (ExtVT) shift as unpack(y,x) << zext(z) - if ((IsCst && !IsFSHR && EltSizeInBits == 8) || + if (((IsCst || !Subtarget.hasAVX512()) && !IsFSHR && EltSizeInBits <= 16) || supportedVectorVarShift(ExtVT, Subtarget, ShiftOpc)) { SDValue Z = DAG.getConstant(0, DL, VT); SDValue RLo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, Op1, Op0)); @@ -36477,9 +36486,8 @@ static SDValue narrowLoadToVZLoad(LoadSDNode *LN, MVT MemVT, MVT VT, // TODO: Investigate sharing more of this with shuffle lowering. static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask, bool AllowFloatDomain, bool AllowIntDomain, - SDValue &V1, const SDLoc &DL, SelectionDAG &DAG, - const X86Subtarget &Subtarget, unsigned &Shuffle, - MVT &SrcVT, MVT &DstVT) { + SDValue V1, const X86Subtarget &Subtarget, + unsigned &Shuffle, MVT &SrcVT, MVT &DstVT) { unsigned NumMaskElts = Mask.size(); unsigned MaskEltSize = MaskVT.getScalarSizeInBits(); @@ -36522,9 +36530,6 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask, MVT::getIntegerVT(MaskEltSize); SrcVT = MVT::getVectorVT(ScalarTy, SrcSize / MaskEltSize); - if (SrcVT.getSizeInBits() != MaskVT.getSizeInBits()) - V1 = extractSubVector(V1, 0, DAG, DL, SrcSize); - Shuffle = unsigned(MatchAny ? ISD::ANY_EXTEND : ISD::ZERO_EXTEND); if (SrcVT.getVectorNumElements() != NumDstElts) Shuffle = getOpcode_EXTEND_VECTOR_INREG(Shuffle); @@ -37102,6 +37107,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, assert((Inputs.size() == 1 || Inputs.size() == 2) && "Unexpected number of shuffle inputs!"); + SDLoc DL(Root); MVT RootVT = Root.getSimpleValueType(); unsigned RootSizeInBits = RootVT.getSizeInBits(); unsigned NumRootElts = RootVT.getVectorNumElements(); @@ -37109,6 +37115,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, // Canonicalize shuffle input op to the requested type. // TODO: Support cases where Op is smaller than VT. 
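// (When VT is narrower than Op, the lambda below now extracts the low
// subvector first so that the bitcast operates on matching sizes.)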
auto CanonicalizeShuffleInput = [&](MVT VT, SDValue Op) { + if (VT.getSizeInBits() < Op.getValueSizeInBits()) + Op = extractSubVector(Op, 0, DAG, DL, VT.getSizeInBits()); return DAG.getBitcast(VT, Op); }; @@ -37124,7 +37132,6 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, assert(VT1.getSizeInBits() == RootSizeInBits && VT2.getSizeInBits() == RootSizeInBits && "Vector size mismatch"); - SDLoc DL(Root); SDValue Res; unsigned NumBaseMaskElts = BaseMask.size(); @@ -37393,15 +37400,13 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, } } - SDValue NewV1 = V1; // Save operand in case early exit happens. - if (matchUnaryShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, NewV1, - DL, DAG, Subtarget, Shuffle, ShuffleSrcVT, - ShuffleVT) && + if (matchUnaryShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, V1, + Subtarget, Shuffle, ShuffleSrcVT, ShuffleVT) && (!IsMaskedShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { if (Depth == 0 && Root.getOpcode() == Shuffle) return SDValue(); // Nothing to do! - Res = CanonicalizeShuffleInput(ShuffleSrcVT, NewV1); + Res = CanonicalizeShuffleInput(ShuffleSrcVT, V1); Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res); return DAG.getBitcast(RootVT, Res); } @@ -40903,6 +40908,28 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode( Known.One.setHighBits(ShAmt); return false; } + case X86ISD::BLENDV: { + SDValue Sel = Op.getOperand(0); + SDValue LHS = Op.getOperand(1); + SDValue RHS = Op.getOperand(2); + + APInt SignMask = APInt::getSignMask(BitWidth); + SDValue NewSel = SimplifyMultipleUseDemandedBits( + Sel, SignMask, OriginalDemandedElts, TLO.DAG, Depth + 1); + SDValue NewLHS = SimplifyMultipleUseDemandedBits( + LHS, OriginalDemandedBits, OriginalDemandedElts, TLO.DAG, Depth + 1); + SDValue NewRHS = SimplifyMultipleUseDemandedBits( + RHS, OriginalDemandedBits, OriginalDemandedElts, TLO.DAG, Depth + 1); + + if (NewSel || NewLHS || NewRHS) { + NewSel = NewSel ? NewSel : Sel; + NewLHS = NewLHS ? NewLHS : LHS; + NewRHS = NewRHS ? NewRHS : RHS; + return TLO.CombineTo(Op, TLO.DAG.getNode(X86ISD::BLENDV, SDLoc(Op), VT, + NewSel, NewLHS, NewRHS)); + } + break; + } case X86ISD::PEXTRB: case X86ISD::PEXTRW: { SDValue Vec = Op.getOperand(0); @@ -41043,6 +41070,13 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode( if (OriginalDemandedBits.countTrailingZeros() >= NumElts) return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT)); + // See if we only demand bits from the lower 128-bit vector. + if (SrcVT.is256BitVector() && + OriginalDemandedBits.getActiveBits() <= (NumElts / 2)) { + SDValue NewSrc = extract128BitVector(Src, 0, TLO.DAG, SDLoc(Src)); + return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc)); + } + // Only demand the vector elements of the sign bits we need. APInt KnownUndef, KnownZero; APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts); @@ -42238,19 +42272,14 @@ static SDValue combinePredicateReduction(SDNode *Extract, SelectionDAG &DAG, EVT MovmskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts); Movmsk = DAG.getBitcast(MovmskVT, Match); } else { - // For all_of(setcc(x,y,eq)) - // - avoid vXi64 comparisons without PCMPEQQ (SSE41+), use PCMPEQD. - // - avoid vXi16 comparisons, use PMOVMSKB(PCMPEQB()). + // For all_of(setcc(x,y,eq)) - use PMOVMSKB(PCMPEQB()). 
if (BinOp == ISD::AND && Match.getOpcode() == ISD::SETCC && cast<CondCodeSDNode>(Match.getOperand(2))->get() == ISD::CondCode::SETEQ) { - SDValue Vec = Match.getOperand(0); - EVT VecSVT = Vec.getValueType().getScalarType(); - if ((VecSVT == MVT::i16 && !Subtarget.hasBWI()) || - (VecSVT == MVT::i64 && !Subtarget.hasSSE41())) { - NumElts *= 2; - VecSVT = VecSVT.getHalfSizedIntegerVT(*DAG.getContext()); - EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), VecSVT, NumElts); + EVT VecSVT = Match.getOperand(0).getValueType().getScalarType(); + if (VecSVT != MVT::i8) { + NumElts *= VecSVT.getSizeInBits() / 8; + EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, NumElts); MatchVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts); Match = DAG.getSetCC( DL, MatchVT, DAG.getBitcast(CmpVT, Match.getOperand(0)), @@ -43079,6 +43108,38 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, } } + // If this extract is from a loaded vector value and will be used as an + // integer, that requires a potentially expensive XMM -> GPR transfer. + // Additionally, if we can convert to a scalar integer load, that will likely + // be folded into a subsequent integer op. + // Note: Unlike the related fold for this in DAGCombiner, this is not limited + // to a single-use of the loaded vector. For the reasons above, we + // expect this to be profitable even if it creates an extra load. + bool LikelyUsedAsVector = any_of(N->uses(), [](SDNode *Use) { + return Use->getOpcode() == ISD::STORE || + Use->getOpcode() == ISD::INSERT_VECTOR_ELT || + Use->getOpcode() == ISD::SCALAR_TO_VECTOR; + }); + auto *LoadVec = dyn_cast<LoadSDNode>(InputVector); + if (LoadVec && CIdx && ISD::isNormalLoad(LoadVec) && VT.isInteger() && + SrcVT.getVectorElementType() == VT && DCI.isAfterLegalizeDAG() && + !LikelyUsedAsVector) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue NewPtr = + TLI.getVectorElementPointer(DAG, LoadVec->getBasePtr(), SrcVT, EltIdx); + unsigned PtrOff = VT.getSizeInBits() * CIdx->getZExtValue() / 8; + MachinePointerInfo MPI = LoadVec->getPointerInfo().getWithOffset(PtrOff); + Align Alignment = commonAlignment(LoadVec->getAlign(), PtrOff); + SDValue Load = + DAG.getLoad(VT, dl, LoadVec->getChain(), NewPtr, MPI, Alignment, + LoadVec->getMemOperand()->getFlags(), LoadVec->getAAInfo()); + SDValue Chain = Load.getValue(1); + SDValue From[] = {SDValue(N, 0), SDValue(LoadVec, 1)}; + SDValue To[] = {Load, Chain}; + DAG.ReplaceAllUsesOfValuesWith(From, To, 2); + return SDValue(N, 0); + } + return SDValue(); } @@ -44467,8 +44528,8 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC, unsigned NumEltBits = VecVT.getScalarSizeInBits(); bool IsAnyOf = CmpOpcode == X86ISD::CMP && CmpVal.isZero(); - bool IsAllOf = CmpOpcode == X86ISD::SUB && NumElts <= CmpBits && - CmpVal.isMask(NumElts); + bool IsAllOf = (CmpOpcode == X86ISD::SUB || CmpOpcode == X86ISD::CMP) && + NumElts <= CmpBits && CmpVal.isMask(NumElts); if (!IsAnyOf && !IsAllOf) return SDValue(); @@ -44500,14 +44561,16 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC, // MOVMSK(CONCAT(X,Y)) != 0 -> MOVMSK(OR(X,Y)). // MOVMSK(CONCAT(X,Y)) == -1 -> MOVMSK(AND(X,Y)). // MOVMSK(CONCAT(X,Y)) != -1 -> MOVMSK(AND(X,Y)). 
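All four MOVMSK(CONCAT(X,Y)) rewrites above follow from MOVMSK collecting one sign bit per lane: the sign bit of a|b is the OR of the sign bits and the sign bit of a&b is the AND, so any-of and all-of tests over a concatenation reduce to tests over a single half-width vector. A self-contained C++ model, where movmsk is an illustrative helper rather than the intrinsic:

#include <array>
#include <cstddef>
#include <cstdint>

// Gather one sign bit per lane, as MOVMSK does.
template <std::size_t N>
static unsigned movmsk(const std::array<int8_t, N> &V) {
  unsigned M = 0;
  for (std::size_t I = 0; I != N; ++I)
    M |= (unsigned(uint8_t(V[I])) >> 7) << I;
  return M;
}

int main() {
  std::array<int8_t, 4> X{-1, -1, -1, -1}, Y{-1, 0, -1, -1};
  std::array<int8_t, 8> Cat{-1, -1, -1, -1, -1, 0, -1, -1}; // CONCAT(X,Y)
  std::array<int8_t, 4> AndXY{}, OrXY{};
  for (int I = 0; I != 4; ++I) {
    AndXY[I] = X[I] & Y[I];
    OrXY[I] = X[I] | Y[I];
  }
  // all-of: movmsk(concat) is all-ones iff movmsk(and) is all-ones.
  bool AllOfAgrees = (movmsk(Cat) == 0xFFu) == (movmsk(AndXY) == 0xFu);
  // any-of: movmsk(concat) != 0 iff movmsk(or) != 0.
  bool AnyOfAgrees = (movmsk(Cat) != 0) == (movmsk(OrXY) != 0);
  return (AllOfAgrees && AnyOfAgrees) ? 0 : 1;
}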
- if (VecVT.is256BitVector()) { + if (VecVT.is256BitVector() && NumElts <= CmpBits) { SmallVector<SDValue> Ops; if (collectConcatOps(peekThroughBitcasts(Vec).getNode(), Ops) && Ops.size() == 2) { SDLoc DL(EFLAGS); - EVT SubVT = Ops[0].getValueType(); + EVT SubVT = Ops[0].getValueType().changeTypeToInteger(); APInt CmpMask = APInt::getLowBitsSet(32, IsAnyOf ? 0 : NumElts / 2); - SDValue V = DAG.getNode(IsAnyOf ? ISD::OR : ISD::AND, DL, SubVT, Ops); + SDValue V = DAG.getNode(IsAnyOf ? ISD::OR : ISD::AND, DL, SubVT, + DAG.getBitcast(SubVT, Ops[0]), + DAG.getBitcast(SubVT, Ops[1])); V = DAG.getBitcast(VecVT.getHalfNumVectorElementsVT(), V); return DAG.getNode(X86ISD::CMP, DL, MVT::i32, DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V), @@ -44522,26 +44585,29 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC, if (IsAllOf && Subtarget.hasSSE41()) { MVT TestVT = VecVT.is128BitVector() ? MVT::v2i64 : MVT::v4i64; SDValue BC = peekThroughBitcasts(Vec); - if (BC.getOpcode() == X86ISD::PCMPEQ) { - SDValue V = DAG.getNode(ISD::SUB, SDLoc(BC), BC.getValueType(), - BC.getOperand(0), BC.getOperand(1)); - V = DAG.getBitcast(TestVT, V); - return DAG.getNode(X86ISD::PTEST, SDLoc(EFLAGS), MVT::i32, V, V); - } - // Check for 256-bit split vector cases. - if (BC.getOpcode() == ISD::AND && - BC.getOperand(0).getOpcode() == X86ISD::PCMPEQ && - BC.getOperand(1).getOpcode() == X86ISD::PCMPEQ) { - SDValue LHS = BC.getOperand(0); - SDValue RHS = BC.getOperand(1); - LHS = DAG.getNode(ISD::SUB, SDLoc(LHS), LHS.getValueType(), - LHS.getOperand(0), LHS.getOperand(1)); - RHS = DAG.getNode(ISD::SUB, SDLoc(RHS), RHS.getValueType(), - RHS.getOperand(0), RHS.getOperand(1)); - LHS = DAG.getBitcast(TestVT, LHS); - RHS = DAG.getBitcast(TestVT, RHS); - SDValue V = DAG.getNode(ISD::OR, SDLoc(EFLAGS), TestVT, LHS, RHS); - return DAG.getNode(X86ISD::PTEST, SDLoc(EFLAGS), MVT::i32, V, V); + // Ensure MOVMSK was testing every signbit of BC. + if (BC.getValueType().getVectorNumElements() <= NumElts) { + if (BC.getOpcode() == X86ISD::PCMPEQ) { + SDValue V = DAG.getNode(ISD::SUB, SDLoc(BC), BC.getValueType(), + BC.getOperand(0), BC.getOperand(1)); + V = DAG.getBitcast(TestVT, V); + return DAG.getNode(X86ISD::PTEST, SDLoc(EFLAGS), MVT::i32, V, V); + } + // Check for 256-bit split vector cases. 
+ if (BC.getOpcode() == ISD::AND && + BC.getOperand(0).getOpcode() == X86ISD::PCMPEQ && + BC.getOperand(1).getOpcode() == X86ISD::PCMPEQ) { + SDValue LHS = BC.getOperand(0); + SDValue RHS = BC.getOperand(1); + LHS = DAG.getNode(ISD::SUB, SDLoc(LHS), LHS.getValueType(), + LHS.getOperand(0), LHS.getOperand(1)); + RHS = DAG.getNode(ISD::SUB, SDLoc(RHS), RHS.getValueType(), + RHS.getOperand(0), RHS.getOperand(1)); + LHS = DAG.getBitcast(TestVT, LHS); + RHS = DAG.getBitcast(TestVT, RHS); + SDValue V = DAG.getNode(ISD::OR, SDLoc(EFLAGS), TestVT, LHS, RHS); + return DAG.getNode(X86ISD::PTEST, SDLoc(EFLAGS), MVT::i32, V, V); + } } } @@ -44575,7 +44641,8 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC, if (SDValue Src = getSplitVectorSrc(VecOp0, VecOp1, true)) { SDLoc DL(EFLAGS); SDValue Result = peekThroughBitcasts(Src); - if (IsAllOf && Result.getOpcode() == X86ISD::PCMPEQ) { + if (IsAllOf && Result.getOpcode() == X86ISD::PCMPEQ && + Result.getValueType().getVectorNumElements() <= NumElts) { SDValue V = DAG.getNode(ISD::SUB, DL, Result.getValueType(), Result.getOperand(0), Result.getOperand(1)); V = DAG.getBitcast(MVT::v4i64, V); @@ -46840,14 +46907,18 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG, if (!getTargetConstantBitsFromNode(Op, EltSizeInBits, UndefElts, EltBits)) return false; + APInt DemandedBits = APInt::getZero(EltSizeInBits); APInt DemandedElts = APInt::getZero(NumElts); for (int I = 0; I != NumElts; ++I) - if (!EltBits[I].isZero()) + if (!EltBits[I].isZero()) { + DemandedBits |= EltBits[I]; DemandedElts.setBit(I); + } APInt KnownUndef, KnownZero; return TLI.SimplifyDemandedVectorElts(OtherOp, DemandedElts, KnownUndef, - KnownZero, DCI); + KnownZero, DCI) || + TLI.SimplifyDemandedBits(OtherOp, DemandedBits, DemandedElts, DCI); }; if (SimplifyUndemandedElts(N0, N1) || SimplifyUndemandedElts(N1, N0)) { if (N->getOpcode() != ISD::DELETED_NODE) @@ -49031,8 +49102,13 @@ static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG, return SDValue(); // SSSE3's pshufb results in less instructions in the cases below. - if (Subtarget.hasSSSE3() && NumElems == 8 && InSVT != MVT::i64) - return SDValue(); + if (Subtarget.hasSSSE3() && NumElems == 8) { + if (InSVT == MVT::i16) + return SDValue(); + if (InSVT == MVT::i32 && + (OutSVT == MVT::i8 || !Subtarget.hasSSE41() || Subtarget.hasInt256())) + return SDValue(); + } SDLoc DL(N); // SSE2 provides PACKUS for only 2 x v8i16 -> v16i8 and SSE4.1 provides PACKUS @@ -51110,6 +51186,30 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG, DAG.getConstant(NotMask, DL, VT)); } + // Fold movmsk(icmp_eq(and(x,c1),0)) -> movmsk(not(shl(x,c2))) + // iff pow2splat(c1). + if (Src.getOpcode() == X86ISD::PCMPEQ && + Src.getOperand(0).getOpcode() == ISD::AND && + ISD::isBuildVectorAllZeros(Src.getOperand(1).getNode())) { + SDValue LHS = Src.getOperand(0).getOperand(0); + SDValue RHS = Src.getOperand(0).getOperand(1); + KnownBits KnownRHS = DAG.computeKnownBits(RHS); + if (KnownRHS.isConstant() && KnownRHS.getConstant().isPowerOf2()) { + SDLoc DL(N); + MVT ShiftVT = SrcVT; + if (ShiftVT.getScalarType() == MVT::i8) { + // vXi8 shifts - we only care about the signbit so can use PSLLW. 
+ ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2); + LHS = DAG.getBitcast(ShiftVT, LHS); + } + unsigned ShiftAmt = KnownRHS.getConstant().countLeadingZeros(); + LHS = getTargetVShiftByConstNode(X86ISD::VSHLI, DL, ShiftVT, LHS, + ShiftAmt, DAG); + LHS = DAG.getNOT(DL, DAG.getBitcast(SrcVT, LHS), SrcVT); + return DAG.getNode(X86ISD::MOVMSK, DL, VT, LHS); + } + } + // Simplify the inputs. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); APInt DemandedMask(APInt::getAllOnes(NumBits)); diff --git a/llvm/lib/Target/X86/X86LowerAMXType.cpp b/llvm/lib/Target/X86/X86LowerAMXType.cpp index 7368b64efd9a..6206d8efb3d0 100644 --- a/llvm/lib/Target/X86/X86LowerAMXType.cpp +++ b/llvm/lib/Target/X86/X86LowerAMXType.cpp @@ -61,6 +61,8 @@ #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" #include "llvm/Transforms/Utils/Local.h" +#include <map> + using namespace llvm; using namespace PatternMatch; diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index 92acfb93057a..9c16d3750998 100644 --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -23,6 +23,7 @@ #include "llvm/Config/llvm-config.h" #include "llvm/IR/CFG.h" #include "llvm/IR/DIBuilder.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp index ce3c5153bde2..e6a542385662 100644 --- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -46,6 +46,7 @@ #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Argument.h" @@ -365,26 +366,25 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, // Loop over the argument list, transferring uses of the old arguments over to // the new arguments, also transferring over the names as well. - for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(), - I2 = NF->arg_begin(); - I != E; ++I) { - if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) { + Function::arg_iterator I2 = NF->arg_begin(); + for (Argument &Arg : F->args()) { + if (!ArgsToPromote.count(&Arg) && !ByValArgsToTransform.count(&Arg)) { // If this is an unmodified argument, move the name and users over to the // new version. - I->replaceAllUsesWith(&*I2); - I2->takeName(&*I); + Arg.replaceAllUsesWith(&*I2); + I2->takeName(&Arg); ++I2; continue; } - if (ByValArgsToTransform.count(&*I)) { + if (ByValArgsToTransform.count(&Arg)) { // In the callee, we create an alloca, and store each of the new incoming // arguments into the alloca. Instruction *InsertPt = &NF->begin()->front(); // Just add all the struct element types. - Type *AgTy = I->getParamByValType(); - Align StructAlign = *I->getParamAlign(); + Type *AgTy = Arg.getParamByValType(); + Align StructAlign = *Arg.getParamAlign(); Value *TheAlloca = new AllocaInst(AgTy, DL.getAllocaAddrSpace(), nullptr, StructAlign, "", InsertPt); StructType *STy = cast<StructType>(AgTy); @@ -397,41 +397,41 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, Value *Idx = GetElementPtrInst::Create( AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i), InsertPt); - I2->setName(I->getName() + "." 
+ Twine(i)); + I2->setName(Arg.getName() + "." + Twine(i)); Align Alignment = commonAlignment(StructAlign, SL->getElementOffset(i)); new StoreInst(&*I2++, Idx, false, Alignment, InsertPt); } // Anything that used the arg should now use the alloca. - I->replaceAllUsesWith(TheAlloca); - TheAlloca->takeName(&*I); + Arg.replaceAllUsesWith(TheAlloca); + TheAlloca->takeName(&Arg); continue; } // There potentially are metadata uses for things like llvm.dbg.value. // Replace them with undef, after handling the other regular uses. auto RauwUndefMetadata = make_scope_exit( - [&]() { I->replaceAllUsesWith(UndefValue::get(I->getType())); }); + [&]() { Arg.replaceAllUsesWith(UndefValue::get(Arg.getType())); }); - if (I->use_empty()) + if (Arg.use_empty()) continue; // Otherwise, if we promoted this argument, then all users are load // instructions (or GEPs with only load users), and all loads should be // using the new argument that we added. - ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; + ScalarizeTable &ArgIndices = ScalarizedElements[&Arg]; - while (!I->use_empty()) { - if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) { + while (!Arg.use_empty()) { + if (LoadInst *LI = dyn_cast<LoadInst>(Arg.user_back())) { assert(ArgIndices.begin()->second.empty() && "Load element should sort to front!"); - I2->setName(I->getName() + ".val"); + I2->setName(Arg.getName() + ".val"); LI->replaceAllUsesWith(&*I2); LI->eraseFromParent(); - LLVM_DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName() + LLVM_DEBUG(dbgs() << "*** Promoted load of argument '" << Arg.getName() << "' in function '" << F->getName() << "'\n"); } else { - GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->user_back()); + GetElementPtrInst *GEP = cast<GetElementPtrInst>(Arg.user_back()); assert(!GEP->use_empty() && "GEPs without uses should be cleaned up already"); IndicesVector Operands; @@ -449,7 +449,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, assert(It != ArgIndices.end() && "GEP not handled??"); } - TheArg->setName(formatv("{0}.{1:$[.]}.val", I->getName(), + TheArg->setName(formatv("{0}.{1:$[.]}.val", Arg.getName(), make_range(Operands.begin(), Operands.end()))); LLVM_DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName() @@ -610,12 +610,12 @@ static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR return true; }; - // First, iterate the entry block and mark loads of (geps of) arguments as - // safe. + // First, iterate the instructions that are guaranteed to execute on + // function entry and mark loads of (geps of) arguments as safe. BasicBlock &EntryBlock = Arg->getParent()->front(); // Declare this here so we can reuse it IndicesVector Indices; - for (Instruction &I : EntryBlock) + for (Instruction &I : EntryBlock) { if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { Value *V = LI->getPointerOperand(); if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) { @@ -649,6 +649,10 @@ static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR } } + if (!isGuaranteedToTransferExecutionToSuccessor(&I)) + break; + } + // Now, iterate all uses of the argument to see if there are any uses that are // not (GEP+)loads, or any (GEP+)loads that are not safe to promote. SmallVector<LoadInst *, 16> Loads; @@ -830,7 +834,10 @@ static bool canPaddingBeAccessed(Argument *arg) { return false; } -bool ArgumentPromotionPass::areFunctionArgsABICompatible( +/// Check if callers and the callee \p F agree how promoted arguments would be +/// passed.
Arguments they do not agree on are removed from the sets, and +/// callers must also check the return value. +static bool areFunctionArgsABICompatible( const Function &F, const TargetTransformInfo &TTI, SmallPtrSetImpl<Argument *> &ArgsToPromote, SmallPtrSetImpl<Argument *> &ByValArgsToTransform) { @@ -1003,7 +1010,7 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter, if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) return nullptr; - if (!ArgumentPromotionPass::areFunctionArgsABICompatible( + if (!areFunctionArgsABICompatible( *F, TTI, ArgsToPromote, ByValArgsToTransform)) return nullptr; diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 12b8a0ef9d00..d66140a726f6 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -183,6 +183,31 @@ ChangeStatus &llvm::operator&=(ChangeStatus &L, ChangeStatus R) { } ///} +bool AA::isNoSyncInst(Attributor &A, const Instruction &I, + const AbstractAttribute &QueryingAA) { + // We are looking for volatile instructions or non-relaxed atomics. + if (const auto *CB = dyn_cast<CallBase>(&I)) { + if (CB->hasFnAttr(Attribute::NoSync)) + return true; + + // Non-convergent and readnone imply nosync. + if (!CB->isConvergent() && !CB->mayReadOrWriteMemory()) + return true; + + if (AANoSync::isNoSyncIntrinsic(&I)) + return true; + + const auto &NoSyncAA = A.getAAFor<AANoSync>( + QueryingAA, IRPosition::callsite_function(*CB), DepClassTy::OPTIONAL); + return NoSyncAA.isAssumedNoSync(); + } + + if (!I.mayReadOrWriteMemory()) + return true; + + return !I.isVolatile() && !AANoSync::isNonRelaxedAtomic(&I); +} + bool AA::isDynamicallyUnique(Attributor &A, const AbstractAttribute &QueryingAA, const Value &V) { if (auto *C = dyn_cast<Constant>(&V)) @@ -370,6 +395,162 @@ bool AA::getPotentialCopiesOfStoredValue( return true; } +static bool isAssumedReadOnlyOrReadNone(Attributor &A, const IRPosition &IRP, + const AbstractAttribute &QueryingAA, + bool RequireReadNone, bool &IsKnown) { + + IRPosition::Kind Kind = IRP.getPositionKind(); + if (Kind == IRPosition::IRP_FUNCTION || Kind == IRPosition::IRP_CALL_SITE) { + const auto &MemLocAA = + A.getAAFor<AAMemoryLocation>(QueryingAA, IRP, DepClassTy::NONE); + if (MemLocAA.isAssumedReadNone()) { + IsKnown = MemLocAA.isKnownReadNone(); + if (!IsKnown) + A.recordDependence(MemLocAA, QueryingAA, DepClassTy::OPTIONAL); + return true; + } + } + + const auto &MemBehaviorAA = + A.getAAFor<AAMemoryBehavior>(QueryingAA, IRP, DepClassTy::NONE); + if (MemBehaviorAA.isAssumedReadNone() || + (!RequireReadNone && MemBehaviorAA.isAssumedReadOnly())) { + IsKnown = RequireReadNone ?
MemBehaviorAA.isKnownReadNone() + : MemBehaviorAA.isKnownReadOnly(); + if (!IsKnown) + A.recordDependence(MemBehaviorAA, QueryingAA, DepClassTy::OPTIONAL); + return true; + } + + return false; +} + +bool AA::isAssumedReadOnly(Attributor &A, const IRPosition &IRP, + const AbstractAttribute &QueryingAA, bool &IsKnown) { + return isAssumedReadOnlyOrReadNone(A, IRP, QueryingAA, + /* RequireReadNone */ false, IsKnown); +} +bool AA::isAssumedReadNone(Attributor &A, const IRPosition &IRP, + const AbstractAttribute &QueryingAA, bool &IsKnown) { + return isAssumedReadOnlyOrReadNone(A, IRP, QueryingAA, + /* RequireReadNone */ true, IsKnown); +} + +static bool +isPotentiallyReachable(Attributor &A, const Instruction &FromI, + const Instruction *ToI, const Function &ToFn, + const AbstractAttribute &QueryingAA, + std::function<bool(const Function &F)> GoBackwardsCB) { + LLVM_DEBUG(dbgs() << "[AA] isPotentiallyReachable @" << ToFn.getName() + << " from " << FromI << " [GBCB: " << bool(GoBackwardsCB) + << "]\n"); + + SmallPtrSet<const Instruction *, 8> Visited; + SmallVector<const Instruction *> Worklist; + Worklist.push_back(&FromI); + + while (!Worklist.empty()) { + const Instruction *CurFromI = Worklist.pop_back_val(); + if (!Visited.insert(CurFromI).second) + continue; + + const Function *FromFn = CurFromI->getFunction(); + if (FromFn == &ToFn) { + if (!ToI) + return true; + LLVM_DEBUG(dbgs() << "[AA] check " << *ToI << " from " << *CurFromI + << " intraprocedurally\n"); + const auto &ReachabilityAA = A.getAAFor<AAReachability>( + QueryingAA, IRPosition::function(ToFn), DepClassTy::OPTIONAL); + bool Result = ReachabilityAA.isAssumedReachable(A, *CurFromI, *ToI); + LLVM_DEBUG(dbgs() << "[AA] " << *CurFromI << " " + << (Result ? "can potentially " : "cannot ") << "reach " + << *ToI << " [Intra]\n"); + if (Result) + return true; + continue; + } + + // TODO: If we can go arbitrarily backwards we will eventually reach an + // entry point that can reach ToI. Only once this takes a set of blocks + // through which we cannot go, or once we track internal functions not + // accessible from the outside, it makes sense to perform backwards analysis + // in the absence of a GoBackwardsCB. + if (!GoBackwardsCB) { + LLVM_DEBUG(dbgs() << "[AA] check @" << ToFn.getName() << " from " + << *CurFromI << " is not checked backwards, abort\n"); + return true; + } + + // Check if the current instruction is already known to reach the ToFn. + const auto &FnReachabilityAA = A.getAAFor<AAFunctionReachability>( + QueryingAA, IRPosition::function(*FromFn), DepClassTy::OPTIONAL); + bool Result = FnReachabilityAA.instructionCanReach( + A, *CurFromI, ToFn, /* UseBackwards */ false); + LLVM_DEBUG(dbgs() << "[AA] " << *CurFromI << " in @" << FromFn->getName() + << " " << (Result ? "can potentially " : "cannot ") + << "reach @" << ToFn.getName() << " [FromFn]\n"); + if (Result) + return true; + + // If we do not go backwards from the FromFn we are done here and so far we + // could not find a way to reach ToFn/ToI. 
+ if (!GoBackwardsCB(*FromFn)) + continue; + + LLVM_DEBUG(dbgs() << "Stepping backwards to the call sites of @" + << FromFn->getName() << "\n"); + + auto CheckCallSite = [&](AbstractCallSite ACS) { + CallBase *CB = ACS.getInstruction(); + if (!CB) + return false; + + if (isa<InvokeInst>(CB)) + return false; + + Instruction *Inst = CB->getNextNonDebugInstruction(); + Worklist.push_back(Inst); + return true; + }; + + bool AllCallSitesKnown; + Result = !A.checkForAllCallSites(CheckCallSite, *FromFn, + /* RequireAllCallSites */ true, + &QueryingAA, AllCallSitesKnown); + if (Result) { + LLVM_DEBUG(dbgs() << "[AA] stepping back to call sites from " << *CurFromI + << " in @" << FromFn->getName() + << " failed, give up\n"); + return true; + } + + LLVM_DEBUG(dbgs() << "[AA] stepped back to call sites from " << *CurFromI + << " in @" << FromFn->getName() + << " worklist size is: " << Worklist.size() << "\n"); + } + return false; +} + +bool AA::isPotentiallyReachable( + Attributor &A, const Instruction &FromI, const Instruction &ToI, + const AbstractAttribute &QueryingAA, + std::function<bool(const Function &F)> GoBackwardsCB) { + LLVM_DEBUG(dbgs() << "[AA] isPotentiallyReachable " << ToI << " from " + << FromI << " [GBCB: " << bool(GoBackwardsCB) << "]\n"); + const Function *ToFn = ToI.getFunction(); + return ::isPotentiallyReachable(A, FromI, &ToI, *ToFn, QueryingAA, + GoBackwardsCB); +} + +bool AA::isPotentiallyReachable( + Attributor &A, const Instruction &FromI, const Function &ToFn, + const AbstractAttribute &QueryingAA, + std::function<bool(const Function &F)> GoBackwardsCB) { + return ::isPotentiallyReachable(A, FromI, /* ToI */ nullptr, ToFn, QueryingAA, + GoBackwardsCB); +} + /// Return true if \p New is equal or worse than \p Old. static bool isEqualOrWorse(const Attribute &New, const Attribute &Old) { if (!Old.isIntAttribute()) @@ -704,9 +885,8 @@ void IRPosition::verify() { "Expected a nullptr for an invalid position!"); return; case IRP_FLOAT: - assert((!isa<CallBase>(&getAssociatedValue()) && - !isa<Argument>(&getAssociatedValue())) && - "Expected specialized kind for call base and argument values!"); + assert((!isa<Argument>(&getAssociatedValue())) && + "Expected specialized kind for argument values!"); return; case IRP_RETURNED: assert(isa<Function>(getAsValuePtr()) && @@ -900,7 +1080,7 @@ bool Attributor::isAssumedDead(const Use &U, UsedAssumedInformation, CheckBBLivenessOnly, DepClass); } - return isAssumedDead(IRPosition::value(*UserI), QueryingAA, FnLivenessAA, + return isAssumedDead(IRPosition::inst(*UserI), QueryingAA, FnLivenessAA, UsedAssumedInformation, CheckBBLivenessOnly, DepClass); } @@ -923,7 +1103,8 @@ bool Attributor::isAssumedDead(const Instruction &I, // If we have a context instruction and a liveness AA we use it. if (FnLivenessAA && FnLivenessAA->getIRPosition().getAnchorScope() == I.getFunction() && - FnLivenessAA->isAssumedDead(&I)) { + (CheckBBLivenessOnly ? FnLivenessAA->isAssumedDead(I.getParent()) + : FnLivenessAA->isAssumedDead(&I))) { if (QueryingAA) recordDependence(*FnLivenessAA, *QueryingAA, DepClass); if (!FnLivenessAA->isKnownDead(&I)) @@ -934,8 +1115,9 @@ bool Attributor::isAssumedDead(const Instruction &I, if (CheckBBLivenessOnly) return false; - const AAIsDead &IsDeadAA = getOrCreateAAFor<AAIsDead>( - IRPosition::value(I, CBCtx), QueryingAA, DepClassTy::NONE); + const IRPosition IRP = IRPosition::inst(I, CBCtx); + const AAIsDead &IsDeadAA = + getOrCreateAAFor<AAIsDead>(IRP, QueryingAA, DepClassTy::NONE); // Don't check liveness for AAIsDead. 
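The surrounding worklist answers "can FromI reach ToFn?" by walking forward through the reachability AAs and, whenever GoBackwardsCB permits it, restarting the search from the instruction after every call site of the current function. A toy standalone model of that shape, with invented CallGraph maps and a canReach helper (none of this is the Attributor API):

#include <map>
#include <set>
#include <string>
#include <vector>

using CallGraph = std::map<std::string, std::vector<std::string>>;

// Worklist search over functions: follow callees forward, and step back to
// callers only when the callback allows it, mirroring GoBackwardsCB above.
static bool canReach(const CallGraph &Callees, const CallGraph &Callers,
                     const std::string &From, const std::string &To,
                     bool (*GoBackwardsCB)(const std::string &)) {
  std::set<std::string> Visited;
  std::vector<std::string> Worklist{From};
  while (!Worklist.empty()) {
    std::string Cur = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(Cur).second)
      continue;
    if (Cur == To)
      return true;
    if (auto It = Callees.find(Cur); It != Callees.end())
      Worklist.insert(Worklist.end(), It->second.begin(), It->second.end());
    // The real code conservatively answers "reachable" when it may not step
    // backwards; this toy simply skips the backwards step instead.
    if (GoBackwardsCB && GoBackwardsCB(Cur))
      if (auto It = Callers.find(Cur); It != Callers.end())
        Worklist.insert(Worklist.end(), It->second.begin(), It->second.end());
  }
  return false;
}

int main() {
  CallGraph Callees{{"main_fn", {"helper", "leaf"}}};
  CallGraph Callers{{"helper", {"main_fn"}}, {"leaf", {"main_fn"}}};
  // From "helper", "leaf" is only reachable by stepping back to the caller.
  bool Fwd = canReach(Callees, Callers, "helper", "leaf", nullptr);
  bool Back = canReach(Callees, Callers, "helper", "leaf",
                       [](const std::string &) { return true; });
  return (!Fwd && Back) ? 0 : 1;
}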
if (QueryingAA == &IsDeadAA) return false; @@ -1035,8 +1217,14 @@ bool Attributor::checkForAllUses( const Use *U = Worklist.pop_back_val(); if (isa<PHINode>(U->getUser()) && !Visited.insert(U).second) continue; - LLVM_DEBUG(dbgs() << "[Attributor] Check use: " << **U << " in " - << *U->getUser() << "\n"); + LLVM_DEBUG({ + if (auto *Fn = dyn_cast<Function>(U->getUser())) + dbgs() << "[Attributor] Check use: " << **U << " in " << Fn->getName() + << "\n"; + else + dbgs() << "[Attributor] Check use: " << **U << " in " << *U->getUser() + << "\n"; + }); bool UsedAssumedInformation = false; if (isAssumedDead(*U, &QueryingAA, LivenessAA, UsedAssumedInformation, CheckBBLivenessOnly, LivenessDepClass)) { @@ -1126,8 +1314,14 @@ bool Attributor::checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred, SmallVector<const Use *, 8> Uses(make_pointer_range(Fn.uses())); for (unsigned u = 0; u < Uses.size(); ++u) { const Use &U = *Uses[u]; - LLVM_DEBUG(dbgs() << "[Attributor] Check use: " << *U << " in " - << *U.getUser() << "\n"); + LLVM_DEBUG({ + if (auto *Fn = dyn_cast<Function>(U)) + dbgs() << "[Attributor] Check use: " << Fn->getName() << " in " + << *U.getUser() << "\n"; + else + dbgs() << "[Attributor] Check use: " << *U << " in " << *U.getUser() + << "\n"; + }); bool UsedAssumedInformation = false; if (isAssumedDead(U, QueryingAA, nullptr, UsedAssumedInformation, /* CheckBBLivenessOnly */ true)) { @@ -1268,9 +1462,12 @@ static bool checkForAllInstructionsImpl( for (Instruction *I : *Insts) { // Skip dead instructions. if (A && !CheckPotentiallyDead && - A->isAssumedDead(IRPosition::value(*I), QueryingAA, LivenessAA, - UsedAssumedInformation, CheckBBLivenessOnly)) + A->isAssumedDead(IRPosition::inst(*I), QueryingAA, LivenessAA, + UsedAssumedInformation, CheckBBLivenessOnly)) { + LLVM_DEBUG(dbgs() << "[Attributor] Instruction " << *I + << " is potentially dead, skip!\n";); continue; + } if (!Pred(*I)) return false; @@ -1329,7 +1526,7 @@ bool Attributor::checkForAllReadWriteInstructions( for (Instruction *I : InfoCache.getReadOrWriteInstsForFunction(*AssociatedFunction)) { // Skip dead instructions. - if (isAssumedDead(IRPosition::value(*I), &QueryingAA, &LivenessAA, + if (isAssumedDead(IRPosition::inst(*I), &QueryingAA, &LivenessAA, UsedAssumedInformation)) continue; @@ -1381,9 +1578,11 @@ void Attributor::runTillFixpoint() { InvalidAA->Deps.pop_back(); AbstractAttribute *DepAA = cast<AbstractAttribute>(Dep.getPointer()); if (Dep.getInt() == unsigned(DepClassTy::OPTIONAL)) { + LLVM_DEBUG(dbgs() << " - recompute: " << *DepAA); Worklist.insert(DepAA); continue; } + LLVM_DEBUG(dbgs() << " - invalidate: " << *DepAA); DepAA->getState().indicatePessimisticFixpoint(); assert(DepAA->getState().isAtFixpoint() && "Expected fixpoint state!"); if (!DepAA->getState().isValidState()) @@ -1433,6 +1632,9 @@ void Attributor::runTillFixpoint() { // Note that dependent ones are added above. 
Worklist.clear(); Worklist.insert(ChangedAAs.begin(), ChangedAAs.end()); + Worklist.insert(QueryAAsAwaitingUpdate.begin(), + QueryAAsAwaitingUpdate.end()); + QueryAAsAwaitingUpdate.clear(); } while (!Worklist.empty() && (IterationCounter++ < MaxFixedPointIterations || VerifyMaxFixpointIterations)); @@ -1492,6 +1694,12 @@ void Attributor::runTillFixpoint() { } } +void Attributor::registerForUpdate(AbstractAttribute &AA) { + assert(AA.isQueryAA() && + "Non-query AAs should not be required to register for updates!"); + QueryAAsAwaitingUpdate.insert(&AA); +} + ChangeStatus Attributor::manifestAttributes() { TimeTraceScope TimeScope("Attributor::manifestAttributes"); size_t NumFinalAAs = DG.SyntheticRoot.Deps.size(); @@ -1792,7 +2000,7 @@ ChangeStatus Attributor::cleanupIR() { // Actually we do not delete the blocks but squash them into a single // unreachable but untangling branches that jump here is something we need // to do in a more generic way. - DetatchDeadBlocks(ToBeDeletedBBs, nullptr); + detachDeadBlocks(ToBeDeletedBBs, nullptr); } identifyDeadInternalFunctions(); @@ -1897,7 +2105,7 @@ ChangeStatus Attributor::updateAA(AbstractAttribute &AA) { /* CheckBBLivenessOnly */ true)) CS = AA.update(*this); - if (DV.empty()) { + if (!AA.isQueryAA() && DV.empty()) { // If the attribute did not query any non-fix information, the state // will not change and we can indicate that right away. AAState.indicateOptimisticFixpoint(); @@ -2601,12 +2809,12 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { auto CallSitePred = [&](Instruction &I) -> bool { auto &CB = cast<CallBase>(I); - IRPosition CBRetPos = IRPosition::callsite_returned(CB); + IRPosition CBInstPos = IRPosition::inst(CB); IRPosition CBFnPos = IRPosition::callsite_function(CB); // Call sites might be dead if they do not have side effects and no live // users. The return value might be dead if there are no live users. - getOrCreateAAFor<AAIsDead>(CBRetPos); + getOrCreateAAFor<AAIsDead>(CBInstPos); Function *Callee = CB.getCalledFunction(); // TODO: Even if the callee is not known now we might be able to simplify diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 76420783b2d1..2d88e329e093 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" @@ -68,6 +69,12 @@ static cl::opt<unsigned, true> MaxPotentialValues( cl::location(llvm::PotentialConstantIntValuesState::MaxPotentialValues), cl::init(7)); +static cl::opt<unsigned> + MaxInterferingWrites("attributor-max-interfering-writes", cl::Hidden, + cl::desc("Maximum number of interfering writes to " + "check before assuming all might interfere."), + cl::init(6)); + STATISTIC(NumAAs, "Number of abstract attributes created"); // Some helper macros to deal with statistics tracking. @@ -244,6 +251,8 @@ static Value *constructPointer(Type *ResTy, Type *PtrElemTy, Value *Ptr, /// once. Note that the value used for the callback may still be the value /// associated with \p IRP (due to PHIs). To limit how much effort is invested, /// we will never visit more values than specified by \p MaxValues. 
+/// If \p Intraprocedural is set to true, only values valid in the scope of +/// \p CtxI will be visited and simplification into other scopes is prevented. template <typename StateTy> static bool genericValueTraversal( Attributor &A, IRPosition IRP, const AbstractAttribute &QueryingAA, StateTy &State, function_ref<bool(Value &, const Instruction *, StateTy &, bool)> VisitValueCB, const Instruction *CtxI, bool UseValueSimplify = true, int MaxValues = 16, - function_ref<Value *(Value *)> StripCB = nullptr) { + function_ref<Value *(Value *)> StripCB = nullptr, + bool Intraprocedural = false) { const AAIsDead *LivenessAA = nullptr; if (IRP.getAnchorScope()) @@ -281,8 +291,11 @@ static bool genericValueTraversal( continue; // Make sure we limit the compile time for complex expressions. - if (Iteration++ >= MaxValues) + if (Iteration++ >= MaxValues) { + LLVM_DEBUG(dbgs() << "Generic value traversal reached iteration limit: " + << Iteration << "!\n"); return false; + } // Explicitly look through calls with a "returned" attribute if we do // not have a pointer as stripPointerCasts only works on them. @@ -331,10 +344,7 @@ static bool genericValueTraversal( "Expected liveness in the presence of instructions!"); for (unsigned u = 0, e = PHI->getNumIncomingValues(); u < e; u++) { BasicBlock *IncomingBB = PHI->getIncomingBlock(u); - bool UsedAssumedInformation = false; - if (A.isAssumedDead(*IncomingBB->getTerminator(), &QueryingAA, - LivenessAA, UsedAssumedInformation, - /* CheckBBLivenessOnly */ true)) { + if (LivenessAA->isEdgeDead(IncomingBB, PHI->getParent())) { AnyDead = true; continue; } @@ -344,24 +354,49 @@ static bool genericValueTraversal( continue; } + if (auto *Arg = dyn_cast<Argument>(V)) { + if (!Intraprocedural && !Arg->hasPassPointeeByValueCopyAttr()) { + SmallVector<Item> CallSiteValues; + bool AllCallSitesKnown = true; + if (A.checkForAllCallSites( + [&](AbstractCallSite ACS) { + // Callbacks might not have a corresponding call site operand, + // stick with the argument in that case. + Value *CSOp = ACS.getCallArgOperand(*Arg); + if (!CSOp) + return false; + CallSiteValues.push_back({CSOp, ACS.getInstruction()}); + return true; + }, + *Arg->getParent(), true, &QueryingAA, AllCallSitesKnown)) { + Worklist.append(CallSiteValues); + continue; + } + } + } + if (UseValueSimplify && !isa<Constant>(V)) { bool UsedAssumedInformation = false; Optional<Value *> SimpleV = A.getAssumedSimplified(*V, QueryingAA, UsedAssumedInformation); if (!SimpleV.hasValue()) continue; - if (!SimpleV.getValue()) - return false; Value *NewV = SimpleV.getValue(); - if (NewV != V) { - Worklist.push_back({NewV, CtxI}); - continue; + if (NewV && NewV != V) { + if (!Intraprocedural || !CtxI || + AA::isValidInScope(*NewV, CtxI->getFunction())) { + Worklist.push_back({NewV, CtxI}); + continue; + } } } // Once a leaf is reached we inform the user through the callback. - if (!VisitValueCB(*V, CtxI, State, Iteration > 1)) + if (!VisitValueCB(*V, CtxI, State, Iteration > 1)) { + LLVM_DEBUG(dbgs() << "Generic value traversal visit callback failed for: " + << *V << "!\n"); return false; + } } while (!Worklist.empty()); // If we actually used liveness information, we have to record a dependence.
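Stripped of the Attributor detail, genericValueTraversal is a capped worklist walk: it looks through PHIs, selects, and (with the new code above) arguments, and hands every leaf value to VisitValueCB. A standalone model of that control flow, where Node and traverse are invented stand-ins for Value and the template:

#include <functional>
#include <vector>

struct Node {
  std::vector<const Node *> Operands; // non-empty: look through (like a PHI)
  int Leaf = 0;                       // payload reported for leaf nodes
};

// Worklist walk with an iteration cap, mirroring the MaxValues cut-off that
// bounds compile time in the function above.
static bool traverse(const Node &Root,
                     const std::function<bool(const Node &)> &VisitValueCB,
                     int MaxValues = 16) {
  std::vector<const Node *> Worklist{&Root};
  int Iteration = 0;
  while (!Worklist.empty()) {
    const Node *V = Worklist.back();
    Worklist.pop_back();
    if (Iteration++ >= MaxValues) // give up rather than walk forever
      return false;
    if (!V->Operands.empty()) { // look through to the incoming values
      Worklist.insert(Worklist.end(), V->Operands.begin(), V->Operands.end());
      continue;
    }
    if (!VisitValueCB(*V)) // a rejected leaf fails the whole traversal
      return false;
  }
  return true;
}

int main() {
  Node A{{}, 1}, B{{}, 2}, Phi{{&A, &B}, 0};
  int Sum = 0;
  bool Ok = traverse(Phi, [&](const Node &N) { Sum += N.Leaf; return true; });
  return (Ok && Sum == 3) ? 0 : 1;
}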
@@ -375,7 +410,8 @@ static bool genericValueTraversal( bool AA::getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr, SmallVectorImpl<Value *> &Objects, const AbstractAttribute &QueryingAA, - const Instruction *CtxI) { + const Instruction *CtxI, + bool Intraprocedural) { auto StripCB = [&](Value *V) { return getUnderlyingObject(V); }; SmallPtrSet<Value *, 8> SeenObjects; auto VisitValueCB = [&SeenObjects](Value &Val, const Instruction *, @@ -387,7 +423,7 @@ bool AA::getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr, }; if (!genericValueTraversal<decltype(Objects)>( A, IRPosition::value(Ptr), QueryingAA, Objects, VisitValueCB, CtxI, - true, 32, StripCB)) + true, 32, StripCB, Intraprocedural)) return false; return true; } @@ -620,7 +656,7 @@ struct AACallSiteReturnedFromReturned : public BaseType { if (!AssociatedFunction) return S.indicatePessimisticFixpoint(); - CallBase &CBContext = static_cast<CallBase &>(this->getAnchorValue()); + CallBase &CBContext = cast<CallBase>(this->getAnchorValue()); if (IntroduceCallBaseContext) LLVM_DEBUG(dbgs() << "[Attributor] Introducing call base context:" << CBContext << "\n"); @@ -1026,7 +1062,6 @@ private: BooleanState BS; }; -namespace { struct AAPointerInfoImpl : public StateWrapper<AA::PointerInfo::State, AAPointerInfo> { using BaseTy = StateWrapper<AA::PointerInfo::State, AAPointerInfo>; @@ -1058,6 +1093,165 @@ struct AAPointerInfoImpl const override { return State::forallInterferingAccesses(SI, CB); } + bool forallInterferingWrites( + Attributor &A, const AbstractAttribute &QueryingAA, LoadInst &LI, + function_ref<bool(const Access &, bool)> UserCB) const override { + SmallPtrSet<const Access *, 8> DominatingWrites; + SmallVector<std::pair<const Access *, bool>, 8> InterferingWrites; + + Function &Scope = *LI.getFunction(); + const auto &NoSyncAA = A.getAAFor<AANoSync>( + QueryingAA, IRPosition::function(Scope), DepClassTy::OPTIONAL); + const auto *ExecDomainAA = A.lookupAAFor<AAExecutionDomain>( + IRPosition::function(Scope), &QueryingAA, DepClassTy::OPTIONAL); + const bool NoSync = NoSyncAA.isAssumedNoSync(); + + // Helper to determine if we need to consider threading, which we cannot + // right now. However, if the function is (assumed) nosync or the thread + // executing all instructions is the main thread only we can ignore + // threading. + auto CanIgnoreThreading = [&](const Instruction &I) -> bool { + if (NoSync) + return true; + if (ExecDomainAA && ExecDomainAA->isExecutedByInitialThreadOnly(I)) + return true; + return false; + }; + + // Helper to determine if the access is executed by the same thread as the + // load, for now it is sufficient to avoid any potential threading effects + // as we cannot deal with them anyway. + auto IsSameThreadAsLoad = [&](const Access &Acc) -> bool { + return CanIgnoreThreading(*Acc.getLocalInst()); + }; + + // TODO: Use inter-procedural reachability and dominance. + const auto &NoRecurseAA = A.getAAFor<AANoRecurse>( + QueryingAA, IRPosition::function(*LI.getFunction()), + DepClassTy::OPTIONAL); + + const bool CanUseCFGResoning = CanIgnoreThreading(LI); + InformationCache &InfoCache = A.getInfoCache(); + const DominatorTree *DT = + NoRecurseAA.isKnownNoRecurse() + ? InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>( + Scope) + : nullptr; + + enum GPUAddressSpace : unsigned { + Generic = 0, + Global = 1, + Shared = 3, + Constant = 4, + Local = 5, + }; + + // Helper to check if a value has "kernel lifetime", that is it will not + // outlive a GPU kernel. 
This is true for shared, constant, and local + // globals on AMD and NVIDIA GPUs. + auto HasKernelLifetime = [&](Value *V, Module &M) { + Triple T(M.getTargetTriple()); + if (!(T.isAMDGPU() || T.isNVPTX())) + return false; + switch (V->getType()->getPointerAddressSpace()) { + case GPUAddressSpace::Shared: + case GPUAddressSpace::Constant: + case GPUAddressSpace::Local: + return true; + default: + return false; + }; + }; + + // The IsLiveInCalleeCB will be used by the AA::isPotentiallyReachable query + // to determine if we should look at reachability from the callee. For + // certain pointers we know the lifetime and we do not have to step into the + // callee to determine reachability as the pointer would be dead in the + // callee. See the conditional initialization below. + std::function<bool(const Function &)> IsLiveInCalleeCB; + + if (auto *AI = dyn_cast<AllocaInst>(&getAssociatedValue())) { + // If the alloca containing function is not recursive the alloca + // must be dead in the callee. + const Function *AIFn = AI->getFunction(); + const auto &NoRecurseAA = A.getAAFor<AANoRecurse>( + *this, IRPosition::function(*AIFn), DepClassTy::OPTIONAL); + if (NoRecurseAA.isAssumedNoRecurse()) { + IsLiveInCalleeCB = [AIFn](const Function &Fn) { return AIFn != &Fn; }; + } + } else if (auto *GV = dyn_cast<GlobalValue>(&getAssociatedValue())) { + // If the global has kernel lifetime we can stop if we reach a kernel + // as it is "dead" in the (unknown) callees. + if (HasKernelLifetime(GV, *GV->getParent())) + IsLiveInCalleeCB = [](const Function &Fn) { + return !Fn.hasFnAttribute("kernel"); + }; + } + + auto AccessCB = [&](const Access &Acc, bool Exact) { + if (!Acc.isWrite()) + return true; + + // For now we only filter accesses based on CFG reasoning which does not + // work yet if we have threading effects, or the access is complicated. + if (CanUseCFGResoning) { + if (!AA::isPotentiallyReachable(A, *Acc.getLocalInst(), LI, QueryingAA, + IsLiveInCalleeCB)) + return true; + if (DT && Exact && + (Acc.getLocalInst()->getFunction() == LI.getFunction()) && + IsSameThreadAsLoad(Acc)) { + if (DT->dominates(Acc.getLocalInst(), &LI)) + DominatingWrites.insert(&Acc); + } + } + + InterferingWrites.push_back({&Acc, Exact}); + return true; + }; + if (!State::forallInterferingAccesses(LI, AccessCB)) + return false; + + // If we cannot use CFG reasoning we only filter the non-write accesses + // and are done here. + if (!CanUseCFGResoning) { + for (auto &It : InterferingWrites) + if (!UserCB(*It.first, It.second)) + return false; + return true; + } + + // Helper to determine if we can skip a specific write access. This is in + // the worst case quadratic as we are looking for another write that will + // hide the effect of this one. + auto CanSkipAccess = [&](const Access &Acc, bool Exact) { + if (!IsSameThreadAsLoad(Acc)) + return false; + if (!DominatingWrites.count(&Acc)) + return false; + for (const Access *DomAcc : DominatingWrites) { + assert(Acc.getLocalInst()->getFunction() == + DomAcc->getLocalInst()->getFunction() && + "Expected dominating writes to be in the same function!"); + + if (DomAcc != &Acc && + DT->dominates(Acc.getLocalInst(), DomAcc->getLocalInst())) { + return true; + } + } + return false; + }; + + // Run the user callback on all writes we cannot skip and return if that + // succeeded for all or not. 
+ unsigned NumInterferingWrites = InterferingWrites.size(); + for (auto &It : InterferingWrites) + if (!DT || NumInterferingWrites > MaxInterferingWrites || + !CanSkipAccess(*It.first, It.second)) + if (!UserCB(*It.first, It.second)) + return false; + return true; + } ChangeStatus translateAndAddCalleeState(Attributor &A, const AAPointerInfo &CalleeAA, @@ -1200,9 +1394,8 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { << " : " << *Idx << "\n"); return false; } - UsrOI.Offset = PtrOI.Offset + - DL.getIndexedOffsetInType( - GEP->getSourceElementType(), Indices); + UsrOI.Offset = PtrOI.Offset + DL.getIndexedOffsetInType( + GEP->getSourceElementType(), Indices); Follow = true; return true; } @@ -1693,17 +1886,9 @@ ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) { auto ReturnValueCB = [&](Value &V, const Instruction *CtxI, ReturnInst &Ret, bool) -> bool { - bool UsedAssumedInformation = false; - Optional<Value *> SimpleRetVal = - A.getAssumedSimplified(V, *this, UsedAssumedInformation); - if (!SimpleRetVal.hasValue()) - return true; - if (!SimpleRetVal.getValue()) - return false; - Value *RetVal = *SimpleRetVal; - assert(AA::isValidInScope(*RetVal, Ret.getFunction()) && + assert(AA::isValidInScope(V, Ret.getFunction()) && "Assumed returned value should be valid in function scope!"); - if (ReturnedValues[RetVal].insert(&Ret)) + if (ReturnedValues[&V].insert(&Ret)) Changed = ChangeStatus::CHANGED; return true; }; @@ -1712,7 +1897,8 @@ ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) { ReturnInst &Ret = cast<ReturnInst>(I); return genericValueTraversal<ReturnInst>( A, IRPosition::value(*Ret.getReturnValue()), *this, Ret, ReturnValueCB, - &I); + &I, /* UseValueSimplify */ true, /* MaxValues */ 16, + /* StripCB */ nullptr, /* Intraprocedural */ true); }; // Discover returned values from all live returned instructions in the @@ -1767,24 +1953,16 @@ struct AANoSyncImpl : AANoSync { /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override; - - /// Helper function used to determine whether an instruction is non-relaxed - /// atomic. In other words, if an atomic instruction does not have unordered - /// or monotonic ordering - static bool isNonRelaxedAtomic(Instruction *I); - - /// Helper function specific for intrinsics which are potentially volatile - static bool isNoSyncIntrinsic(Instruction *I); }; -bool AANoSyncImpl::isNonRelaxedAtomic(Instruction *I) { +bool AANoSync::isNonRelaxedAtomic(const Instruction *I) { if (!I->isAtomic()) return false; if (auto *FI = dyn_cast<FenceInst>(I)) // All legal orderings for fence are stronger than monotonic. return FI->getSyncScopeID() != SyncScope::SingleThread; - else if (auto *AI = dyn_cast<AtomicCmpXchgInst>(I)) { + if (auto *AI = dyn_cast<AtomicCmpXchgInst>(I)) { // Unordered is not a legal ordering for cmpxchg. return (AI->getSuccessOrdering() != AtomicOrdering::Monotonic || AI->getFailureOrdering() != AtomicOrdering::Monotonic); @@ -1813,7 +1991,7 @@ bool AANoSyncImpl::isNonRelaxedAtomic(Instruction *I) { /// Return true if this intrinsic is nosync. This is only used for intrinsics /// which would be nosync except that they have a volatile flag. All other /// intrinsics are simply annotated with the nosync attribute in Intrinsics.td. 
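The ordering distinction behind isNonRelaxedAtomic is the usual one: LLVM's monotonic ordering corresponds to C++'s memory_order_relaxed (and unordered is weaker still); relaxed atomics cannot be used to synchronize two threads, so only stronger orderings defeat nosync. A small illustration in standard C++, with isNonRelaxed as an invented helper rather than the AANoSync API:

#include <atomic>

// Relaxed atomics impose no ordering on other memory accesses; anything
// stronger (acquire/release and up) can publish or consume other writes.
static bool isNonRelaxed(std::memory_order MO) {
  return MO != std::memory_order_relaxed;
}

int main() {
  std::atomic<int> Flag{0};
  Flag.store(1, std::memory_order_relaxed); // cannot synchronize: nosync-safe
  Flag.store(1, std::memory_order_release); // publishes prior writes: syncs
  return (!isNonRelaxed(std::memory_order_relaxed) &&
          isNonRelaxed(std::memory_order_release))
             ? 0
             : 1;
}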
-bool AANoSyncImpl::isNoSyncIntrinsic(Instruction *I) { +bool AANoSync::isNoSyncIntrinsic(const Instruction *I) { if (auto *MI = dyn_cast<MemIntrinsic>(I)) return !MI->isVolatile(); return false; @@ -1822,24 +2000,7 @@ bool AANoSyncImpl::isNoSyncIntrinsic(Instruction *I) { ChangeStatus AANoSyncImpl::updateImpl(Attributor &A) { auto CheckRWInstForNoSync = [&](Instruction &I) { - /// We are looking for volatile instructions or Non-Relaxed atomics. - - if (const auto *CB = dyn_cast<CallBase>(&I)) { - if (CB->hasFnAttr(Attribute::NoSync)) - return true; - - if (isNoSyncIntrinsic(&I)) - return true; - - const auto &NoSyncAA = A.getAAFor<AANoSync>( - *this, IRPosition::callsite_function(*CB), DepClassTy::REQUIRED); - return NoSyncAA.isAssumedNoSync(); - } - - if (!I.isVolatile() && !isNonRelaxedAtomic(&I)) - return true; - - return false; + return AA::isNoSyncInst(A, I, *this); }; auto CheckForNoSync = [&](Instruction &I) { @@ -2327,16 +2488,6 @@ struct AANoRecurseFunction final : AANoRecurseImpl { AANoRecurseFunction(const IRPosition &IRP, Attributor &A) : AANoRecurseImpl(IRP, A) {} - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { - AANoRecurseImpl::initialize(A); - // TODO: We should build a call graph ourselves to enable this in the module - // pass as well. - if (const Function *F = getAnchorScope()) - if (A.getInfoCache().getSccSize(*F) != 1) - indicatePessimisticFixpoint(); - } - /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override { @@ -2359,27 +2510,10 @@ struct AANoRecurseFunction final : AANoRecurseImpl { return ChangeStatus::UNCHANGED; } - // If the above check does not hold anymore we look at the calls. - auto CheckForNoRecurse = [&](Instruction &I) { - const auto &CB = cast<CallBase>(I); - if (CB.hasFnAttr(Attribute::NoRecurse)) - return true; - - const auto &NoRecurseAA = A.getAAFor<AANoRecurse>( - *this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED); - if (!NoRecurseAA.isAssumedNoRecurse()) - return false; - - // Recursion to the same function - if (CB.getCalledFunction() == getAnchorScope()) - return false; - - return true; - }; - - bool UsedAssumedInformation = false; - if (!A.checkForAllCallLikeInstructions(CheckForNoRecurse, *this, - UsedAssumedInformation)) + const AAFunctionReachability &EdgeReachability = + A.getAAFor<AAFunctionReachability>(*this, getIRPosition(), + DepClassTy::REQUIRED); + if (EdgeReachability.canReach(A, *getAnchorScope())) return indicatePessimisticFixpoint(); return ChangeStatus::UNCHANGED; } @@ -2798,16 +2932,10 @@ struct AAWillReturnImpl : public AAWillReturn { (!getAssociatedFunction() || !getAssociatedFunction()->mustProgress())) return false; - const auto &MemAA = - A.getAAFor<AAMemoryBehavior>(*this, getIRPosition(), DepClassTy::NONE); - if (!MemAA.isAssumedReadOnly()) - return false; - if (KnownOnly && !MemAA.isKnownReadOnly()) - return false; - if (!MemAA.isKnownReadOnly()) - A.recordDependence(MemAA, *this, DepClassTy::OPTIONAL); - - return true; + bool IsKnown; + if (AA::isAssumedReadOnly(A, getIRPosition(), *this, IsKnown)) + return IsKnown || !KnownOnly; + return false; } /// See AbstractAttribute::updateImpl(...). @@ -2904,6 +3032,10 @@ struct AAReachabilityImpl : AAReachability { /// See AbstractAttribute::updateImpl(...). 
ChangeStatus updateImpl(Attributor &A) override { + const auto &NoRecurseAA = A.getAAFor<AANoRecurse>( + *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED); + if (!NoRecurseAA.isAssumedNoRecurse()) + return indicatePessimisticFixpoint(); return ChangeStatus::UNCHANGED; } }; @@ -3008,9 +3140,8 @@ struct AANoAliasArgument final return Base::updateImpl(A); // If the argument is read-only, no-alias cannot break synchronization. - const auto &MemBehaviorAA = A.getAAFor<AAMemoryBehavior>( - *this, getIRPosition(), DepClassTy::OPTIONAL); - if (MemBehaviorAA.isAssumedReadOnly()) + bool IsKnown; + if (AA::isAssumedReadOnly(A, getIRPosition(), *this, IsKnown)) return Base::updateImpl(A); // If the argument is never passed through callbacks, no-alias cannot break @@ -3366,14 +3497,8 @@ struct AAIsDeadValueImpl : public AAIsDead { if (!NoUnwindAA.isKnownNoUnwind()) A.recordDependence(NoUnwindAA, *this, DepClassTy::OPTIONAL); - const auto &MemBehaviorAA = - A.getAndUpdateAAFor<AAMemoryBehavior>(*this, CallIRP, DepClassTy::NONE); - if (MemBehaviorAA.isAssumedReadOnly()) { - if (!MemBehaviorAA.isKnownReadOnly()) - A.recordDependence(MemBehaviorAA, *this, DepClassTy::OPTIONAL); - return true; - } - return false; + bool IsKnown; + return AA::isAssumedReadOnly(A, CallIRP, *this, IsKnown); } }; @@ -3699,6 +3824,7 @@ struct AAIsDeadFunction : public AAIsDead { if (!AssumedLiveBlocks.count(&BB)) { A.deleteAfterManifest(BB); ++BUILD_STAT_NAME(AAIsDead, BasicBlock); + HasChanged = ChangeStatus::CHANGED; } return HasChanged; @@ -3708,7 +3834,7 @@ struct AAIsDeadFunction : public AAIsDead { ChangeStatus updateImpl(Attributor &A) override; bool isEdgeDead(const BasicBlock *From, const BasicBlock *To) const override { - return !AssumedLiveEdges.count(std::make_pair(From, To)); + return isValidState() && !AssumedLiveEdges.count(std::make_pair(From, To)); } /// See AbstractAttribute::trackStatistics() @@ -4921,14 +5047,11 @@ ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) { AANoCapture::StateType T; // Readonly means we cannot capture through memory. - const auto &FnMemAA = - A.getAAFor<AAMemoryBehavior>(*this, FnPos, DepClassTy::NONE); - if (FnMemAA.isAssumedReadOnly()) { + bool IsKnown; + if (AA::isAssumedReadOnly(A, FnPos, *this, IsKnown)) { T.addKnownBits(NOT_CAPTURED_IN_MEM); - if (FnMemAA.isKnownReadOnly()) + if (IsKnown) addKnownBits(NOT_CAPTURED_IN_MEM); - else - A.recordDependence(FnMemAA, *this, DepClassTy::OPTIONAL); } // Make sure all returned values are different than the underlying value. 
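Several hunks in this range swap hand-rolled AAMemoryBehavior queries for the shared AA::isAssumedReadOnly helper introduced earlier in this diff, which returns the optimistic answer and reports through an out-parameter whether that answer is already proven. A minimal sketch of the calling convention, using invented types:

#include <cassert>

// The assumed answer may still be revised during fixpoint iteration; the
// known answer is final. Illustrative stand-in for the Attributor state.
struct MemState {
  bool AssumedReadOnly;
  bool KnownReadOnly;
};

static bool isAssumedReadOnly(const MemState &S, bool &IsKnown) {
  IsKnown = S.KnownReadOnly;
  return S.AssumedReadOnly;
}

int main() {
  MemState Optimistic{/*AssumedReadOnly=*/true, /*KnownReadOnly=*/false};
  bool IsKnown = false;
  if (isAssumedReadOnly(Optimistic, IsKnown)) {
    // Callers such as the AANoCapture hunk above set the assumed bit
    // unconditionally and the known bit only when IsKnown is true.
    assert(!IsKnown && "only assumed so far; a dependence must be recorded");
  }
  return 0;
}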
@@ -5085,7 +5208,6 @@ struct AANoCaptureCallSiteReturned final : AANoCaptureImpl { STATS_DECLTRACK_CSRET_ATTR(nocapture) } }; -} // namespace /// ------------------ Value Simplify Attribute ---------------------------- @@ -5106,7 +5228,6 @@ bool ValueSimplifyStateType::unionAssumed(Optional<Value *> Other) { return true; } -namespace { struct AAValueSimplifyImpl : AAValueSimplify { AAValueSimplifyImpl(const IRPosition &IRP, Attributor &A) : AAValueSimplify(IRP, A) {} @@ -5266,8 +5387,6 @@ struct AAValueSimplifyImpl : AAValueSimplify { auto CheckAccess = [&](const AAPointerInfo::Access &Acc, bool IsExact) { LLVM_DEBUG(dbgs() << " - visit access " << Acc << "\n"); - if (!Acc.isWrite()) - return true; if (Acc.isWrittenValueYetUndetermined()) return true; Value *Content = Acc.getWrittenValue(); @@ -5287,7 +5406,7 @@ struct AAValueSimplifyImpl : AAValueSimplify { auto &PI = A.getAAFor<AAPointerInfo>(AA, IRPosition::value(*Obj), DepClassTy::REQUIRED); - if (!PI.forallInterferingAccesses(L, CheckAccess)) + if (!PI.forallInterferingWrites(A, AA, L, CheckAccess)) return false; } return true; @@ -5325,9 +5444,8 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl { if (Arg->hasByValAttr()) { // TODO: We probably need to verify synchronization is not an issue, e.g., // there is no race by not copying a constant byval. - const auto &MemAA = A.getAAFor<AAMemoryBehavior>(*this, getIRPosition(), - DepClassTy::REQUIRED); - if (!MemAA.isAssumedReadOnly()) + bool IsKnown; + if (!AA::isAssumedReadOnly(A, getIRPosition(), *this, IsKnown)) return indicatePessimisticFixpoint(); } @@ -6827,9 +6945,8 @@ struct AAPrivatizablePtrCallSiteArgument final return indicatePessimisticFixpoint(); } - const auto &MemBehaviorAA = - A.getAAFor<AAMemoryBehavior>(*this, IRP, DepClassTy::REQUIRED); - if (!MemBehaviorAA.isAssumedReadOnly()) { + bool IsKnown; + if (!AA::isAssumedReadOnly(A, IRP, *this, IsKnown)) { LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer is written!\n"); return indicatePessimisticFixpoint(); } @@ -7378,7 +7495,6 @@ void AAMemoryBehaviorFloating::analyzeUseIn(Attributor &A, const Use &U, if (UserI->mayWriteToMemory()) removeAssumedBits(NO_WRITES); } -} // namespace /// -------------------- Memory Locations Attributes --------------------------- /// Includes read-none, argmemonly, inaccessiblememonly, @@ -7412,7 +7528,6 @@ std::string AAMemoryLocation::getMemoryLocationsAsStr( return S; } -namespace { struct AAMemoryLocationImpl : public AAMemoryLocation { AAMemoryLocationImpl(const IRPosition &IRP, Attributor &A) @@ -7657,7 +7772,8 @@ void AAMemoryLocationImpl::categorizePtrValue( << getMemoryLocationsAsStr(State.getAssumed()) << "]\n"); SmallVector<Value *, 8> Objects; - if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, *this, &I)) { + if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, *this, &I, + /* Intraprocedural */ true)) { LLVM_DEBUG( dbgs() << "[AAMemoryLocation] Pointer locations not categorized\n"); updateStateAndAccessesMap(State, NO_UNKOWN_MEM, &I, nullptr, Changed, @@ -9411,7 +9527,7 @@ struct AACallEdgesCallSite : public AACallEdgesImpl { } }; - CallBase *CB = static_cast<CallBase *>(getCtxI()); + CallBase *CB = cast<CallBase>(getCtxI()); if (CB->isInlineAsm()) { setHasUnknownCallee(false, Change); @@ -9450,7 +9566,7 @@ struct AACallEdgesFunction : public AACallEdgesImpl { ChangeStatus Change = ChangeStatus::UNCHANGED; auto ProcessCallInst = [&](Instruction &Inst) { - CallBase &CB = static_cast<CallBase &>(Inst); + CallBase &CB = cast<CallBase>(Inst); auto &CBEdges = 
A.getAAFor<AACallEdges>( *this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED); @@ -9481,11 +9597,39 @@ struct AACallEdgesFunction : public AACallEdgesImpl { struct AAFunctionReachabilityFunction : public AAFunctionReachability { private: struct QuerySet { - void markReachable(Function *Fn) { - Reachable.insert(Fn); - Unreachable.erase(Fn); + void markReachable(const Function &Fn) { + Reachable.insert(&Fn); + Unreachable.erase(&Fn); } + /// If there is no information about the function, None is returned. + Optional<bool> isCachedReachable(const Function &Fn) { + // Assume that we can reach the function. + // TODO: Be more specific with the unknown callee. + if (CanReachUnknownCallee) + return true; + + if (Reachable.count(&Fn)) + return true; + + if (Unreachable.count(&Fn)) + return false; + + return llvm::None; + } + + /// Set of functions that we know for sure are reachable. + DenseSet<const Function *> Reachable; + + /// Set of functions that are unreachable, but might become reachable. + DenseSet<const Function *> Unreachable; + + /// If we can reach a function with a call to an unknown function we assume + /// that we can reach any function. + bool CanReachUnknownCallee = false; + }; + + struct QueryResolver : public QuerySet { ChangeStatus update(Attributor &A, const AAFunctionReachability &AA, ArrayRef<const AACallEdges *> AAEdgesList) { ChangeStatus Change = ChangeStatus::UNCHANGED; @@ -9499,31 +9643,30 @@ private: } } - for (Function *Fn : make_early_inc_range(Unreachable)) { - if (checkIfReachable(A, AA, AAEdgesList, Fn)) { + for (const Function *Fn : make_early_inc_range(Unreachable)) { + if (checkIfReachable(A, AA, AAEdgesList, *Fn)) { Change = ChangeStatus::CHANGED; - markReachable(Fn); + markReachable(*Fn); } } return Change; } - bool isReachable(Attributor &A, const AAFunctionReachability &AA, - ArrayRef<const AACallEdges *> AAEdgesList, Function *Fn) { - // Assume that we can reach the function. - // TODO: Be more specific with the unknown callee. - if (CanReachUnknownCallee) - return true; - - if (Reachable.count(Fn)) - return true; + bool isReachable(Attributor &A, AAFunctionReachability &AA, + ArrayRef<const AACallEdges *> AAEdgesList, + const Function &Fn) { + Optional<bool> Cached = isCachedReachable(Fn); + if (Cached.hasValue()) + return Cached.getValue(); - if (Unreachable.count(Fn)) - return false; + // The query was not cached, thus it is new. We need to request an update + // explicitly to make sure the information is properly run to a + // fixpoint. + A.registerForUpdate(AA); // We need to assume that this function can't reach Fn to prevent // an infinite loop if this function is recursive. - Unreachable.insert(Fn); + Unreachable.insert(&Fn); bool Result = checkIfReachable(A, AA, AAEdgesList, Fn); if (Result) @@ -9533,13 +9676,13 @@ private: bool checkIfReachable(Attributor &A, const AAFunctionReachability &AA, ArrayRef<const AACallEdges *> AAEdgesList, - Function *Fn) const { + const Function &Fn) const { // Handle the most trivial case first. for (auto *AAEdges : AAEdgesList) { const SetVector<Function *> &Edges = AAEdges->getOptimisticEdges(); - if (Edges.count(Fn)) + if (Edges.count(const_cast<Function *>(&Fn))) return true; } @@ -9560,28 +9703,44 @@ private: } // The result is false for now, set dependencies and leave.
- for (auto Dep : Deps) - A.recordDependence(AA, *Dep, DepClassTy::REQUIRED); + for (auto *Dep : Deps) + A.recordDependence(*Dep, AA, DepClassTy::REQUIRED); return false; } + }; - /// Set of functions that we know for sure is reachable. - DenseSet<Function *> Reachable; + /// Get call edges that can be reached by this instruction. + bool getReachableCallEdges(Attributor &A, const AAReachability &Reachability, + const Instruction &Inst, + SmallVector<const AACallEdges *> &Result) const { + // Determine call like instructions that we can reach from the inst. + auto CheckCallBase = [&](Instruction &CBInst) { + if (!Reachability.isAssumedReachable(A, Inst, CBInst)) + return true; - /// Set of functions that are unreachable, but might become reachable. - DenseSet<Function *> Unreachable; + auto &CB = cast<CallBase>(CBInst); + const AACallEdges &AAEdges = A.getAAFor<AACallEdges>( + *this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED); - /// If we can reach a function with a call to a unknown function we assume - /// that we can reach any function. - bool CanReachUnknownCallee = false; - }; + Result.push_back(&AAEdges); + return true; + }; + + bool UsedAssumedInformation = false; + return A.checkForAllCallLikeInstructions(CheckCallBase, *this, + UsedAssumedInformation, + /* CheckBBLivenessOnly */ true); + } public: AAFunctionReachabilityFunction(const IRPosition &IRP, Attributor &A) : AAFunctionReachability(IRP, A) {} - bool canReach(Attributor &A, Function *Fn) const override { + bool canReach(Attributor &A, const Function &Fn) const override { + if (!isValidState()) + return true; + const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(*this, getIRPosition(), DepClassTy::REQUIRED); @@ -9590,14 +9749,18 @@ public: // a const_cast. // This is a hack for us to be able to cache queries. auto *NonConstThis = const_cast<AAFunctionReachabilityFunction *>(this); - bool Result = - NonConstThis->WholeFunction.isReachable(A, *this, {&AAEdges}, Fn); + bool Result = NonConstThis->WholeFunction.isReachable(A, *NonConstThis, + {&AAEdges}, Fn); return Result; } /// Can \p CB reach \p Fn - bool canReach(Attributor &A, CallBase &CB, Function *Fn) const override { + bool canReach(Attributor &A, CallBase &CB, + const Function &Fn) const override { + if (!isValidState()) + return true; + const AACallEdges &AAEdges = A.getAAFor<AACallEdges>( *this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED); @@ -9606,13 +9769,40 @@ public: // a const_cast. // This is a hack for us to be able to cache queries. auto *NonConstThis = const_cast<AAFunctionReachabilityFunction *>(this); - QuerySet &CBQuery = NonConstThis->CBQueries[&CB]; + QueryResolver &CBQuery = NonConstThis->CBQueries[&CB]; - bool Result = CBQuery.isReachable(A, *this, {&AAEdges}, Fn); + bool Result = CBQuery.isReachable(A, *NonConstThis, {&AAEdges}, Fn); return Result; } + bool instructionCanReach(Attributor &A, const Instruction &Inst, + const Function &Fn, + bool UseBackwards) const override { + if (!isValidState()) + return true; + + if (UseBackwards) + return AA::isPotentiallyReachable(A, Inst, Fn, *this, nullptr); + + const auto &Reachability = A.getAAFor<AAReachability>( + *this, IRPosition::function(*getAssociatedFunction()), + DepClassTy::REQUIRED); + + SmallVector<const AACallEdges *> CallEdges; + bool AllKnown = getReachableCallEdges(A, Reachability, Inst, CallEdges); + // Attributor returns attributes as const, so this function has to be + // const for users of this attribute to use it without having to do + // a const_cast. 
+ // This is a hack for us to be able to cache queries. + auto *NonConstThis = const_cast<AAFunctionReachabilityFunction *>(this); + QueryResolver &InstQSet = NonConstThis->InstQueries[&Inst]; + if (!AllKnown) + InstQSet.CanReachUnknownCallee = true; + + return InstQSet.isReachable(A, *NonConstThis, CallEdges, Fn); + } + /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override { const AACallEdges &AAEdges = @@ -9621,7 +9811,7 @@ public: Change |= WholeFunction.update(A, *this, {&AAEdges}); - for (auto CBPair : CBQueries) { + for (auto &CBPair : CBQueries) { const AACallEdges &AAEdges = A.getAAFor<AACallEdges>( *this, IRPosition::callsite_function(*CBPair.first), DepClassTy::REQUIRED); @@ -9629,6 +9819,25 @@ public: Change |= CBPair.second.update(A, *this, {&AAEdges}); } + // Update the Instruction queries. + const AAReachability *Reachability; + if (!InstQueries.empty()) { + Reachability = &A.getAAFor<AAReachability>( + *this, IRPosition::function(*getAssociatedFunction()), + DepClassTy::REQUIRED); + } + + // Check for local call bases first. + for (auto &InstPair : InstQueries) { + SmallVector<const AACallEdges *> CallEdges; + bool AllKnown = + getReachableCallEdges(A, *Reachability, *InstPair.first, CallEdges); + // Update will return change if this affects any queries. + if (!AllKnown) + InstPair.second.CanReachUnknownCallee = true; + Change |= InstPair.second.update(A, *this, CallEdges); + } + return Change; } @@ -9649,11 +9858,14 @@ private: } /// Used to answer if the whole function can reach a specific function. - QuerySet WholeFunction; + QueryResolver WholeFunction; /// Used to answer if a call base inside this function can reach a specific /// function. - DenseMap<CallBase *, QuerySet> CBQueries; + DenseMap<const CallBase *, QueryResolver> CBQueries; + + /// This is for instruction queries that scan "forward".
+ DenseMap<const Instruction *, QueryResolver> InstQueries; }; /// ---------------------- Assumption Propagation ------------------------------ @@ -9790,8 +10002,6 @@ private: } }; -} // namespace - AACallGraphNode *AACallEdgeIterator::operator*() const { return static_cast<AACallGraphNode *>(const_cast<AACallEdges *>( &A.getOrCreateAAFor<AACallEdges>(IRPosition::function(**I)))); diff --git a/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp b/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp index 74f11fa30959..927dceec8865 100644 --- a/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp +++ b/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/ValueLatticeUtils.h" #include "llvm/IR/MDBuilder.h" #include "llvm/InitializePasses.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/IPO.h" using namespace llvm; diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index d3cac3efce86..1cb32e32c895 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -352,14 +352,10 @@ static bool collectSRATypes(DenseMap<uint64_t, Type *> &Types, GlobalValue *GV, while (!Worklist.empty()) { Use *U = Worklist.pop_back_val(); User *V = U->getUser(); - if (isa<BitCastOperator>(V) || isa<AddrSpaceCastOperator>(V)) { - AppendUses(V); - continue; - } - if (auto *GEP = dyn_cast<GEPOperator>(V)) { - if (!GEP->hasAllConstantIndices()) - return false; + auto *GEP = dyn_cast<GEPOperator>(V); + if (isa<BitCastOperator>(V) || isa<AddrSpaceCastOperator>(V) || + (GEP && GEP->hasAllConstantIndices())) { AppendUses(V); continue; } @@ -2229,6 +2225,13 @@ OptimizeGlobalAliases(Module &M, for (GlobalValue *GV : Used.used()) Used.compilerUsedErase(GV); + // Return whether GV is explicitly or implicitly dso_local and not replaceable + // by another definition in the current linkage unit. + auto IsModuleLocal = [](GlobalValue &GV) { + return !GlobalValue::isInterposableLinkage(GV.getLinkage()) && + (GV.isDSOLocal() || GV.isImplicitDSOLocal()); + }; + for (GlobalAlias &J : llvm::make_early_inc_range(M.aliases())) { // Aliases without names cannot be referenced outside this module. if (!J.hasName() && !J.isDeclaration() && !J.hasLocalLinkage()) @@ -2240,18 +2243,20 @@ OptimizeGlobalAliases(Module &M, } // If the alias can change at link time, nothing can be done - bail out. - if (J.isInterposable()) + if (!IsModuleLocal(J)) continue; Constant *Aliasee = J.getAliasee(); GlobalValue *Target = dyn_cast<GlobalValue>(Aliasee->stripPointerCasts()); // We can't trivially replace the alias with the aliasee if the aliasee is // non-trivial in some way. We also can't replace the alias with the aliasee - // if the aliasee is interposable because aliases point to the local - // definition. + // if the aliasee may be preemptible at runtime. On ELF, a non-preemptible + // alias can be used to access the definition as if preemption did not + // happen. // TODO: Try to handle non-zero GEPs of local aliasees. - if (!Target || Target->isInterposable()) + if (!Target || !IsModuleLocal(*Target)) continue; + Target->removeDeadConstantUsers(); // Make all users of the alias use the aliasee instead. 
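To make the new aliasing rule above concrete, here is a small standalone C++ sketch of how the IsModuleLocal predicate gates alias replacement; the function name is illustrative and not part of the patch:

#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
using namespace llvm;

// An alias can only be folded into its aliasee when neither symbol can be
// preempted by another definition at link or load time; otherwise users of
// the non-preemptible alias could start observing a different definition.
static bool canFoldAliasIntoAliasee(const GlobalAlias &J,
                                    const GlobalValue &Target) {
  auto IsModuleLocal = [](const GlobalValue &GV) {
    return !GlobalValue::isInterposableLinkage(GV.getLinkage()) &&
           (GV.isDSOLocal() || GV.isImplicitDSOLocal());
  };
  return IsModuleLocal(J) && IsModuleLocal(Target);
}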
diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp index e064fbbef595..faf7cb7d566a 100644 --- a/llvm/lib/Transforms/IPO/IROutliner.cpp +++ b/llvm/lib/Transforms/IPO/IROutliner.cpp @@ -42,6 +42,11 @@ extern cl::opt<bool> DisableBranches; // A command flag to be used for debugging to exclude indirect calls from // similarity matching and outlining. extern cl::opt<bool> DisableIndirectCalls; + +// A command flag to be used for debugging to exclude intrinsics from similarity +// matching and outlining. +extern cl::opt<bool> DisableIntrinsics; + } // namespace llvm // Set to true if the user wants the ir outliner to run on linkonceodr linkage @@ -2610,6 +2615,8 @@ unsigned IROutliner::doOutline(Module &M) { // Find the possible similarity sections. InstructionClassifier.EnableBranches = !DisableBranches; InstructionClassifier.EnableIndirectCalls = !DisableIndirectCalls; + InstructionClassifier.EnableIntrinsics = !DisableIntrinsics; + IRSimilarityIdentifier &Identifier = getIRSI(M); SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity(); diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp index c0bb19e184d6..8e83d7bcb6c2 100644 --- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp @@ -42,6 +42,7 @@ #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 68f33410c602..2d765fb6ce6d 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -26,19 +26,25 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/Assumptions.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsNVPTX.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/Attributor.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -98,6 +104,11 @@ static cl::opt<bool> DisableOpenMPOptStateMachineRewrite( cl::desc("Disable OpenMP optimizations that replace the state machine."), cl::Hidden, cl::init(false)); +static cl::opt<bool> DisableOpenMPOptBarrierElimination( + "openmp-opt-disable-barrier-elimination", cl::ZeroOrMore, + cl::desc("Disable OpenMP optimizations that eliminate barriers."), + cl::Hidden, cl::init(false)); + static cl::opt<bool> PrintModuleAfterOptimizations( "openmp-opt-print-module", cl::ZeroOrMore, cl::desc("Print the current module after OpenMP optimizations."), @@ -147,6 +158,7 @@ STATISTIC(NumOpenMPParallelRegionsMerged, "Number of OpenMP parallel regions merged"); STATISTIC(NumBytesMovedToSharedMemory, "Amount of memory pushed to shared memory");
+STATISTIC(NumBarriersEliminated, "Number of redundant barriers eliminated"); #if !defined(NDEBUG) static constexpr auto TAG = "[" DEBUG_TYPE "]"; @@ -458,7 +470,6 @@ struct OMPInformationCache : public InformationCache { RTLFunctions.insert(F); \ if (declMatchesRTFTypes(F, OMPBuilder._ReturnType, ArgsTypes)) { \ RuntimeFunctionIDMap[F] = _Enum; \ - F->removeFnAttr(Attribute::NoInline); \ auto &RFI = RFIs[_Enum]; \ RFI.Kind = _Enum; \ RFI.Name = _Name; \ @@ -480,6 +491,15 @@ struct OMPInformationCache : public InformationCache { } #include "llvm/Frontend/OpenMP/OMPKinds.def" + // Remove the `noinline` attribute from `__kmpc`, `_OMP::` and `omp_` + // functions, except if `optnone` is present. + for (Function &F : M) { + for (StringRef Prefix : {"__kmpc", "_ZN4_OMP", "omp_"}) + if (F.getName().startswith(Prefix) && + !F.hasFnAttribute(Attribute::OptimizeNone)) + F.removeFnAttr(Attribute::NoInline); + } + // TODO: We should attach the attributes defined in OMPKinds.def. } @@ -787,6 +807,8 @@ struct OpenMPOpt { if (remarksEnabled()) analysisGlobalization(); + + Changed |= eliminateBarriers(); } else { if (PrintICVValues) printICVs(); @@ -809,6 +831,8 @@ struct OpenMPOpt { Changed = true; } } + + Changed |= eliminateBarriers(); } return Changed; @@ -1378,6 +1402,213 @@ private: return Changed; } + /// Eliminates redundant, aligned barriers in OpenMP offloaded kernels. + /// TODO: Make this an AA and expand it to work across blocks and functions. + bool eliminateBarriers() { + bool Changed = false; + + if (DisableOpenMPOptBarrierElimination) + return /*Changed=*/false; + + if (OMPInfoCache.Kernels.empty()) + return /*Changed=*/false; + + enum ImplicitBarrierType { IBT_ENTRY, IBT_EXIT }; + + class BarrierInfo { + Instruction *I; + enum ImplicitBarrierType Type; + + public: + BarrierInfo(enum ImplicitBarrierType Type) : I(nullptr), Type(Type) {} + BarrierInfo(Instruction &I) : I(&I) {} + + bool isImplicit() { return !I; } + + bool isImplicitEntry() { return isImplicit() && Type == IBT_ENTRY; } + + bool isImplicitExit() { return isImplicit() && Type == IBT_EXIT; } + + Instruction *getInstruction() { return I; } + }; + + for (Function *Kernel : OMPInfoCache.Kernels) { + for (BasicBlock &BB : *Kernel) { + SmallVector<BarrierInfo, 8> BarriersInBlock; + SmallPtrSet<Instruction *, 8> BarriersToBeDeleted; + + // Add the kernel entry implicit barrier. + if (&Kernel->getEntryBlock() == &BB) + BarriersInBlock.push_back(IBT_ENTRY); + + // Find implicit and explicit aligned barriers in the same basic block. + for (Instruction &I : BB) { + if (isa<ReturnInst>(I)) { + // Add the implicit barrier when exiting the kernel. + BarriersInBlock.push_back(IBT_EXIT); + continue; + } + CallBase *CB = dyn_cast<CallBase>(&I); + if (!CB) + continue; + + auto IsAlignBarrierCB = [&](CallBase &CB) { + switch (CB.getIntrinsicID()) { + case Intrinsic::nvvm_barrier0: + case Intrinsic::nvvm_barrier0_and: + case Intrinsic::nvvm_barrier0_or: + case Intrinsic::nvvm_barrier0_popc: + case Intrinsic::amdgcn_s_barrier: + return true; + default: + break; + } + return hasAssumption(CB, + KnownAssumptionString("ompx_aligned_barrier")); + }; + + if (IsAlignBarrierCB(*CB)) { + // Add an explicit aligned barrier. + BarriersInBlock.push_back(I); + } + } + + if (BarriersInBlock.size() <= 1) + continue; + + // A barrier in a barrier pair is removable if all instructions + // between the barriers in the pair are side-effect free modulo the + // barrier operation. 
+ auto IsBarrierRemoveable = [&Kernel](BarrierInfo *StartBI, + BarrierInfo *EndBI) { + assert( + !StartBI->isImplicitExit() && + "Expected start barrier to be other than a kernel exit barrier"); + assert( + !EndBI->isImplicitEntry() && + "Expected end barrier to be other than a kernel entry barrier"); + // If StartBI's instruction is null then this is the implicit + // kernel entry barrier, so iterate from the first instruction in the + // entry block. + Instruction *I = (StartBI->isImplicitEntry()) + ? &Kernel->getEntryBlock().front() + : StartBI->getInstruction()->getNextNode(); + assert(I && "Expected non-null start instruction"); + Instruction *E = (EndBI->isImplicitExit()) + ? I->getParent()->getTerminator() + : EndBI->getInstruction(); + assert(E && "Expected non-null end instruction"); + + for (; I != E; I = I->getNextNode()) { + if (!I->mayHaveSideEffects() && !I->mayReadFromMemory()) + continue; + + auto IsPotentiallyAffectedByBarrier = + [](Optional<MemoryLocation> Loc) { + const Value *Obj = (Loc && Loc->Ptr) + ? getUnderlyingObject(Loc->Ptr) + : nullptr; + if (!Obj) { + LLVM_DEBUG( + dbgs() + << "Access to unknown location requires barriers\n"); + return true; + } + if (isa<UndefValue>(Obj)) + return false; + if (isa<AllocaInst>(Obj)) + return false; + if (auto *GV = dyn_cast<GlobalVariable>(Obj)) { + if (GV->isConstant()) + return false; + if (GV->isThreadLocal()) + return false; + if (GV->getAddressSpace() == (int)AddressSpace::Local) + return false; + if (GV->getAddressSpace() == (int)AddressSpace::Constant) + return false; + } + LLVM_DEBUG(dbgs() << "Access to '" << *Obj + << "' requires barriers\n"); + return true; + }; + + if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) { + Optional<MemoryLocation> Loc = MemoryLocation::getForDest(MI); + if (IsPotentiallyAffectedByBarrier(Loc)) + return false; + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) { + Optional<MemoryLocation> Loc = + MemoryLocation::getForSource(MTI); + if (IsPotentiallyAffectedByBarrier(Loc)) + return false; + } + continue; + } + + if (auto *LI = dyn_cast<LoadInst>(I)) + if (LI->hasMetadata(LLVMContext::MD_invariant_load)) + continue; + + Optional<MemoryLocation> Loc = MemoryLocation::getOrNone(I); + if (IsPotentiallyAffectedByBarrier(Loc)) + return false; + } + + return true; + }; + + // Iterate barrier pairs and remove an explicit barrier if analysis + // deems it removable. + for (auto *It = BarriersInBlock.begin(), + *End = BarriersInBlock.end() - 1; + It != End; ++It) { + + BarrierInfo *StartBI = It; + BarrierInfo *EndBI = (It + 1); + + // Cannot remove when both are implicit barriers, continue. + if (StartBI->isImplicit() && EndBI->isImplicit()) + continue; + + if (!IsBarrierRemoveable(StartBI, EndBI)) + continue; + + assert(!(StartBI->isImplicit() && EndBI->isImplicit()) && + "Expected at least one explicit barrier to remove."); + + // Remove an explicit barrier, check first, then second.
+ if (!StartBI->isImplicit()) { + LLVM_DEBUG(dbgs() << "Remove start barrier " + << *StartBI->getInstruction() << "\n"); + BarriersToBeDeleted.insert(StartBI->getInstruction()); + } else { + LLVM_DEBUG(dbgs() << "Remove end barrier " + << *EndBI->getInstruction() << "\n"); + BarriersToBeDeleted.insert(EndBI->getInstruction()); + } + } + + if (BarriersToBeDeleted.empty()) + continue; + + Changed = true; + for (Instruction *I : BarriersToBeDeleted) { + ++NumBarriersEliminated; + auto Remark = [&](OptimizationRemark OR) { + return OR << "Redundant barrier eliminated."; + }; + + if (EnableVerboseRemarks) + emitRemark<OptimizationRemark>(I, "OMP190", Remark); + I->eraseFromParent(); + } + } + } + + return Changed; + } + void analysisGlobalization() { auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; diff --git a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp index 21395460bccb..e104ae00e916 100644 --- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp @@ -23,6 +23,7 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/MDBuilder.h" #include "llvm/ProfileData/SampleProf.h" #include "llvm/Support/CRC.h" diff --git a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp index daaf6cbeb3fd..52708ff2f226 100644 --- a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -535,7 +535,7 @@ void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS, // the information that is needed by thin link will be written in the // given OS. if (ThinLinkOS && Index) - WriteThinLinkBitcodeToFile(M, *ThinLinkOS, *Index, ModHash); + writeThinLinkBitcodeToFile(M, *ThinLinkOS, *Index, ModHash); } class WriteThinLTOBitcode : public ModulePass { diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp index 6acace1d9fd4..8b30f0e989a1 100644 --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -970,7 +970,7 @@ bool DevirtModule::runForTesting( if (StringRef(ClWriteSummary).endswith(".bc")) { raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_None); ExitOnErr(errorCodeToError(EC)); - WriteIndexToFile(*Summary, OS); + writeIndexToFile(*Summary, OS); } else { raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_TextWithCRLF); ExitOnErr(errorCodeToError(EC)); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 1fb46af46bee..05b28328afbf 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -2468,10 +2468,28 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { // Fence instruction simplification Instruction *InstCombinerImpl::visitFenceInst(FenceInst &FI) { - // Remove identical consecutive fences. - Instruction *Next = FI.getNextNonDebugInstruction(); - if (auto *NFI = dyn_cast<FenceInst>(Next)) - if (FI.isIdenticalTo(NFI)) + auto *NFI = dyn_cast<FenceInst>(FI.getNextNonDebugInstruction()); + // This check is solely here to handle arbitrary target-dependent syncscopes. + // TODO: Can remove if does not matter in practice. 
+ if (NFI && FI.isIdenticalTo(NFI)) + return eraseInstFromFunction(FI); + + // Returns true if FI1 is an identical or stronger fence than FI2. + auto isIdenticalOrStrongerFence = [](FenceInst *FI1, FenceInst *FI2) { + auto FI1SyncScope = FI1->getSyncScopeID(); + // Consider same scope, where scope is global or single-thread. + if (FI1SyncScope != FI2->getSyncScopeID() || + (FI1SyncScope != SyncScope::System && + FI1SyncScope != SyncScope::SingleThread)) + return false; + + return isAtLeastOrStrongerThan(FI1->getOrdering(), FI2->getOrdering()); + }; + if (NFI && isIdenticalOrStrongerFence(NFI, &FI)) + return eraseInstFromFunction(FI); + + if (auto *PFI = dyn_cast_or_null<FenceInst>(FI.getPrevNonDebugInstruction())) + if (isIdenticalOrStrongerFence(PFI, &FI)) return eraseInstFromFunction(FI); return nullptr; } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index fd58a44504b3..e45be5745fcc 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -5882,6 +5882,55 @@ static Instruction *foldICmpInvariantGroup(ICmpInst &I) { return nullptr; } +/// This function folds patterns produced by lowering of reduce idioms, such as +/// llvm.vector.reduce.and, which are lowered into instruction chains. This code +/// attempts to generate fewer scalar comparisons instead of vector +/// comparisons when possible. +static Instruction *foldReductionIdiom(ICmpInst &I, + InstCombiner::BuilderTy &Builder, + const DataLayout &DL) { + if (I.getType()->isVectorTy()) + return nullptr; + ICmpInst::Predicate OuterPred, InnerPred; + Value *LHS, *RHS; + + // Match lowering of @llvm.vector.reduce.and. Turn + /// %vec_ne = icmp ne <8 x i8> %lhs, %rhs + /// %scalar_ne = bitcast <8 x i1> %vec_ne to i8 + /// %res = icmp <pred> i8 %scalar_ne, 0 + /// + /// into + /// + /// %lhs.scalar = bitcast <8 x i8> %lhs to i64 + /// %rhs.scalar = bitcast <8 x i8> %rhs to i64 + /// %res = icmp <pred> i64 %lhs.scalar, %rhs.scalar + /// + /// for <pred> in {ne, eq}. + if (!match(&I, m_ICmp(OuterPred, + m_OneUse(m_BitCast(m_OneUse( + m_ICmp(InnerPred, m_Value(LHS), m_Value(RHS))))), + m_Zero()))) + return nullptr; + auto *LHSTy = dyn_cast<FixedVectorType>(LHS->getType()); + if (!LHSTy || !LHSTy->getElementType()->isIntegerTy()) + return nullptr; + unsigned NumBits = + LHSTy->getNumElements() * LHSTy->getElementType()->getIntegerBitWidth(); + // TODO: Relax this to "not wider than max legal integer type"? + if (!DL.isLegalInteger(NumBits)) + return nullptr; + + if (ICmpInst::isEquality(OuterPred) && InnerPred == ICmpInst::ICMP_NE) { + auto *ScalarTy = Builder.getIntNTy(NumBits); + LHS = Builder.CreateBitCast(LHS, ScalarTy, LHS->getName() + ".scalar"); + RHS = Builder.CreateBitCast(RHS, ScalarTy, RHS->getName() + ".scalar"); + return ICmpInst::Create(Instruction::ICmp, OuterPred, LHS, RHS, + I.getName()); + } + + return nullptr; +} + Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) { bool Changed = false; const SimplifyQuery Q = SQ.getWithInstruction(&I); @@ -6124,6 +6173,9 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) { if (Instruction *Res = foldICmpInvariantGroup(I)) return Res; + if (Instruction *Res = foldReductionIdiom(I, Builder, DL)) + return Res; + return Changed ?
&I : nullptr; } diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp index 30f6aab2114b..09694d50468f 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -46,8 +46,8 @@ void InstCombinerImpl::PHIArgMergedDebugLoc(Instruction *Inst, PHINode &PN) { // will be inefficient. assert(!isa<CallInst>(Inst)); - for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { - auto *I = cast<Instruction>(PN.getIncomingValue(i)); + for (Value *V : drop_begin(PN.incoming_values())) { + auto *I = cast<Instruction>(V); Inst->applyMergedLocation(Inst->getDebugLoc(), I->getDebugLoc()); } } @@ -138,8 +138,9 @@ Instruction *InstCombinerImpl::foldIntegerTypedPHI(PHINode &PN) { return nullptr; SmallVector<Value *, 4> AvailablePtrVals; - for (unsigned i = 0; i != PN.getNumIncomingValues(); ++i) { - Value *Arg = PN.getIncomingValue(i); + for (auto Incoming : zip(PN.blocks(), PN.incoming_values())) { + BasicBlock *BB = std::get<0>(Incoming); + Value *Arg = std::get<1>(Incoming); // First look backward: if (auto *PI = dyn_cast<PtrToIntInst>(Arg)) { @@ -151,8 +152,8 @@ Instruction *InstCombinerImpl::foldIntegerTypedPHI(PHINode &PN) { Value *ArgIntToPtr = nullptr; for (User *U : Arg->users()) { if (isa<IntToPtrInst>(U) && U->getType() == IntToPtr->getType() && - (DT.dominates(cast<Instruction>(U), PN.getIncomingBlock(i)) || - cast<Instruction>(U)->getParent() == PN.getIncomingBlock(i))) { + (DT.dominates(cast<Instruction>(U), BB) || + cast<Instruction>(U)->getParent() == BB)) { ArgIntToPtr = U; break; } @@ -190,26 +191,21 @@ Instruction *InstCombinerImpl::foldIntegerTypedPHI(PHINode &PN) { "Not enough available ptr typed incoming values"); PHINode *MatchingPtrPHI = nullptr; unsigned NumPhis = 0; - for (auto II = BB->begin(); II != BB->end(); II++, NumPhis++) { + for (PHINode &PtrPHI : BB->phis()) { // FIXME: consider handling this in AggressiveInstCombine - PHINode *PtrPHI = dyn_cast<PHINode>(II); - if (!PtrPHI) - break; - if (NumPhis > MaxNumPhis) + if (NumPhis++ > MaxNumPhis) return nullptr; - if (PtrPHI == &PN || PtrPHI->getType() != IntToPtr->getType()) + if (&PtrPHI == &PN || PtrPHI.getType() != IntToPtr->getType()) continue; - MatchingPtrPHI = PtrPHI; - for (unsigned i = 0; i != PtrPHI->getNumIncomingValues(); ++i) { - if (AvailablePtrVals[i] != - PtrPHI->getIncomingValueForBlock(PN.getIncomingBlock(i))) { - MatchingPtrPHI = nullptr; - break; - } - } - - if (MatchingPtrPHI) - break; + if (any_of(zip(PN.blocks(), AvailablePtrVals), + [&](const auto &BlockAndValue) { + BasicBlock *BB = std::get<0>(BlockAndValue); + Value *V = std::get<1>(BlockAndValue); + return PtrPHI.getIncomingValueForBlock(BB) != V; + })) + continue; + MatchingPtrPHI = &PtrPHI; + break; } if (MatchingPtrPHI) { @@ -250,9 +246,9 @@ Instruction *InstCombinerImpl::foldIntegerTypedPHI(PHINode &PN) { InsertNewInstBefore(NewPtrPHI, PN); SmallDenseMap<Value *, Instruction *> Casts; - for (unsigned i = 0; i != PN.getNumIncomingValues(); ++i) { - auto *IncomingBB = PN.getIncomingBlock(i); - auto *IncomingVal = AvailablePtrVals[i]; + for (auto Incoming : zip(PN.blocks(), AvailablePtrVals)) { + auto *IncomingBB = std::get<0>(Incoming); + auto *IncomingVal = std::get<1>(Incoming); if (IncomingVal->getType() == IntToPtr->getType()) { NewPtrPHI->addIncoming(IncomingVal, IncomingBB); @@ -330,8 +326,8 @@ InstCombinerImpl::foldPHIArgInsertValueInstructionIntoPHI(PHINode &PN) { // Scan to see if all operands are `insertvalue`'s 
with the same indices, // and all have a single use. - for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { - auto *I = dyn_cast<InsertValueInst>(PN.getIncomingValue(i)); + for (Value *V : drop_begin(PN.incoming_values())) { + auto *I = dyn_cast<InsertValueInst>(V); if (!I || !I->hasOneUser() || I->getIndices() != FirstIVI->getIndices()) return nullptr; } @@ -370,8 +366,8 @@ InstCombinerImpl::foldPHIArgExtractValueInstructionIntoPHI(PHINode &PN) { // Scan to see if all operands are `extractvalue`'s with the same indices, // and all have a single use. - for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { - auto *I = dyn_cast<ExtractValueInst>(PN.getIncomingValue(i)); + for (Value *V : drop_begin(PN.incoming_values())) { + auto *I = dyn_cast<ExtractValueInst>(V); if (!I || !I->hasOneUser() || I->getIndices() != FirstEVI->getIndices() || I->getAggregateOperand()->getType() != FirstEVI->getAggregateOperand()->getType()) @@ -412,8 +408,8 @@ Instruction *InstCombinerImpl::foldPHIArgBinOpIntoPHI(PHINode &PN) { Type *RHSType = RHSVal->getType(); // Scan to see if all operands are the same opcode, and all have one user. - for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { - Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i)); + for (Value *V : drop_begin(PN.incoming_values())) { + Instruction *I = dyn_cast<Instruction>(V); if (!I || I->getOpcode() != Opc || !I->hasOneUser() || // Verify type of the LHS matches so we don't fold cmp's of different // types. @@ -461,15 +457,17 @@ Instruction *InstCombinerImpl::foldPHIArgBinOpIntoPHI(PHINode &PN) { // Add all operands to the new PHIs. if (NewLHS || NewRHS) { - for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { - Instruction *InInst = cast<Instruction>(PN.getIncomingValue(i)); + for (auto Incoming : drop_begin(zip(PN.blocks(), PN.incoming_values()))) { + BasicBlock *InBB = std::get<0>(Incoming); + Value *InVal = std::get<1>(Incoming); + Instruction *InInst = cast<Instruction>(InVal); if (NewLHS) { Value *NewInLHS = InInst->getOperand(0); - NewLHS->addIncoming(NewInLHS, PN.getIncomingBlock(i)); + NewLHS->addIncoming(NewInLHS, InBB); } if (NewRHS) { Value *NewInRHS = InInst->getOperand(1); - NewRHS->addIncoming(NewInRHS, PN.getIncomingBlock(i)); + NewRHS->addIncoming(NewInRHS, InBB); } } } @@ -487,8 +485,8 @@ Instruction *InstCombinerImpl::foldPHIArgBinOpIntoPHI(PHINode &PN) { NewBinOp->copyIRFlags(PN.getIncomingValue(0)); - for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) - NewBinOp->andIRFlags(PN.getIncomingValue(i)); + for (Value *V : drop_begin(PN.incoming_values())) + NewBinOp->andIRFlags(V); PHIArgMergedDebugLoc(NewBinOp, PN); return NewBinOp; @@ -511,9 +509,8 @@ Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) { bool AllInBounds = true; // Scan to see if all operands are the same opcode, and all have one user. - for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { - GetElementPtrInst *GEP = - dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i)); + for (Value *V : drop_begin(PN.incoming_values())) { + GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V); if (!GEP || !GEP->hasOneUser() || GEP->getType() != FirstInst->getType() || GEP->getNumOperands() != FirstInst->getNumOperands()) return nullptr; @@ -527,8 +524,8 @@ Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) { AllBasePointersAreAllocas = false; // Compare the operand lists.
- for (unsigned op = 0, e = FirstInst->getNumOperands(); op != e; ++op) { - if (FirstInst->getOperand(op) == GEP->getOperand(op)) + for (unsigned Op = 0, E = FirstInst->getNumOperands(); Op != E; ++Op) { + if (FirstInst->getOperand(Op) == GEP->getOperand(Op)) continue; // Don't merge two GEPs when two operands differ (introducing phi nodes) @@ -536,11 +533,12 @@ Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) { // substantially cheaper to compute for the constants, so making it a // variable index could pessimize the path. This also handles the case // for struct indices, which must always be constant. - if (isa<ConstantInt>(FirstInst->getOperand(op)) || - isa<ConstantInt>(GEP->getOperand(op))) + if (isa<ConstantInt>(FirstInst->getOperand(Op)) || + isa<ConstantInt>(GEP->getOperand(Op))) return nullptr; - if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType()) + if (FirstInst->getOperand(Op)->getType() != + GEP->getOperand(Op)->getType()) return nullptr; // If we already needed a PHI for an earlier operand, and another operand @@ -550,7 +548,7 @@ Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) { if (NeededPhi) return nullptr; - FixedOperands[op] = nullptr; // Needs a PHI. + FixedOperands[Op] = nullptr; // Needs a PHI. NeededPhi = true; } } @@ -569,29 +567,30 @@ Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) { SmallVector<PHINode*, 16> OperandPhis(FixedOperands.size()); bool HasAnyPHIs = false; - for (unsigned i = 0, e = FixedOperands.size(); i != e; ++i) { - if (FixedOperands[i]) continue; // operand doesn't need a phi. - Value *FirstOp = FirstInst->getOperand(i); - PHINode *NewPN = PHINode::Create(FirstOp->getType(), e, - FirstOp->getName()+".pn"); + for (unsigned I = 0, E = FixedOperands.size(); I != E; ++I) { + if (FixedOperands[I]) + continue; // operand doesn't need a phi. + Value *FirstOp = FirstInst->getOperand(I); + PHINode *NewPN = + PHINode::Create(FirstOp->getType(), E, FirstOp->getName() + ".pn"); InsertNewInstBefore(NewPN, PN); NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0)); - OperandPhis[i] = NewPN; - FixedOperands[i] = NewPN; + OperandPhis[I] = NewPN; + FixedOperands[I] = NewPN; HasAnyPHIs = true; } - // Add all operands to the new PHIs. if (HasAnyPHIs) { - for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { - GetElementPtrInst *InGEP =cast<GetElementPtrInst>(PN.getIncomingValue(i)); - BasicBlock *InBB = PN.getIncomingBlock(i); - - for (unsigned op = 0, e = OperandPhis.size(); op != e; ++op) - if (PHINode *OpPhi = OperandPhis[op]) - OpPhi->addIncoming(InGEP->getOperand(op), InBB); + for (auto Incoming : drop_begin(zip(PN.blocks(), PN.incoming_values()))) { + BasicBlock *InBB = std::get<0>(Incoming); + Value *InVal = std::get<1>(Incoming); + GetElementPtrInst *InGEP = cast<GetElementPtrInst>(InVal); + + for (unsigned Op = 0, E = OperandPhis.size(); Op != E; ++Op) + if (PHINode *OpPhi = OperandPhis[Op]) + OpPhi->addIncoming(InGEP->getOperand(Op), InBB); } } @@ -627,18 +626,18 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) { // Check for non-address taken alloca. If not address-taken already, it isn't // profitable to do this xform. if (AllocaInst *AI = dyn_cast<AllocaInst>(L->getOperand(0))) { - bool isAddressTaken = false; + bool IsAddressTaken = false; for (User *U : AI->users()) { if (isa<LoadInst>(U)) continue; if (StoreInst *SI = dyn_cast<StoreInst>(U)) { // If storing TO the alloca, then the address isn't taken. 
if (SI->getOperand(1) == AI) continue; } - isAddressTaken = true; + IsAddressTaken = true; break; } - if (!isAddressTaken && AI->isStaticAlloca()) + if (!IsAddressTaken && AI->isStaticAlloca()) return false; } @@ -665,9 +664,9 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) { // When processing loads, we need to propagate two bits of information to the // sunk load: whether it is volatile, and what its alignment is. - bool isVolatile = FirstLI->isVolatile(); + bool IsVolatile = FirstLI->isVolatile(); Align LoadAlignment = FirstLI->getAlign(); - unsigned LoadAddrSpace = FirstLI->getPointerAddressSpace(); + const unsigned LoadAddrSpace = FirstLI->getPointerAddressSpace(); // We can't sink the load if the loaded value could be modified between the // load and the PHI. @@ -678,22 +677,25 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) { // If the PHI is of volatile loads and the load block has multiple // successors, sinking it would remove a load of the volatile value from // the path through the other successor. - if (isVolatile && + if (IsVolatile && FirstLI->getParent()->getTerminator()->getNumSuccessors() != 1) return nullptr; - // Check to see if all arguments are the same operation. - for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { - LoadInst *LI = dyn_cast<LoadInst>(PN.getIncomingValue(i)); - if (!LI || !LI->hasOneUser()) + for (auto Incoming : drop_begin(zip(PN.blocks(), PN.incoming_values()))) { + BasicBlock *InBB = std::get<0>(Incoming); + Value *InVal = std::get<1>(Incoming); + LoadInst *LI = dyn_cast<LoadInst>(InVal); + if (!LI || !LI->hasOneUser() || LI->isAtomic()) + return nullptr; + + // Make sure all arguments are the same type of operation. + if (LI->isVolatile() != IsVolatile || + LI->getPointerAddressSpace() != LoadAddrSpace) return nullptr; // We can't sink the load if the loaded value could be modified between // the load and the PHI. - if (LI->isVolatile() != isVolatile || - LI->getParent() != PN.getIncomingBlock(i) || - LI->getPointerAddressSpace() != LoadAddrSpace || - !isSafeAndProfitableToSinkLoad(LI)) + if (LI->getParent() != InBB || !isSafeAndProfitableToSinkLoad(LI)) return nullptr; LoadAlignment = std::min(LoadAlignment, LI->getAlign()); @@ -701,8 +703,7 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) { // If the PHI is of volatile loads and the load block has multiple // successors, sinking it would remove a load of the volatile value from // the path through the other successor. - if (isVolatile && - LI->getParent()->getTerminator()->getNumSuccessors() != 1) + if (IsVolatile && LI->getParent()->getTerminator()->getNumSuccessors() != 1) return nullptr; } @@ -715,7 +716,7 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) { Value *InVal = FirstLI->getOperand(0); NewPN->addIncoming(InVal, PN.getIncomingBlock(0)); LoadInst *NewLI = - new LoadInst(FirstLI->getType(), NewPN, "", isVolatile, LoadAlignment); + new LoadInst(FirstLI->getType(), NewPN, "", IsVolatile, LoadAlignment); unsigned KnownIDs[] = { LLVMContext::MD_tbaa, @@ -734,13 +735,15 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) { NewLI->setMetadata(ID, FirstLI->getMetadata(ID)); // Add all operands to the new PHI and combine TBAA metadata. 
- for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { - LoadInst *LI = cast<LoadInst>(PN.getIncomingValue(i)); + for (auto Incoming : drop_begin(zip(PN.blocks(), PN.incoming_values()))) { + BasicBlock *BB = std::get<0>(Incoming); + Value *V = std::get<1>(Incoming); + LoadInst *LI = cast<LoadInst>(V); combineMetadata(NewLI, LI, KnownIDs, true); Value *NewInVal = LI->getOperand(0); if (NewInVal != InVal) InVal = nullptr; - NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i)); + NewPN->addIncoming(NewInVal, BB); } if (InVal) { @@ -755,7 +758,7 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) { // If this was a volatile load that we are merging, make sure to loop through // and mark all the input loads as non-volatile. If we don't do this, we will // insert a new volatile load and the old ones will not be deletable. - if (isVolatile) + if (IsVolatile) for (Value *IncValue : PN.incoming_values()) cast<LoadInst>(IncValue)->setVolatile(false); @@ -830,8 +833,8 @@ Instruction *InstCombinerImpl::foldPHIArgZextsIntoPHI(PHINode &Phi) { // operands, and zext the result back to the original type. PHINode *NewPhi = PHINode::Create(NarrowType, NumIncomingValues, Phi.getName() + ".shrunk"); - for (unsigned i = 0; i != NumIncomingValues; ++i) - NewPhi->addIncoming(NewIncoming[i], Phi.getIncomingBlock(i)); + for (unsigned I = 0; I != NumIncomingValues; ++I) + NewPhi->addIncoming(NewIncoming[I], Phi.getIncomingBlock(I)); InsertNewInstBefore(NewPhi, Phi); return CastInst::CreateZExtOrBitCast(NewPhi, Phi.getType()); @@ -885,13 +888,13 @@ Instruction *InstCombinerImpl::foldPHIArgOpIntoPHI(PHINode &PN) { } // Check to see if all arguments are the same operation. - for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { - Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i)); + for (Value *V : drop_begin(PN.incoming_values())) { + Instruction *I = dyn_cast<Instruction>(V); if (!I || !I->hasOneUser() || !I->isSameOperationAs(FirstInst)) return nullptr; if (CastSrcTy) { if (I->getOperand(0)->getType() != CastSrcTy) - return nullptr; // Cast operation must match. + return nullptr; // Cast operation must match. } else if (I->getOperand(1) != ConstantOp) { return nullptr; } @@ -907,11 +910,13 @@ Instruction *InstCombinerImpl::foldPHIArgOpIntoPHI(PHINode &PN) { NewPN->addIncoming(InVal, PN.getIncomingBlock(0)); // Add all operands to the new PHI. - for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { - Value *NewInVal = cast<Instruction>(PN.getIncomingValue(i))->getOperand(0); + for (auto Incoming : drop_begin(zip(PN.blocks(), PN.incoming_values()))) { + BasicBlock *BB = std::get<0>(Incoming); + Value *V = std::get<1>(Incoming); + Value *NewInVal = cast<Instruction>(V)->getOperand(0); if (NewInVal != InVal) InVal = nullptr; - NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i)); + NewPN->addIncoming(NewInVal, BB); } Value *PhiVal; @@ -937,8 +942,8 @@ Instruction *InstCombinerImpl::foldPHIArgOpIntoPHI(PHINode &PN) { BinOp = BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp); BinOp->copyIRFlags(PN.getIncomingValue(0)); - for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) - BinOp->andIRFlags(PN.getIncomingValue(i)); + for (Value *V : drop_begin(PN.incoming_values())) + BinOp->andIRFlags(V); PHIArgMergedDebugLoc(BinOp, PN); return BinOp; @@ -952,8 +957,8 @@ Instruction *InstCombinerImpl::foldPHIArgOpIntoPHI(PHINode &PN) { } /// Return true if this PHI node is only used by a PHI node cycle that is dead. 
-static bool DeadPHICycle(PHINode *PN, - SmallPtrSetImpl<PHINode*> &PotentiallyDeadPHIs) { +static bool isDeadPHICycle(PHINode *PN, + SmallPtrSetImpl<PHINode *> &PotentiallyDeadPHIs) { if (PN->use_empty()) return true; if (!PN->hasOneUse()) return false; @@ -966,7 +971,7 @@ static bool DeadPHICycle(PHINode *PN, return false; if (PHINode *PU = dyn_cast<PHINode>(PN->user_back())) - return DeadPHICycle(PU, PotentiallyDeadPHIs); + return isDeadPHICycle(PU, PotentiallyDeadPHIs); return false; } @@ -999,7 +1004,7 @@ static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal, /// Return an existing non-zero constant if this phi node has one, otherwise /// return constant 1. -static ConstantInt *GetAnyNonZeroConstInt(PHINode &PN) { +static ConstantInt *getAnyNonZeroConstInt(PHINode &PN) { assert(isa<IntegerType>(PN.getType()) && "Expect only integer type phi"); for (Value *V : PN.operands()) if (auto *ConstVA = dyn_cast<ConstantInt>(V)) @@ -1014,8 +1019,8 @@ struct PHIUsageRecord { unsigned Shift; // The amount shifted. Instruction *Inst; // The trunc instruction. - PHIUsageRecord(unsigned pn, unsigned Sh, Instruction *User) - : PHIId(pn), Shift(Sh), Inst(User) {} + PHIUsageRecord(unsigned Pn, unsigned Sh, Instruction *User) + : PHIId(Pn), Shift(Sh), Inst(User) {} bool operator<(const PHIUsageRecord &RHS) const { if (PHIId < RHS.PHIId) return true; @@ -1032,12 +1037,11 @@ struct LoweredPHIRecord { unsigned Shift; // The amount shifted. unsigned Width; // The width extracted. - LoweredPHIRecord(PHINode *pn, unsigned Sh, Type *Ty) - : PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {} + LoweredPHIRecord(PHINode *Phi, unsigned Sh, Type *Ty) + : PN(Phi), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {} // Ctor form used by DenseMap. - LoweredPHIRecord(PHINode *pn, unsigned Sh) - : PN(pn), Shift(Sh), Width(0) {} + LoweredPHIRecord(PHINode *Phi, unsigned Sh) : PN(Phi), Shift(Sh), Width(0) {} } // namespace @@ -1093,10 +1097,13 @@ Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { // input is defined in the predecessor, then we won't be able to split the critical // edge which is required to insert a truncate. Because of this, we have to // bail out. - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - InvokeInst *II = dyn_cast<InvokeInst>(PN->getIncomingValue(i)); - if (!II) continue; - if (II->getParent() != PN->getIncomingBlock(i)) + for (auto Incoming : zip(PN->blocks(), PN->incoming_values())) { + BasicBlock *BB = std::get<0>(Incoming); + Value *V = std::get<1>(Incoming); + InvokeInst *II = dyn_cast<InvokeInst>(V); + if (!II) + continue; + if (II->getParent() != BB) continue; // If we have a phi, and if it's directly in the predecessor, then we have @@ -1146,8 +1153,8 @@ Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { array_pod_sort(PHIUsers.begin(), PHIUsers.end()); LLVM_DEBUG(dbgs() << "SLICING UP PHI: " << FirstPhi << '\n'; - for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i) dbgs() - << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] << '\n';); + for (unsigned I = 1; I != PHIsToSlice.size(); ++I) dbgs() + << "AND USER PHI #" << I << ": " << *PHIsToSlice[I] << '\n'); // PredValues - This is a temporary used when rewriting PHI nodes. It is // hoisted out here to avoid construction/destruction thrashing.
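The hunks in this file repeatedly rewrite index-based PHI walks into range-based ones. For reference, a minimal self-contained C++ sketch of the iteration idiom being adopted, using llvm::zip and llvm::drop_begin from ADT/STLExtras.h (the function is illustrative, not from the patch):

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static void visitIncomingPairs(PHINode &PN) {
  // Pair each predecessor block with its incoming value; drop_begin skips
  // the first pair, mirroring the old `for (unsigned i = 1; ...)` loops.
  for (auto Incoming : drop_begin(zip(PN.blocks(), PN.incoming_values()))) {
    BasicBlock *BB = std::get<0>(Incoming);
    Value *V = std::get<1>(Incoming);
    (void)BB;
    (void)V; // Inspect or transform the (block, value) pair here.
  }
}

Compared to indexing with getIncomingValue(i)/getIncomingBlock(i), the zipped form cannot drift out of sync between the two parallel operand arrays.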
@@ -1175,8 +1182,9 @@ Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { assert(EltPHI->getType() != PN->getType() && "Truncate didn't shrink phi?"); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - BasicBlock *Pred = PN->getIncomingBlock(i); + for (auto Incoming : zip(PN->blocks(), PN->incoming_values())) { + BasicBlock *Pred = std::get<0>(Incoming); + Value *InVal = std::get<1>(Incoming); Value *&PredVal = PredValues[Pred]; // If we already have a value for this predecessor, reuse it. @@ -1186,7 +1194,6 @@ Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { } // Handle the PHI self-reuse case. - Value *InVal = PN->getIncomingValue(i); if (InVal == PN) { PredVal = EltPHI; EltPHI->addIncoming(PredVal, Pred); @@ -1207,8 +1214,8 @@ Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { Builder.SetInsertPoint(Pred->getTerminator()); Value *Res = InVal; if (Offset) - Res = Builder.CreateLShr(Res, ConstantInt::get(InVal->getType(), - Offset), "extract"); + Res = Builder.CreateLShr( + Res, ConstantInt::get(InVal->getType(), Offset), "extract"); Res = Builder.CreateTrunc(Res, Ty, "extract.t"); PredVal = Res; EltPHI->addIncoming(Res, Pred); @@ -1217,12 +1224,12 @@ Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { // rewriting, we will ultimately delete the code we inserted. This // means we need to revisit that PHI to make sure we extract out the // needed piece. - if (PHINode *OldInVal = dyn_cast<PHINode>(PN->getIncomingValue(i))) + if (PHINode *OldInVal = dyn_cast<PHINode>(InVal)) if (PHIsInspected.count(OldInVal)) { unsigned RefPHIId = find(PHIsToSlice, OldInVal) - PHIsToSlice.begin(); - PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset, - cast<Instruction>(Res))); + PHIUsers.push_back( + PHIUsageRecord(RefPHIId, Offset, cast<Instruction>(Res))); ++UserE; } } @@ -1240,12 +1247,12 @@ Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { // Replace all the remaining uses of the PHI nodes (self uses and the lshrs) // with poison. Value *Poison = PoisonValue::get(FirstPhi.getType()); - for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i) - replaceInstUsesWith(*PHIsToSlice[i], Poison); + for (PHINode *PHI : drop_begin(PHIsToSlice)) + replaceInstUsesWith(*PHI, Poison); return replaceInstUsesWith(FirstPhi, Poison); } -static Value *SimplifyUsingControlFlow(InstCombiner &Self, PHINode &PN, +static Value *simplifyUsingControlFlow(InstCombiner &Self, PHINode &PN, const DominatorTree &DT) { // Simplify the following patterns: // if (cond) @@ -1302,8 +1309,8 @@ static Value *SimplifyUsingControlFlow(InstCombiner &Self, PHINode &PN, DT.dominates(FalseOutEdge, FalseIncEdge)) // This Phi is actually equivalent to branching condition of IDom. return Cond; - else if (DT.dominates(TrueOutEdge, FalseIncEdge) && - DT.dominates(FalseOutEdge, TrueIncEdge)) { + if (DT.dominates(TrueOutEdge, FalseIncEdge) && + DT.dominates(FalseOutEdge, TrueIncEdge)) { // This Phi is actually opposite to branching condition of IDom. We invert // the condition that will potentially open up some opportunities for // sinking. 
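The simplifyUsingControlFlow hunk above decides, via the four dominance checks, whether a phi merely re-materializes (or inverts) the branch condition of its immediate dominator. An illustrative source-level shape of the two cases, written as plain C++ rather than quoted from the patch:

// Direct case: the phi of {true, false} is exactly the branch condition.
bool selectDirect(bool Cond) {
  bool X;
  if (Cond)
    X = true;  // true flows in along the true out-edge
  else
    X = false; // false flows in along the false out-edge
  return X;    // phi == Cond, so the phi can be replaced by Cond
}

// Inverted case: the arms are swapped, so the phi is the negated condition.
bool selectInverted(bool Cond) {
  bool X;
  if (Cond)
    X = false;
  else
    X = true;
  return X;    // phi == !Cond; the transform inverts Cond to enable sinking
}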
@@ -1369,7 +1376,7 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) { if (PHINode *PU = dyn_cast<PHINode>(PHIUser)) { SmallPtrSet<PHINode*, 16> PotentiallyDeadPHIs; PotentiallyDeadPHIs.insert(&PN); - if (DeadPHICycle(PU, PotentiallyDeadPHIs)) + if (isDeadPHICycle(PU, PotentiallyDeadPHIs)) return replaceInstUsesWith(PN, PoisonValue::get(PN.getType())); } @@ -1398,15 +1405,15 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) { match(CmpInst->getOperand(1), m_Zero())) { ConstantInt *NonZeroConst = nullptr; bool MadeChange = false; - for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { - Instruction *CtxI = PN.getIncomingBlock(i)->getTerminator(); - Value *VA = PN.getIncomingValue(i); + for (unsigned I = 0, E = PN.getNumIncomingValues(); I != E; ++I) { + Instruction *CtxI = PN.getIncomingBlock(I)->getTerminator(); + Value *VA = PN.getIncomingValue(I); if (isKnownNonZero(VA, DL, 0, &AC, CtxI, &DT)) { if (!NonZeroConst) - NonZeroConst = GetAnyNonZeroConstInt(PN); + NonZeroConst = getAnyNonZeroConstInt(PN); if (NonZeroConst != VA) { - replaceOperand(PN, i, NonZeroConst); + replaceOperand(PN, I, NonZeroConst); MadeChange = true; } } @@ -1457,17 +1464,17 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) { // however. PHINode *FirstPN = cast<PHINode>(PN.getParent()->begin()); if (&PN != FirstPN) - for (unsigned i = 0, e = FirstPN->getNumIncomingValues(); i != e; ++i) { - BasicBlock *BBA = PN.getIncomingBlock(i); - BasicBlock *BBB = FirstPN->getIncomingBlock(i); + for (unsigned I = 0, E = FirstPN->getNumIncomingValues(); I != E; ++I) { + BasicBlock *BBA = PN.getIncomingBlock(I); + BasicBlock *BBB = FirstPN->getIncomingBlock(I); if (BBA != BBB) { - Value *VA = PN.getIncomingValue(i); - unsigned j = PN.getBasicBlockIndex(BBB); - Value *VB = PN.getIncomingValue(j); - PN.setIncomingBlock(i, BBB); - PN.setIncomingValue(i, VB); - PN.setIncomingBlock(j, BBA); - PN.setIncomingValue(j, VA); + Value *VA = PN.getIncomingValue(I); + unsigned J = PN.getBasicBlockIndex(BBB); + Value *VB = PN.getIncomingValue(J); + PN.setIncomingBlock(I, BBB); + PN.setIncomingValue(I, VB); + PN.setIncomingBlock(J, BBA); + PN.setIncomingValue(J, VA); // NOTE: Instcombine normally would want us to "return &PN" if we // modified any of the operands of an instruction. However, since we // aren't adding or removing uses (just rearranging them) we don't do @@ -1500,7 +1507,7 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) { return Res; // Ultimately, try to replace this Phi with a dominating condition. - if (auto *V = SimplifyUsingControlFlow(*this, PN, DT)) + if (auto *V = simplifyUsingControlFlow(*this, PN, DT)) return replaceInstUsesWith(PN, V); return nullptr; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 71a5ae24eead..3f064cfda712 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -1219,7 +1219,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, for (auto I = gep_type_begin(GEP), E = gep_type_end(GEP); I != E; I++) if (I.isStruct()) - return true;; + return true; return false; }; if (mayIndexStructType(cast<GetElementPtrInst>(*I))) @@ -1228,10 +1228,11 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, // Conservatively track the demanded elements back through any vector // operands we may have. 
We know there must be at least one, or we // wouldn't have a vector result to get here. Note that we intentionally - // merge the undef bits here since gepping with either an undef base or - // index results in undef. + // merge the undef bits here since gepping with either a poison base or + // index results in poison. for (unsigned i = 0; i < I->getNumOperands(); i++) { - if (match(I->getOperand(i), m_Undef())) { + if (i == 0 ? match(I->getOperand(i), m_Undef()) + : match(I->getOperand(i), m_Poison())) { // If the entire vector is undefined, just return this info. UndefElts = EltMask; return nullptr; @@ -1239,7 +1240,11 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, if (I->getOperand(i)->getType()->isVectorTy()) { APInt UndefEltsOp(VWidth, 0); simplifyAndSetOp(I, i, DemandedElts, UndefEltsOp); - UndefElts |= UndefEltsOp; + // gep(x, undef) is not undef, so skip considering idx ops here + // Note that we could propagate poison, but we can't distinguish between + // undef & poison bits ATM + if (i == 0) + UndefElts |= UndefEltsOp; } } diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 029be5257694..3091905ca534 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -68,6 +68,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 6e72255e51ae..8f94172a6402 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -1527,22 +1527,22 @@ void AddressSanitizer::getInterestingMemoryOperands( return; if (LoadInst *LI = dyn_cast<LoadInst>(I)) { - if (!ClInstrumentReads || ignoreAccess(LI, LI->getPointerOperand())) + if (!ClInstrumentReads || ignoreAccess(I, LI->getPointerOperand())) return; Interesting.emplace_back(I, LI->getPointerOperandIndex(), false, LI->getType(), LI->getAlign()); } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { - if (!ClInstrumentWrites || ignoreAccess(LI, SI->getPointerOperand())) + if (!ClInstrumentWrites || ignoreAccess(I, SI->getPointerOperand())) return; Interesting.emplace_back(I, SI->getPointerOperandIndex(), true, SI->getValueOperand()->getType(), SI->getAlign()); } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) { - if (!ClInstrumentAtomics || ignoreAccess(LI, RMW->getPointerOperand())) + if (!ClInstrumentAtomics || ignoreAccess(I, RMW->getPointerOperand())) return; Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true, RMW->getValOperand()->getType(), None); } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) { - if (!ClInstrumentAtomics || ignoreAccess(LI, XCHG->getPointerOperand())) + if (!ClInstrumentAtomics || ignoreAccess(I, XCHG->getPointerOperand())) return; Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true, XCHG->getCompareOperand()->getType(), None); @@ -1556,7 +1556,7 @@ void AddressSanitizer::getInterestingMemoryOperands( return; auto BasePtr = CI->getOperand(OpOffset); - if (ignoreAccess(LI, BasePtr)) + if (ignoreAccess(I, BasePtr)) return; Type *Ty = IsWrite ?
CI->getArgOperand(0)->getType() : CI->getType(); MaybeAlign Alignment = Align(1); @@ -1568,7 +1568,7 @@ void AddressSanitizer::getInterestingMemoryOperands( } else { for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) { if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) || - ignoreAccess(LI, CI->getArgOperand(ArgNo))) + ignoreAccess(I, CI->getArgOperand(ArgNo))) continue; Type *Ty = CI->getParamByValType(ArgNo); Interesting.emplace_back(I, ArgNo, false, Ty, Align(1)); diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index fb10a99d1338..7b3741d19a1b 100644 --- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -304,6 +304,7 @@ public: static bool isStandardLifetime(const AllocaInfo &AllocaInfo, const DominatorTree &DT); bool instrumentStack( + bool ShouldDetectUseAfterScope, MapVector<AllocaInst *, AllocaInfo> &AllocasToInstrument, SmallVector<Instruction *, 4> &UnrecognizedLifetimes, DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> &AllocaDbgMap, @@ -1359,6 +1360,7 @@ bool HWAddressSanitizer::isStandardLifetime(const AllocaInfo &AllocaInfo, } bool HWAddressSanitizer::instrumentStack( + bool ShouldDetectUseAfterScope, MapVector<AllocaInst *, AllocaInfo> &AllocasToInstrument, SmallVector<Instruction *, 4> &UnrecognizedLifetimes, DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> &AllocaDbgMap, @@ -1410,7 +1412,7 @@ bool HWAddressSanitizer::instrumentStack( }; bool StandardLifetime = UnrecognizedLifetimes.empty() && isStandardLifetime(Info, GetDT()); - if (DetectUseAfterScope && StandardLifetime) { + if (ShouldDetectUseAfterScope && StandardLifetime) { IntrinsicInst *Start = Info.LifetimeStart[0]; IRB.SetInsertPoint(Start->getNextNode()); tagAlloca(IRB, AI, Tag, Size); @@ -1505,8 +1507,14 @@ bool HWAddressSanitizer::sanitizeFunction( SmallVector<Instruction *, 8> LandingPadVec; SmallVector<Instruction *, 4> UnrecognizedLifetimes; DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> AllocaDbgMap; + bool CallsReturnTwice = false; for (auto &BB : F) { for (auto &Inst : BB) { + if (CallInst *CI = dyn_cast<CallInst>(&Inst)) { + if (CI->canReturnTwice()) { + CallsReturnTwice = true; + } + } if (InstrumentStack) { if (AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) { if (isInterestingAlloca(*AI)) @@ -1531,9 +1539,14 @@ bool HWAddressSanitizer::sanitizeFunction( } } - if (isa<ReturnInst>(Inst) || isa<ResumeInst>(Inst) || - isa<CleanupReturnInst>(Inst)) + if (isa<ReturnInst>(Inst)) { + if (CallInst *CI = Inst.getParent()->getTerminatingMustTailCall()) + RetVec.push_back(CI); + else + RetVec.push_back(&Inst); + } else if (isa<ResumeInst, CleanupReturnInst>(Inst)) { RetVec.push_back(&Inst); + } if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&Inst)) { for (Value *V : DVI->location_ops()) { @@ -1585,7 +1598,12 @@ bool HWAddressSanitizer::sanitizeFunction( if (!AllocasToInstrument.empty()) { Value *StackTag = ClGenerateTagsWithCalls ? nullptr : getStackBaseTag(EntryIRB); - instrumentStack(AllocasToInstrument, UnrecognizedLifetimes, AllocaDbgMap, + // Calls to functions that may return twice (e.g. setjmp) confuse the + // postdominator analysis, and will leave us to keep memory tagged after + // function return. Work around this by always untagging at every return + // statement if return_twice functions are called. 
+ instrumentStack(DetectUseAfterScope && !CallsReturnTwice, + AllocasToInstrument, UnrecognizedLifetimes, AllocaDbgMap, RetVec, StackTag, GetDT, GetPDT); } // Pad and align each of the allocas that we instrumented to stop small diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index ab179b03dd29..6868408ef5f5 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -456,6 +456,9 @@ bool InstrProfiling::lowerIntrinsics(Function *F) { } else if (auto *IPI = dyn_cast<InstrProfIncrementInst>(&Instr)) { lowerIncrement(IPI); MadeChange = true; + } else if (auto *IPC = dyn_cast<InstrProfCoverInst>(&Instr)) { + lowerCover(IPC); + MadeChange = true; } else if (auto *IPVP = dyn_cast<InstrProfValueProfileInst>(&Instr)) { lowerValueProfileInst(IPVP); MadeChange = true; @@ -539,7 +542,8 @@ static bool containsProfilingIntrinsics(Module &M) { return !F->use_empty(); return false; }; - return containsIntrinsic(llvm::Intrinsic::instrprof_increment) || + return containsIntrinsic(llvm::Intrinsic::instrprof_cover) || + containsIntrinsic(llvm::Intrinsic::instrprof_increment) || containsIntrinsic(llvm::Intrinsic::instrprof_increment_step) || containsIntrinsic(llvm::Intrinsic::instrprof_value_profile); } @@ -689,47 +693,58 @@ void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) { Ind->eraseFromParent(); } -void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) { - GlobalVariable *Counters = getOrCreateRegionCounters(Inc); - - IRBuilder<> Builder(Inc); - uint64_t Index = Inc->getIndex()->getZExtValue(); - Value *Addr = Builder.CreateConstInBoundsGEP2_32(Counters->getValueType(), - Counters, 0, Index); - - if (isRuntimeCounterRelocationEnabled()) { - Type *Int64Ty = Type::getInt64Ty(M->getContext()); - Type *Int64PtrTy = Type::getInt64PtrTy(M->getContext()); - Function *Fn = Inc->getParent()->getParent(); - Instruction &I = Fn->getEntryBlock().front(); - LoadInst *LI = dyn_cast<LoadInst>(&I); - if (!LI) { - IRBuilder<> Builder(&I); - GlobalVariable *Bias = - M->getGlobalVariable(getInstrProfCounterBiasVarName()); - if (!Bias) { - // Compiler must define this variable when runtime counter relocation - // is being used. Runtime has a weak external reference that is used - // to check whether that's the case or not. - Bias = new GlobalVariable( - *M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage, - Constant::getNullValue(Int64Ty), getInstrProfCounterBiasVarName()); - Bias->setVisibility(GlobalVariable::HiddenVisibility); - // A definition that's weak (linkonce_odr) without being in a COMDAT - // section wouldn't lead to link errors, but it would lead to a dead - // data word from every TU but one. Putting it in COMDAT ensures there - // will be exactly one data slot in the link. 
- if (TT.supportsCOMDAT()) - Bias->setComdat(M->getOrInsertComdat(Bias->getName())); - } - LI = Builder.CreateLoad(Int64Ty, Bias); +Value *InstrProfiling::getCounterAddress(InstrProfInstBase *I) { + auto *Counters = getOrCreateRegionCounters(I); + IRBuilder<> Builder(I); + + auto *Addr = Builder.CreateConstInBoundsGEP2_32( + Counters->getValueType(), Counters, 0, I->getIndex()->getZExtValue()); + + if (!isRuntimeCounterRelocationEnabled()) + return Addr; + + Type *Int64Ty = Type::getInt64Ty(M->getContext()); + Function *Fn = I->getParent()->getParent(); + Instruction &EntryI = Fn->getEntryBlock().front(); + LoadInst *LI = dyn_cast<LoadInst>(&EntryI); + if (!LI) { + IRBuilder<> EntryBuilder(&EntryI); + auto *Bias = M->getGlobalVariable(getInstrProfCounterBiasVarName()); + if (!Bias) { + // Compiler must define this variable when runtime counter relocation + // is being used. Runtime has a weak external reference that is used + // to check whether that's the case or not. + Bias = new GlobalVariable( + *M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage, + Constant::getNullValue(Int64Ty), getInstrProfCounterBiasVarName()); + Bias->setVisibility(GlobalVariable::HiddenVisibility); + // A definition that's weak (linkonce_odr) without being in a COMDAT + // section wouldn't lead to link errors, but it would lead to a dead + // data word from every TU but one. Putting it in COMDAT ensures there + // will be exactly one data slot in the link. + if (TT.supportsCOMDAT()) + Bias->setComdat(M->getOrInsertComdat(Bias->getName())); } - auto *Add = Builder.CreateAdd(Builder.CreatePtrToInt(Addr, Int64Ty), LI); - Addr = Builder.CreateIntToPtr(Add, Int64PtrTy); + LI = EntryBuilder.CreateLoad(Int64Ty, Bias); } + auto *Add = Builder.CreateAdd(Builder.CreatePtrToInt(Addr, Int64Ty), LI); + return Builder.CreateIntToPtr(Add, Addr->getType()); +} + +void InstrProfiling::lowerCover(InstrProfCoverInst *CoverInstruction) { + auto *Addr = getCounterAddress(CoverInstruction); + IRBuilder<> Builder(CoverInstruction); + // We store zero to represent that this block is covered. + Builder.CreateStore(Builder.getInt8(0), Addr); + CoverInstruction->eraseFromParent(); +} + +void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) { + auto *Addr = getCounterAddress(Inc); + IRBuilder<> Builder(Inc); if (Options.Atomic || AtomicCounterUpdateAll || - (Index == 0 && AtomicFirstCounter)) { + (Inc->getIndex()->isZeroValue() && AtomicFirstCounter)) { Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, Inc->getStep(), MaybeAlign(), AtomicOrdering::Monotonic); } else { @@ -849,6 +864,31 @@ static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) { } GlobalVariable * +InstrProfiling::createRegionCounters(InstrProfInstBase *Inc, StringRef Name, + GlobalValue::LinkageTypes Linkage) { + uint64_t NumCounters = Inc->getNumCounters()->getZExtValue(); + auto &Ctx = M->getContext(); + GlobalVariable *GV; + if (isa<InstrProfCoverInst>(Inc)) { + auto *CounterTy = Type::getInt8Ty(Ctx); + auto *CounterArrTy = ArrayType::get(CounterTy, NumCounters); + // TODO: `Constant::getAllOnesValue()` does not yet accept an array type. 
+ std::vector<Constant *> InitialValues(NumCounters, + Constant::getAllOnesValue(CounterTy)); + GV = new GlobalVariable(*M, CounterArrTy, false, Linkage, + ConstantArray::get(CounterArrTy, InitialValues), + Name); + GV->setAlignment(Align(1)); + } else { + auto *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters); + GV = new GlobalVariable(*M, CounterTy, false, Linkage, + Constant::getNullValue(CounterTy), Name); + GV->setAlignment(Align(8)); + } + return GV; +} + +GlobalVariable * InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) { GlobalVariable *NamePtr = Inc->getName(); auto &PD = ProfileDataMap[NamePtr]; @@ -914,16 +954,11 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) { uint64_t NumCounters = Inc->getNumCounters()->getZExtValue(); LLVMContext &Ctx = M->getContext(); - ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters); - // Create the counters variable. - auto *CounterPtr = - new GlobalVariable(*M, CounterTy, false, Linkage, - Constant::getNullValue(CounterTy), CntsVarName); + auto *CounterPtr = createRegionCounters(Inc, CntsVarName, Linkage); CounterPtr->setVisibility(Visibility); CounterPtr->setSection( getInstrProfSectionName(IPSK_cnts, TT.getObjectFormat())); - CounterPtr->setAlignment(Align(8)); MaybeSetComdat(CounterPtr); CounterPtr->setLinkage(Linkage); PD.RegionCounters = CounterPtr; diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp index 8fedefccf0e1..5e078f2c4212 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp @@ -26,6 +26,7 @@ #include "llvm/IR/GlobalValue.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index cfe993dedbc2..c51acdf52f14 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -182,6 +182,7 @@ #include "llvm/IR/ValueMap.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" @@ -1718,11 +1719,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { // Figure out maximal valid memcpy alignment. const Align ArgAlign = DL.getValueOrABITypeAlignment( MaybeAlign(FArg.getParamAlignment()), FArg.getParamByValType()); - Value *CpShadowPtr = + Value *CpShadowPtr, *CpOriginPtr; + std::tie(CpShadowPtr, CpOriginPtr) = getShadowOriginPtr(V, EntryIRB, EntryIRB.getInt8Ty(), ArgAlign, - /*isStore*/ true) - .first; - // TODO(glider): need to copy origins. + /*isStore*/ true); if (!PropagateShadow || Overflow) { // ParamTLS overflow. 
EntryIRB.CreateMemSet( @@ -1735,6 +1735,19 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { CopyAlign, Size); LLVM_DEBUG(dbgs() << " ByValCpy: " << *Cpy << "\n"); (void)Cpy; + + if (MS.TrackOrigins) { + Value *OriginPtr = + getOriginPtrForArgument(&FArg, EntryIRB, ArgOffset); + // FIXME: OriginSize should be: + // alignTo(V % kMinOriginAlignment + Size, kMinOriginAlignment) + unsigned OriginSize = alignTo(Size, kMinOriginAlignment); + EntryIRB.CreateMemCpy( + CpOriginPtr, + /* by getShadowOriginPtr */ kMinOriginAlignment, OriginPtr, + /* by origin_tls[ArgOffset] */ kMinOriginAlignment, + OriginSize); + } } } @@ -3701,7 +3714,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { insertShadowCheck(A, &CB); Size = DL.getTypeAllocSize(A->getType()); } else { - bool ArgIsInitialized = false; Value *Store = nullptr; // Compute the Shadow for arg even if it is ByVal, because // in that case getShadow() will copy the actual arg shadow to @@ -3722,10 +3734,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { MaybeAlign Alignment = llvm::None; if (ParamAlignment) Alignment = std::min(*ParamAlignment, kShadowTLSAlignment); - Value *AShadowPtr = + Value *AShadowPtr, *AOriginPtr; + std::tie(AShadowPtr, AOriginPtr) = getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), Alignment, - /*isStore*/ false) - .first; + /*isStore*/ false); if (!PropagateShadow) { Store = IRB.CreateMemSet(ArgShadowBase, Constant::getNullValue(IRB.getInt8Ty()), @@ -3733,6 +3745,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { } else { Store = IRB.CreateMemCpy(ArgShadowBase, Alignment, AShadowPtr, Alignment, Size); + if (MS.TrackOrigins) { + Value *ArgOriginBase = getOriginPtrForArgument(A, IRB, ArgOffset); + // FIXME: OriginSize should be: + // alignTo(A % kMinOriginAlignment + Size, kMinOriginAlignment) + unsigned OriginSize = alignTo(Size, kMinOriginAlignment); + IRB.CreateMemCpy( + ArgOriginBase, + /* by origin_tls[ArgOffset] */ kMinOriginAlignment, + AOriginPtr, + /* by getShadowOriginPtr */ kMinOriginAlignment, OriginSize); + } } } else { // Any other parameters mean we need bit-grained tracking of uninit @@ -3743,12 +3766,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase, kShadowTLSAlignment); Constant *Cst = dyn_cast<Constant>(ArgShadow); - if (Cst && Cst->isNullValue()) - ArgIsInitialized = true; + if (MS.TrackOrigins && !(Cst && Cst->isNullValue())) { + IRB.CreateStore(getOrigin(A), + getOriginPtrForArgument(A, IRB, ArgOffset)); + } } - if (MS.TrackOrigins && !ArgIsInitialized) - IRB.CreateStore(getOrigin(A), - getOriginPtrForArgument(A, IRB, ArgOffset)); (void)Store; assert(Store != nullptr); LLVM_DEBUG(dbgs() << " Param:" << *Store << "\n"); diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index c46415e5b1f4..0902a94452e3 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -255,6 +255,11 @@ static cl::opt<bool> PGOInstrumentEntry( "pgo-instrument-entry", cl::init(false), cl::Hidden, cl::desc("Force to instrument function entry basicblock.")); +static cl::opt<bool> PGOFunctionEntryCoverage( + "pgo-function-entry-coverage", cl::init(false), cl::Hidden, cl::ZeroOrMore, + cl::desc( + "Use this option to enable function entry coverage instrumentation.")); 
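[Editorial note, not part of the vendor diff: the -pgo-function-entry-coverage option declared just above makes instrumentOneFunc (below) emit a single llvm.instrprof.cover call per function, and the InstrProfiling changes earlier in this import lower that intrinsic to a one-byte store of zero into an i8 counter array initialized to all-ones. A minimal reader-side sketch of that encoding follows; the helper name and sample bytes are hypothetical, and only the 0xFF/0x00 convention comes from the diff.]

#include <cstdint>
#include <cstdio>

// Hypothetical view of a byte-coverage counter region: one byte per
// function, initialized to 0xFF (Constant::getAllOnesValue on i8) and
// overwritten with 0 by the lowered llvm.instrprof.cover intrinsic.
static bool isCovered(uint8_t Counter) { return Counter == 0; }

int main() {
  const uint8_t Counters[] = {0xFF, 0x00, 0xFF}; // e.g. three functions, one executed
  for (unsigned I = 0; I != sizeof(Counters); ++I)
    std::printf("function %u: %s\n", I,
                isCovered(Counters[I]) ? "covered" : "not covered");
}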
+ static cl::opt<bool> PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, cl::desc("Fix function entry count in profile use.")); @@ -337,6 +342,33 @@ static const char *ValueProfKindDescr[] = { #include "llvm/ProfileData/InstrProfData.inc" }; +// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime +// aware this is an ir_level profile so it can set the version flag. +static GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS) { + const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)); + Type *IntTy64 = Type::getInt64Ty(M.getContext()); + uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF); + if (IsCS) + ProfileVersion |= VARIANT_MASK_CSIR_PROF; + if (PGOInstrumentEntry) + ProfileVersion |= VARIANT_MASK_INSTR_ENTRY; + if (DebugInfoCorrelate) + ProfileVersion |= VARIANT_MASK_DBG_CORRELATE; + if (PGOFunctionEntryCoverage) + ProfileVersion |= + VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY; + auto IRLevelVersionVariable = new GlobalVariable( + M, IntTy64, true, GlobalValue::WeakAnyLinkage, + Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName); + IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility); + Triple TT(M.getTargetTriple()); + if (TT.supportsCOMDAT()) { + IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage); + IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName)); + } + return IRLevelVersionVariable; +} + namespace { /// The select instruction visitor plays three roles specified @@ -469,9 +501,7 @@ private: createProfileFileNameVar(M, InstrProfileOutput); // The variable in a comdat may be discarded by LTO. Ensure the // declaration will be retained. - appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true, - PGOInstrumentEntry, - DebugInfoCorrelate)); + appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true)); return false; } std::string InstrProfileOutput; @@ -914,22 +944,39 @@ static void instrumentOneFunc( FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo( F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry); + + Type *I8PtrTy = Type::getInt8PtrTy(M->getContext()); + auto Name = ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy); + auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()), + FuncInfo.FunctionHash); + if (PGOFunctionEntryCoverage) { + assert(!IsCS && + "entry coverage does not support context-sensitive instrumentation"); + auto &EntryBB = F.getEntryBlock(); + IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt()); + // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>, + // i32 <index>) + Builder.CreateCall( + Intrinsic::getDeclaration(M, Intrinsic::instrprof_cover), + {Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)}); + return; + } + std::vector<BasicBlock *> InstrumentBBs; FuncInfo.getInstrumentBBs(InstrumentBBs); unsigned NumCounters = InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); uint32_t I = 0; - Type *I8PtrTy = Type::getInt8PtrTy(M->getContext()); for (auto *InstrBB : InstrumentBBs) { IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt()); assert(Builder.GetInsertPoint() != InstrBB->end() && "Cannot get the Instrumentation point"); + // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>, + // i32 <index>) Builder.CreateCall( Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment), - {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), -
Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters), - Builder.getInt32(I++)}); + {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)}); } // Now instrument select instructions: @@ -1502,6 +1549,8 @@ void PGOUseFunc::annotateIrrLoopHeaderWeights() { } void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) { + if (PGOFunctionEntryCoverage) + return; Module *M = F.getParent(); IRBuilder<> Builder(&SI); Type *Int64Ty = Builder.getInt64Ty(); @@ -1622,8 +1671,7 @@ static bool InstrumentAllFunctions( // For the context-sensitive instrumentation, we should have a separate pass // (before LTO/ThinLTO linking) to create these variables. if (!IsCS) - createIRLevelProfileFlagVar(M, /*IsCS=*/false, PGOInstrumentEntry, - DebugInfoCorrelate); + createIRLevelProfileFlagVar(M, /*IsCS=*/false); std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; collectComdatMembers(M, ComdatMembers); @@ -1645,9 +1693,7 @@ PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) { createProfileFileNameVar(M, CSInstrName); // The variable in a comdat may be discarded by LTO. Ensure the declaration // will be retained. - appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true, - PGOInstrumentEntry, - DebugInfoCorrelate)); + appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true)); return PreservedAnalyses::all(); } @@ -1844,6 +1890,18 @@ static bool annotateAllFunctions( ProfileFileName.data(), "Not an IR level instrumentation profile")); return false; } + if (PGOReader->hasSingleByteCoverage()) { + Ctx.diagnose(DiagnosticInfoPGOProfile( + ProfileFileName.data(), + "Cannot use coverage profiles for optimization")); + return false; + } + if (PGOReader->functionEntryOnly()) { + Ctx.diagnose(DiagnosticInfoPGOProfile( + ProfileFileName.data(), + "Function entry profiles are not yet supported for optimization")); + return false; + } // Add the profile summary (read from the header of the indexed summary) here // so that we can use it below when reading counters (which checks if the diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp index 1ca6ddabac5b..126845bb3308 100644 --- a/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp +++ b/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp @@ -123,20 +123,9 @@ BundledRetainClaimRVs::~BundledRetainClaimRVs() { // can't be tail calls. if (auto *CI = dyn_cast<CallInst>(CB)) CI->setTailCallKind(CallInst::TCK_NoTail); - - if (UseMarker) { - // Remove the retainRV/claimRV function operand from the operand bundle - // to reflect the fact that the backend is responsible for emitting only - // the marker instruction, but not the retainRV/claimRV call.
- OperandBundleDef OB("clang.arc.attachedcall", None); - auto *NewCB = CallBase::Create(CB, OB, CB); - CB->replaceAllUsesWith(NewCB); - CB->eraseFromParent(); - } } - if (!ContractPass || !UseMarker) - EraseInstruction(P.first); + EraseInstruction(P.first); } RVCalls.clear(); diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARC.h b/llvm/lib/Transforms/ObjCARC/ObjCARC.h index 2b47bec7ffe8..62f88a8cc02b 100644 --- a/llvm/lib/Transforms/ObjCARC/ObjCARC.h +++ b/llvm/lib/Transforms/ObjCARC/ObjCARC.h @@ -105,8 +105,7 @@ CallInst *createCallInstWithColors( class BundledRetainClaimRVs { public: - BundledRetainClaimRVs(bool ContractPass, bool UseMarker) - : ContractPass(ContractPass), UseMarker(UseMarker) {} + BundledRetainClaimRVs(bool ContractPass) : ContractPass(ContractPass) {} ~BundledRetainClaimRVs(); /// Insert a retainRV/claimRV call to the normal destination blocks of invokes @@ -156,9 +155,6 @@ private: DenseMap<CallInst *, CallBase *> RVCalls; bool ContractPass; - - /// Indicates whether the target uses a special inline-asm marker. - bool UseMarker; }; } // end namespace objcarc diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp index 9e2832827686..2985ae004d3c 100644 --- a/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp +++ b/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp @@ -434,23 +434,20 @@ bool ObjCARCContract::tryToPeepholeInstruction( LLVM_FALLTHROUGH; case ARCInstKind::RetainRV: case ARCInstKind::UnsafeClaimRV: { - bool IsInstContainedInBundle = BundledInsts->contains(Inst); - - // Return now if the target doesn't need a special inline-asm marker. Return - // true if this is a bundled retainRV/claimRV call, which is going to be - // erased at the end of this pass, to avoid undoing objc-arc-expand and + // Return true if this is a bundled retainRV/claimRV call, which is always + // redundant with the attachedcall in the bundle, and is going to be erased + // at the end of this pass. This avoids undoing objc-arc-expand and // replacing uses of the retainRV/claimRV call's argument with its result. - if (!RVInstMarker) - return IsInstContainedInBundle; - - // The target needs a special inline-asm marker. + if (BundledInsts->contains(Inst)) + return true; - // We don't have to emit the marker if this is a bundled call since the - // backend is responsible for emitting it. Return false to undo - // objc-arc-expand. - if (IsInstContainedInBundle) + // If this isn't a bundled call, and the target doesn't need a special + // inline-asm marker, we're done: return now, and undo objc-arc-expand. + if (!RVInstMarker) return false; + // The target needs a special inline-asm marker. Insert it. 
+ BasicBlock::iterator BBI = Inst->getIterator(); BasicBlock *InstParent = Inst->getParent(); @@ -548,7 +545,7 @@ bool ObjCARCContract::run(Function &F, AAResults *A, DominatorTree *D) { AA = A; DT = D; PA.setAA(A); - BundledRetainClaimRVs BRV(true, RVInstMarker); + BundledRetainClaimRVs BRV(/*ContractPass=*/true); BundledInsts = &BRV; std::pair<bool, bool> R = BundledInsts->insertAfterInvokes(F, DT); diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index b6dc97f1e43f..e1a000b31cf9 100644 --- a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -2459,7 +2459,7 @@ bool ObjCARCOpt::run(Function &F, AAResults &AA) { return false; Changed = CFGChanged = false; - BundledRetainClaimRVs BRV(false, objcarc::getRVInstMarker(*F.getParent())); + BundledRetainClaimRVs BRV(/*ContractPass=*/false); BundledInsts = &BRV; LLVM_DEBUG(dbgs() << "<<< ObjCARCOpt: Visiting Function: " << F.getName() diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp index dda1a2f08076..143a78f604fc 100644 --- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp @@ -357,7 +357,7 @@ typedef DenseMap<BasicBlock *, CloneList> DuplicateBlockMap; // This map keeps track of all the new definitions for an instruction. This // information is needed when restoring SSA form after cloning blocks. -typedef DenseMap<Instruction *, std::vector<Instruction *>> DefMap; +typedef MapVector<Instruction *, std::vector<Instruction *>> DefMap; inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) { OS << "< "; @@ -1126,6 +1126,9 @@ private: /// Add new value mappings to the DefMap to keep track of all new definitions /// for a particular instruction. These will be used while updating SSA form. void updateDefMap(DefMap &NewDefs, ValueToValueMapTy &VMap) { + SmallVector<std::pair<Instruction *, Instruction *>> NewDefsVector; + NewDefsVector.reserve(VMap.size()); + for (auto Entry : VMap) { Instruction *Inst = dyn_cast<Instruction>(const_cast<Value *>(Entry.first)); @@ -1138,11 +1141,18 @@ private: if (!Cloned) continue; - if (NewDefs.find(Inst) == NewDefs.end()) - NewDefs[Inst] = {Cloned}; - else - NewDefs[Inst].push_back(Cloned); + NewDefsVector.push_back({Inst, Cloned}); } + + // Sort the defs to get deterministic insertion order into NewDefs. + sort(NewDefsVector, [](const auto &LHS, const auto &RHS) { + if (LHS.first == RHS.first) + return LHS.second->comesBefore(RHS.second); + return LHS.first->comesBefore(RHS.first); + }); + + for (const auto &KV : NewDefsVector) + NewDefs[KV.first].push_back(KV.second); } /// Update the last branch of a particular cloned path to point to the correct diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp index ca19913e37ee..bf4d275e04ba 100644 --- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp +++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp @@ -192,6 +192,7 @@ struct FusionCandidate { GuardBranch(L->getLoopGuardBranch()), PP(PP), AbleToPeel(canPeel(L)), Peeled(false), DT(DT), PDT(PDT), ORE(ORE) { + assert(DT && "Expected non-null DT!"); // Walk over all blocks in the loop and check for conditions that may // prevent fusion. 
For each block, walk over all instructions and collect // the memory reads and writes. If any instructions that prevent fusion are @@ -767,7 +768,7 @@ private: LLVM_DEBUG(dbgs() << "Attempting to peel first " << PeelCount << " iterations of the first loop. \n"); - FC0.Peeled = peelLoop(FC0.L, PeelCount, &LI, &SE, &DT, &AC, true); + FC0.Peeled = peelLoop(FC0.L, PeelCount, &LI, &SE, DT, &AC, true); if (FC0.Peeled) { LLVM_DEBUG(dbgs() << "Done Peeling\n"); diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 35ba4e2b4032..318c4c06f0f7 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -1172,8 +1172,15 @@ bool LoopIdiomRecognize::processLoopStridedStore( CallInst *NewCall; if (SplatValue) { - NewCall = Builder.CreateMemSet(BasePtr, SplatValue, NumBytes, - MaybeAlign(StoreAlignment)); + AAMDNodes AATags = TheStore->getAAMetadata(); + if (auto CI = dyn_cast<ConstantInt>(NumBytes)) + AATags = AATags.extendTo(CI->getZExtValue()); + else + AATags = AATags.extendTo(-1); + + NewCall = Builder.CreateMemSet( + BasePtr, SplatValue, NumBytes, MaybeAlign(StoreAlignment), + /*isVolatile=*/false, AATags.TBAA, AATags.Scope, AATags.NoAlias); } else { // Everything is emitted in default address space Type *Int8PtrTy = DestInt8PtrTy; @@ -1452,17 +1459,28 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator()); + AAMDNodes AATags = TheLoad->getAAMetadata(); + AAMDNodes StoreAATags = TheStore->getAAMetadata(); + AATags = AATags.merge(StoreAATags); + if (auto CI = dyn_cast<ConstantInt>(NumBytes)) + AATags = AATags.extendTo(CI->getZExtValue()); + else + AATags = AATags.extendTo(-1); + CallInst *NewCall = nullptr; // Check whether to generate an unordered atomic memcpy: // If the load or store are atomic, then they must necessarily be unordered // by previous checks. if (!TheStore->isAtomic() && !TheLoad->isAtomic()) { if (UseMemMove) - NewCall = Builder.CreateMemMove(StoreBasePtr, StoreAlign, LoadBasePtr, - LoadAlign, NumBytes); + NewCall = Builder.CreateMemMove( + StoreBasePtr, StoreAlign, LoadBasePtr, LoadAlign, NumBytes, + /*isVolatile=*/false, AATags.TBAA, AATags.Scope, AATags.NoAlias); else - NewCall = Builder.CreateMemCpy(StoreBasePtr, StoreAlign, LoadBasePtr, - LoadAlign, NumBytes); + NewCall = + Builder.CreateMemCpy(StoreBasePtr, StoreAlign, LoadBasePtr, LoadAlign, + NumBytes, /*isVolatile=*/false, AATags.TBAA, + AATags.TBAAStruct, AATags.Scope, AATags.NoAlias); } else { // For now don't support unordered atomic memmove. if (UseMemMove) @@ -1486,7 +1504,8 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( // have an alignment but non-atomic loads/stores may not.
NewCall = Builder.CreateElementUnorderedAtomicMemCpy( StoreBasePtr, StoreAlign.getValue(), LoadBasePtr, LoadAlign.getValue(), - NumBytes, StoreSize); + NumBytes, StoreSize, AATags.TBAA, AATags.TBAAStruct, AATags.Scope, + AATags.NoAlias); } NewCall->setDebugLoc(TheStore->getDebugLoc()); diff --git a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp index 728d63fe2847..d3fcba10c275 100644 --- a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp +++ b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp @@ -468,7 +468,7 @@ private: LI.removeBlock(BB); } - DetatchDeadBlocks(DeadLoopBlocks, &DTUpdates, /*KeepOneInputPHIs*/true); + detachDeadBlocks(DeadLoopBlocks, &DTUpdates, /*KeepOneInputPHIs*/true); DTU.applyUpdates(DTUpdates); DTUpdates.clear(); for (auto *BB : DeadLoopBlocks) diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index 022d9c7abc8c..9beb2281cf0f 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -1281,7 +1281,7 @@ static LoopUnrollResult tryToUnrollLoop( << " iterations"; }); - if (peelLoop(L, PP.PeelCount, LI, &SE, &DT, &AC, PreserveLCSSA)) { + if (peelLoop(L, PP.PeelCount, LI, &SE, DT, &AC, PreserveLCSSA)) { simplifyLoopAfterUnroll(L, true, LI, &SE, &DT, &AC, &TTI); // If the loop was peeled, we already "used up" the profile information // we had, so we don't want to unroll or peel again. diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp index 8f1d0181ee5b..296becb31e8f 100644 --- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp +++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp @@ -1339,16 +1339,21 @@ public: // Copy load operand to new alloca. Builder.SetInsertPoint(Copy, Copy->begin()); - AllocaInst *NewLd = - Builder.CreateAlloca(Load->getType(), Load->getPointerAddressSpace()); - Builder.CreateMemCpy(NewLd, NewLd->getAlign(), - Load->getPointerOperand(), Load->getAlign(), - LoadLoc.Size.getValue()); + auto *VT = cast<FixedVectorType>(Load->getType()); + // Use an array type for the alloca, to avoid potentially huge alignment + // requirements for large vector types. + auto *ArrayTy = ArrayType::get(VT->getElementType(), VT->getNumElements()); + AllocaInst *Alloca = + Builder.CreateAlloca(ArrayTy, Load->getPointerAddressSpace()); + Value *BC = Builder.CreateBitCast(Alloca, VT->getPointerTo()); + + Builder.CreateMemCpy(BC, Alloca->getAlign(), Load->getPointerOperand(), + Load->getAlign(), LoadLoc.Size.getValue()); Builder.SetInsertPoint(Fusion, Fusion->begin()); PHINode *PHI = Builder.CreatePHI(Load->getPointerOperandType(), 3); PHI->addIncoming(Load->getPointerOperand(), Check0); PHI->addIncoming(Load->getPointerOperand(), Check1); - PHI->addIncoming(NewLd, Copy); + PHI->addIncoming(BC, Copy); // Adjust DT. 
DTUpdates.push_back({DT->Insert, Check0, Check1}); diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp index 2476e6c408b1..f35c9212a6f9 100644 --- a/llvm/lib/Transforms/Scalar/NewGVN.cpp +++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp @@ -77,6 +77,7 @@ #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Argument.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" @@ -1736,18 +1737,18 @@ NewGVN::performSymbolicPHIEvaluation(ArrayRef<ValPair> PHIOps, if (Filtered.empty()) { // If it has undef or poison at this point, it means there are no non-undef // arguments, and thus, the value of the phi node must be undef. - if (HasPoison && !HasUndef) { - LLVM_DEBUG( - dbgs() << "PHI Node " << *I - << " has no non-poison arguments, valuing it as poison\n"); - return createConstantExpression(PoisonValue::get(I->getType())); - } if (HasUndef) { LLVM_DEBUG( dbgs() << "PHI Node " << *I << " has no non-undef arguments, valuing it as undef\n"); return createConstantExpression(UndefValue::get(I->getType())); } + if (HasPoison) { + LLVM_DEBUG( + dbgs() << "PHI Node " << *I + << " has no non-poison arguments, valuing it as poison\n"); + return createConstantExpression(PoisonValue::get(I->getType())); + } LLVM_DEBUG(dbgs() << "No arguments of PHI node " << *I << " are live\n"); deleteExpression(E); @@ -1757,6 +1758,11 @@ NewGVN::performSymbolicPHIEvaluation(ArrayRef<ValPair> PHIOps, ++Filtered.begin(); // Can't use std::equal here, sadly, because filter.begin moves. if (llvm::all_of(Filtered, [&](Value *Arg) { return Arg == AllSameValue; })) { + // Can't fold phi(undef, X) -> X unless X can't be poison (thus X is undef + // in the worst case). + if (HasUndef && !isGuaranteedNotToBePoison(AllSameValue, AC, nullptr, DT)) + return E; + // In LLVM's non-standard representation of phi nodes, it's possible to have // phi nodes with cycles (IE dependent on other phis that are .... dependent // on the original phi node), especially in weird CFG's where some arguments // are unreachable, or // infinite loops during evaluation. We work around this by not trying to // really evaluate them independently, but instead using a variable // expression to say if one is equivalent to the other. - // We also special case undef, so that if we have an undef, we can't use the - // common value unless it dominates the phi block. + // We also special case undef/poison, so that if we have an undef, we can't + // use the common value unless it dominates the phi block. if (HasPoison || HasUndef) { // If we have undef and at least one other value, this is really a // multivalued phi, and we need to know if it's cycle free in order to @@ -2853,14 +2859,14 @@ NewGVN::makePossiblePHIOfOps(Instruction *I, } // The algorithm initially places the values of the routine in the TOP -// congruence class. The leader of TOP is the undetermined value `undef`. +// congruence class. The leader of TOP is the undetermined value `poison`. // When the algorithm has finished, values still in TOP are unreachable. void NewGVN::initializeCongruenceClasses(Function &F) { NextCongruenceNum = 0; // Note that even though we use the live on entry def as a representative // MemoryAccess, it is *not* the same as the actual live on entry def.
We - // have no real equivalemnt to undef for MemoryAccesses, and so we really + // have no real equivalent to poison for MemoryAccesses, and so we really // should be checking whether the MemoryAccess is top if we want to know if it // is equivalent to everything. Otherwise, what this really signifies is that // the access "it reaches all the way back to the beginning of the function" @@ -3031,7 +3037,7 @@ void NewGVN::valueNumberMemoryPhi(MemoryPhi *MP) { !isMemoryAccessTOP(cast<MemoryAccess>(U)) && ReachableEdges.count({MP->getIncomingBlock(U), PHIBlock}); }); - // If all that is left is nothing, our memoryphi is undef. We keep it as + // If all that is left is nothing, our memoryphi is poison. We keep it as // InitialClass. Note: The only case this should happen is if we have at // least one self-argument. if (Filtered.begin() == Filtered.end()) { diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index 3da367341d2a..b795ad3899bc 100644 --- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -258,6 +258,7 @@ struct GCPtrLivenessData { // base relation will remain. Internally, we add a mixture of the two // types, then update all the second type to the first type using DefiningValueMapTy = MapVector<Value *, Value *>; +using PointerToBaseTy = MapVector<Value *, Value *>; using StatepointLiveSetTy = SetVector<Value *>; using RematerializedValueMapTy = MapVector<AssertingVH<Instruction>, AssertingVH<Value>>; @@ -266,9 +267,6 @@ struct PartiallyConstructedSafepointRecord { /// The set of values known to be live across this safepoint StatepointLiveSetTy LiveSet; - /// Mapping from live pointers to a base-defining-value - MapVector<Value *, Value *> PointerToBase; - /// The *new* gc.statepoint instruction itself. This produces the token /// that normal path gc.relocates and the gc.result are tied to. GCStatepointInst *StatepointToken; @@ -1255,10 +1253,9 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) { // post condition: PointerToBase contains one (derived, base) pair for every // pointer in live. Note that derived can be equal to base if the original // pointer was a base pointer. -static void -findBasePointers(const StatepointLiveSetTy &live, - MapVector<Value *, Value *> &PointerToBase, - DominatorTree *DT, DefiningValueMapTy &DVCache) { +static void findBasePointers(const StatepointLiveSetTy &live, + PointerToBaseTy &PointerToBase, DominatorTree *DT, + DefiningValueMapTy &DVCache) { for (Value *ptr : live) { Value *base = findBasePointer(ptr, DVCache); assert(base && "failed to find base pointer"); @@ -1274,8 +1271,8 @@ findBasePointers(const StatepointLiveSetTy &live, /// parse point. 
static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache, CallBase *Call, - PartiallyConstructedSafepointRecord &result) { - MapVector<Value *, Value *> PointerToBase; + PartiallyConstructedSafepointRecord &result, + PointerToBaseTy &PointerToBase) { StatepointLiveSetTy PotentiallyDerivedPointers = result.LiveSet; // We assume that all pointers passed to deopt are base pointers; as an // optimization, we can use this to avoid separately materializing the base @@ -1290,37 +1287,27 @@ static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache, PointerToBase[V] = V; } findBasePointers(PotentiallyDerivedPointers, PointerToBase, &DT, DVCache); - - if (PrintBasePointers) { - errs() << "Base Pairs (w/o Relocation):\n"; - for (auto &Pair : PointerToBase) { - errs() << " derived "; - Pair.first->printAsOperand(errs(), false); - errs() << " base "; - Pair.second->printAsOperand(errs(), false); - errs() << "\n";; - } - } - - result.PointerToBase = PointerToBase; } /// Given an updated version of the dataflow liveness results, update the /// liveset and base pointer maps for the call site CS. static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData, CallBase *Call, - PartiallyConstructedSafepointRecord &result); + PartiallyConstructedSafepointRecord &result, + PointerToBaseTy &PointerToBase); static void recomputeLiveInValues( Function &F, DominatorTree &DT, ArrayRef<CallBase *> toUpdate, - MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) { + MutableArrayRef<struct PartiallyConstructedSafepointRecord> records, + PointerToBaseTy &PointerToBase) { // TODO-PERF: reuse the original liveness, then simply run the dataflow // again. The old values are still live and will help it stabilize quickly. GCPtrLivenessData RevisedLivenessData; computeLiveInValues(DT, F, RevisedLivenessData); for (size_t i = 0; i < records.size(); i++) { struct PartiallyConstructedSafepointRecord &info = records[i]; - recomputeLiveInValues(RevisedLivenessData, toUpdate[i], info); + recomputeLiveInValues(RevisedLivenessData, toUpdate[i], info, + PointerToBase); } } @@ -1537,7 +1524,8 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */ const SmallVectorImpl<Value *> &BasePtrs, const SmallVectorImpl<Value *> &LiveVariables, PartiallyConstructedSafepointRecord &Result, - std::vector<DeferredReplacement> &Replacements) { + std::vector<DeferredReplacement> &Replacements, + const PointerToBaseTy &PointerToBase) { assert(BasePtrs.size() == LiveVariables.size()); // Then go ahead and use the builder to actually do the inserts.
We insert @@ -1626,10 +1614,10 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */ auto &Context = Call->getContext(); auto &DL = Call->getModule()->getDataLayout(); auto GetBaseAndOffset = [&](Value *Derived) { - assert(Result.PointerToBase.count(Derived)); + assert(PointerToBase.count(Derived)); unsigned AddressSpace = Derived->getType()->getPointerAddressSpace(); unsigned IntPtrSize = DL.getPointerSizeInBits(AddressSpace); - Value *Base = Result.PointerToBase.find(Derived)->second; + Value *Base = PointerToBase.find(Derived)->second; Value *Base_int = Builder.CreatePtrToInt( Base, Type::getIntNTy(Context, IntPtrSize)); Value *Derived_int = Builder.CreatePtrToInt( @@ -1819,9 +1807,9 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */ static void makeStatepointExplicit(DominatorTree &DT, CallBase *Call, PartiallyConstructedSafepointRecord &Result, - std::vector<DeferredReplacement> &Replacements) { + std::vector<DeferredReplacement> &Replacements, + const PointerToBaseTy &PointerToBase) { const auto &LiveSet = Result.LiveSet; - const auto &PointerToBase = Result.PointerToBase; // Convert to vector for efficient cross referencing. SmallVector<Value *, 64> BaseVec, LiveVec; @@ -1836,7 +1824,8 @@ makeStatepointExplicit(DominatorTree &DT, CallBase *Call, assert(LiveVec.size() == BaseVec.size()); // Do the actual rewriting and delete the old statepoint - makeStatepointExplicitImpl(Call, BaseVec, LiveVec, Result, Replacements); + makeStatepointExplicitImpl(Call, BaseVec, LiveVec, Result, Replacements, + PointerToBase); } // Helper function for the relocationViaAlloca. @@ -2238,6 +2227,7 @@ static bool AreEquivalentPhiNodes(PHINode &OrigRootPhi, PHINode &AlternateRootPh // relocated values we don't do any user adjustments here. static void rematerializeLiveValues(CallBase *Call, PartiallyConstructedSafepointRecord &Info, + PointerToBaseTy &PointerToBase, TargetTransformInfo &TTI) { const unsigned int ChainLengthThreshold = 10; @@ -2248,7 +2238,7 @@ static void rematerializeLiveValues(CallBase *Call, for (Value *LiveValue: Info.LiveSet) { // For each live pointer find its defining chain SmallVector<Instruction *, 3> ChainToBase; - assert(Info.PointerToBase.count(LiveValue)); + assert(PointerToBase.count(LiveValue)); Value *RootOfChain = findRematerializableChainToBasePointer(ChainToBase, LiveValue); @@ -2260,9 +2250,9 @@ static void rematerializeLiveValues(CallBase *Call, // Handle the scenario where the RootOfChain is not equal to the // Base Value, but they are essentially the same phi values. 
- if (RootOfChain != Info.PointerToBase[LiveValue]) { + if (RootOfChain != PointerToBase[LiveValue]) { PHINode *OrigRootPhi = dyn_cast<PHINode>(RootOfChain); - PHINode *AlternateRootPhi = dyn_cast<PHINode>(Info.PointerToBase[LiveValue]); + PHINode *AlternateRootPhi = dyn_cast<PHINode>(PointerToBase[LiveValue]); if (!OrigRootPhi || !AlternateRootPhi) continue; // PHI nodes that have the same incoming values, and belonging to the same @@ -2362,7 +2352,7 @@ static void rematerializeLiveValues(CallBase *Call, Instruction *InsertBefore = Call->getNextNode(); assert(InsertBefore); Instruction *RematerializedValue = rematerializeChain( - InsertBefore, RootOfChain, Info.PointerToBase[LiveValue]); + InsertBefore, RootOfChain, PointerToBase[LiveValue]); Info.RematerializedValues[RematerializedValue] = LiveValue; } else { auto *Invoke = cast<InvokeInst>(Call); @@ -2373,9 +2363,9 @@ static void rematerializeLiveValues(CallBase *Call, &*Invoke->getUnwindDest()->getFirstInsertionPt(); Instruction *NormalRematerializedValue = rematerializeChain( - NormalInsertBefore, RootOfChain, Info.PointerToBase[LiveValue]); + NormalInsertBefore, RootOfChain, PointerToBase[LiveValue]); Instruction *UnwindRematerializedValue = rematerializeChain( - UnwindInsertBefore, RootOfChain, Info.PointerToBase[LiveValue]); + UnwindInsertBefore, RootOfChain, PointerToBase[LiveValue]); Info.RematerializedValues[NormalRematerializedValue] = LiveValue; Info.RematerializedValues[UnwindRematerializedValue] = LiveValue; @@ -2491,10 +2481,24 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, // site. findLiveReferences(F, DT, ToUpdate, Records); + /// Global mapping from live pointers to a base-defining-value. + PointerToBaseTy PointerToBase; + // B) Find the base pointers for each live pointer for (size_t i = 0; i < Records.size(); i++) { PartiallyConstructedSafepointRecord &info = Records[i]; - findBasePointers(DT, DVCache, ToUpdate[i], info); + findBasePointers(DT, DVCache, ToUpdate[i], info, PointerToBase); + } + if (PrintBasePointers) { + errs() << "Base Pairs (w/o Relocation):\n"; + for (auto &Pair : PointerToBase) { + errs() << " derived "; + Pair.first->printAsOperand(errs(), false); + errs() << " base "; + Pair.second->printAsOperand(errs(), false); + errs() << "\n"; + } } // The base phi insertion logic (for any safepoint) may have inserted new @@ -2515,8 +2519,10 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, PartiallyConstructedSafepointRecord &Info = Records[i]; SmallVector<Value *, 128> Bases; - for (auto Pair : Info.PointerToBase) - Bases.push_back(Pair.second); + for (auto *Derived : Info.LiveSet) { + assert(PointerToBase.count(Derived) && "Missed base for derived pointer"); + Bases.push_back(PointerToBase[Derived]); + } insertUseHolderAfter(ToUpdate[i], Bases, Holders); } @@ -2524,18 +2530,16 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, // By selecting base pointers, we've effectively inserted new uses. Thus, we // need to rerun liveness. We may *also* have inserted new defs, but that's // not the key issue.
- recomputeLiveInValues(F, DT, ToUpdate, Records); + recomputeLiveInValues(F, DT, ToUpdate, Records, PointerToBase); if (PrintBasePointers) { - for (auto &Info : Records) { - errs() << "Base Pairs: (w/Relocation)\n"; - for (auto Pair : Info.PointerToBase) { - errs() << " derived "; - Pair.first->printAsOperand(errs(), false); - errs() << " base "; - Pair.second->printAsOperand(errs(), false); - errs() << "\n"; - } + errs() << "Base Pairs: (w/Relocation)\n"; + for (auto Pair : PointerToBase) { + errs() << " derived "; + Pair.first->printAsOperand(errs(), false); + errs() << " base "; + Pair.second->printAsOperand(errs(), false); + errs() << "\n"; } } @@ -2547,10 +2551,12 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, // Note that the relocation placement code relies on this filtering for // correctness as it expects the base to be in the liveset, which isn't true // if the base is constant. - for (auto &Info : Records) - for (auto &BasePair : Info.PointerToBase) - if (isa<Constant>(BasePair.second)) - Info.LiveSet.remove(BasePair.first); + for (auto &Info : Records) { + Info.LiveSet.remove_if([&](Value *LiveV) { + assert(PointerToBase.count(LiveV) && "Missed base for derived pointer"); + return isa<Constant>(PointerToBase[LiveV]); + }); + } for (CallInst *CI : Holders) CI->eraseFromParent(); @@ -2561,7 +2567,7 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, // some values instead of relocating them. This is purely an optimization and // does not influence correctness. for (size_t i = 0; i < Records.size(); i++) - rematerializeLiveValues(ToUpdate[i], Records[i], TTI); + rematerializeLiveValues(ToUpdate[i], Records[i], PointerToBase, TTI); // We need this to safely RAUW and delete call or invoke return values that // may themselves be live over a statepoint. For details, please see usage in @@ -2575,7 +2581,8 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, // previous statepoint can not be a live variable, thus we can and remove // the old statepoint calls as we go.) for (size_t i = 0; i < Records.size(); i++) - makeStatepointExplicit(DT, ToUpdate[i], Records[i], Replacements); + makeStatepointExplicit(DT, ToUpdate[i], Records[i], Replacements, + PointerToBase); ToUpdate.clear(); // prevent accident use of invalid calls. @@ -2594,8 +2601,8 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, // these live sets, and migrate to using that data structure from this point // onward. Info.LiveSet.clear(); - Info.PointerToBase.clear(); } + PointerToBase.clear(); // Do all the fixups of the original live variables to their relocated selves SmallVector<Value *, 128> Live; @@ -3115,35 +3122,15 @@ static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data, static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData, CallBase *Call, - PartiallyConstructedSafepointRecord &Info) { + PartiallyConstructedSafepointRecord &Info, + PointerToBaseTy &PointerToBase) { StatepointLiveSetTy Updated; findLiveSetAtInst(Call, RevisedLivenessData, Updated); // We may have base pointers which are now live that weren't before. We need // to update the PointerToBase structure to reflect this. for (auto V : Updated) - Info.PointerToBase.insert({V, V}); - -#ifndef NDEBUG - for (auto V : Updated) - assert(Info.PointerToBase.count(V) && - "Must be able to find base for live value!"); -#endif - - // Remove any stale base mappings - this can happen since our liveness is - // more precise then the one inherent in the base pointer analysis. 
- DenseSet<Value *> ToErase; - for (auto KVPair : Info.PointerToBase) - if (!Updated.count(KVPair.first)) - ToErase.insert(KVPair.first); - - for (auto *V : ToErase) - Info.PointerToBase.erase(V); - -#ifndef NDEBUG - for (auto KVPair : Info.PointerToBase) - assert(Updated.count(KVPair.first) && "record for non-live value"); -#endif + PointerToBase.insert({ V, V }); Info.LiveSet = Updated; } diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index 35497ae5ed9a..8be8946702be 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -48,6 +48,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp index ac580b4161f4..b3a445368537 100644 --- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -276,6 +276,8 @@ class StructurizeCFG { void insertConditions(bool Loops); + void simplifyConditions(); + void delPhiValues(BasicBlock *From, BasicBlock *To); void addPhiValues(BasicBlock *From, BasicBlock *To); @@ -586,6 +588,28 @@ void StructurizeCFG::insertConditions(bool Loops) { } } +/// Simplify any inverted conditions that were built by buildConditions. +void StructurizeCFG::simplifyConditions() { + SmallVector<Instruction *> InstToErase; + for (auto &I : concat<PredMap::value_type>(Predicates, LoopPreds)) { + auto &Preds = I.second; + for (auto &J : Preds) { + auto &Cond = J.second; + Instruction *Inverted; + if (match(Cond, m_Not(m_OneUse(m_Instruction(Inverted)))) && + !Cond->use_empty()) { + if (auto *InvertedCmp = dyn_cast<CmpInst>(Inverted)) { + InvertedCmp->setPredicate(InvertedCmp->getInversePredicate()); + Cond->replaceAllUsesWith(InvertedCmp); + InstToErase.push_back(cast<Instruction>(Cond)); + } + } + } + } + for (auto *I : InstToErase) + I->eraseFromParent(); +} + /// Remove all PHI values coming from "From" into "To" and remember /// them in DeletedPhis void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) { @@ -1065,6 +1089,7 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT) { createFlow(); insertConditions(false); insertConditions(true); + simplifyConditions(); setPhiValues(); simplifyAffectedPhis(); rebuildSSA(); diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index d6d6b1a7fa09..15c4a64eb794 100644 --- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -59,7 +59,7 @@ static cl::opt<unsigned> MaxDeoptOrUnreachableSuccessorCheckDepth( "is followed by a block that either has a terminating " "deoptimizing call or is terminated with an unreachable")); -void llvm::DetatchDeadBlocks( +void llvm::detachDeadBlocks( ArrayRef<BasicBlock *> BBs, SmallVectorImpl<DominatorTree::UpdateType> *Updates, bool KeepOneInputPHIs) { @@ -110,7 +110,7 @@ void llvm::DeleteDeadBlocks(ArrayRef <BasicBlock *> BBs, DomTreeUpdater *DTU, #endif SmallVector<DominatorTree::UpdateType, 4> Updates; - DetatchDeadBlocks(BBs, DTU ? &Updates : nullptr, KeepOneInputPHIs); + detachDeadBlocks(BBs, DTU ? 
&Updates : nullptr, KeepOneInputPHIs); if (DTU) DTU->applyUpdates(Updates); diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp index 048e691e33cf..86413df664a0 100644 --- a/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -694,38 +694,39 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, VMap[OrigV] = I; } + // Simplify conditional branches and switches with a constant operand. We try + // to prune these out when cloning, but if the simplification required + // looking through PHI nodes, those are only available after forming the full + // basic block. That may leave some here, and we still want to prune the dead + // code as early as possible. + Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator(); + for (BasicBlock &BB : make_range(Begin, NewFunc->end())) + ConstantFoldTerminator(&BB); + + // Some blocks may have become unreachable as a result. Find and delete them. + { + SmallPtrSet<BasicBlock *, 16> ReachableBlocks; + SmallVector<BasicBlock *, 16> Worklist; + Worklist.push_back(&*Begin); + while (!Worklist.empty()) { + BasicBlock *BB = Worklist.pop_back_val(); + if (ReachableBlocks.insert(BB).second) + append_range(Worklist, successors(BB)); + } + + SmallVector<BasicBlock *, 16> UnreachableBlocks; + for (BasicBlock &BB : make_range(Begin, NewFunc->end())) + if (!ReachableBlocks.contains(&BB)) + UnreachableBlocks.push_back(&BB); + DeleteDeadBlocks(UnreachableBlocks); + } + // Now that the inlined function body has been fully constructed, go through // and zap unconditional fall-through branches. This happens all the time when // specializing code: code specialization turns conditional branches into // uncond branches, and this code folds them. - Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator(); Function::iterator I = Begin; while (I != NewFunc->end()) { - // We need to simplify conditional branches and switches with a constant - // operand. We try to prune these out when cloning, but if the - // simplification required looking through PHI nodes, those are only - // available after forming the full basic block. That may leave some here, - // and we still want to prune the dead code as early as possible. - // - // Do the folding before we check if the block is dead since we want code - // like - // bb: - // br i1 undef, label %bb, label %bb - // to be simplified to - // bb: - // br label %bb - // before we call I->getSinglePredecessor(). - ConstantFoldTerminator(&*I); - - // Check if this block has become dead during inlining or other - // simplifications. Note that the first block will appear dead, as it has - // not yet been wired up properly. 
- if (I != Begin && (pred_empty(&*I) || I->getSinglePredecessor() == &*I)) { - BasicBlock *DeadBB = &*I++; - DeleteDeadBlock(DeadBB); - continue; - } - BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator()); if (!BI || BI->isConditional()) { ++I; diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 24cd5747c5a4..cec159f6a448 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -33,6 +33,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" @@ -857,8 +858,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, (ParamTy.size() + AggParamTy.size()) == (inputs.size() + outputs.size()) && "Number of scalar and aggregate params does not match inputs, outputs"); - assert(StructValues.empty() || - AggregateArgs && "Expected StructValues only with AggregateArgs set"); + assert((StructValues.empty() || AggregateArgs) && + "Expected StructValues only with AggregateArgs set"); // Concatenate scalar and aggregate params in ParamTy. size_t NumScalarParams = ParamTy.size(); diff --git a/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/llvm/lib/Transforms/Utils/GlobalStatus.cpp index f8ec8c6ad426..c1c5f5cc879f 100644 --- a/llvm/lib/Transforms/Utils/GlobalStatus.cpp +++ b/llvm/lib/Transforms/Utils/GlobalStatus.cpp @@ -65,15 +65,18 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, for (const Use &U : V->uses()) { const User *UR = U.getUser(); - if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(UR)) { - // If the result of the constantexpr isn't pointer type, then we won't - // know to expect it in various places. Just reject early. - if (!isa<PointerType>(CE->getType())) - return true; - - // FIXME: Do we need to add constexpr selects to VisitedUsers? - if (analyzeGlobalAux(CE, GS, VisitedUsers)) - return true; + if (const Constant *C = dyn_cast<Constant>(UR)) { + const ConstantExpr *CE = dyn_cast<ConstantExpr>(C); + if (CE && isa<PointerType>(CE->getType())) { + // Recursively analyze pointer-typed constant expressions. + // FIXME: Do we need to add constexpr selects to VisitedUsers? + if (analyzeGlobalAux(CE, GS, VisitedUsers)) + return true; + } else { + // Ignore dead constant users. + if (!isSafeToDestroyConstant(C)) + return true; + } } else if (const Instruction *I = dyn_cast<Instruction>(UR)) { if (!GS.HasMultipleAccessingFunctions) { const Function *F = I->getParent()->getParent(); @@ -169,10 +172,6 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, } else { return true; // Any other non-load instruction might take address! } - } else if (const Constant *C = dyn_cast<Constant>(UR)) { - // We might have a dead and dangling constant hanging off of here. - if (!isSafeToDestroyConstant(C)) - return true; } else { // Otherwise must be some other user.
return true; diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index c9f872f5b7e1..923bcc781e47 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -39,6 +39,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" @@ -671,12 +672,9 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock, // edge from this block. SmallVector<Value *, 8> UnwindDestPHIValues; BasicBlock *InvokeBB = II->getParent(); - for (Instruction &I : *UnwindDest) { + for (PHINode &PHI : UnwindDest->phis()) { // Save the value to use for this edge. - PHINode *PHI = dyn_cast<PHINode>(&I); - if (!PHI) - break; - UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB)); + UnwindDestPHIValues.push_back(PHI.getIncomingValueForBlock(InvokeBB)); } // Add incoming-PHI values to the unwind destination block for the given basic diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 9f33d2f82732..9a10535c9310 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -45,6 +45,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp index 92333408aaef..5b66da1e7082 100644 --- a/llvm/lib/Transforms/Utils/LoopPeel.cpp +++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp @@ -737,7 +737,7 @@ TargetTransformInfo::PeelingPreferences llvm::gatherPeelingPreferences( /// for the bulk of dynamic execution, can be further simplified by scalar /// optimizations. bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, - ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, + ScalarEvolution *SE, DominatorTree &DT, AssumptionCache *AC, bool PreserveLCSSA) { assert(PeelCount > 0 && "Attempt to peel out zero iterations?"); assert(canPeel(L) && "Attempt to peel a loop which is not peelable?"); @@ -756,23 +756,21 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, // routes which can lead to the exit: we can reach it from the peeled // iterations too. DenseMap<BasicBlock *, BasicBlock *> NonLoopBlocksIDom; - if (DT) { - for (auto *BB : L->blocks()) { - auto *BBDomNode = DT->getNode(BB); - SmallVector<BasicBlock *, 16> ChildrenToUpdate; - for (auto *ChildDomNode : BBDomNode->children()) { - auto *ChildBB = ChildDomNode->getBlock(); - if (!L->contains(ChildBB)) - ChildrenToUpdate.push_back(ChildBB); - } - // The new idom of the block will be the nearest common dominator - // of all copies of the previous idom. This is equivalent to the - // nearest common dominator of the previous idom and the first latch, - // which dominates all copies of the previous idom. 
- BasicBlock *NewIDom = DT->findNearestCommonDominator(BB, Latch); - for (auto *ChildBB : ChildrenToUpdate) - NonLoopBlocksIDom[ChildBB] = NewIDom; + for (auto *BB : L->blocks()) { + auto *BBDomNode = DT.getNode(BB); + SmallVector<BasicBlock *, 16> ChildrenToUpdate; + for (auto *ChildDomNode : BBDomNode->children()) { + auto *ChildBB = ChildDomNode->getBlock(); + if (!L->contains(ChildBB)) + ChildrenToUpdate.push_back(ChildBB); } + // The new idom of the block will be the nearest common dominator + // of all copies of the previous idom. This is equivalent to the + // nearest common dominator of the previous idom and the first latch, + // which dominates all copies of the previous idom. + BasicBlock *NewIDom = DT.findNearestCommonDominator(BB, Latch); + for (auto *ChildBB : ChildrenToUpdate) + NonLoopBlocksIDom[ChildBB] = NewIDom; } Function *F = Header->getParent(); @@ -822,11 +820,11 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, // If (cond) goto Header // Exit: - BasicBlock *InsertTop = SplitEdge(PreHeader, Header, DT, LI); + BasicBlock *InsertTop = SplitEdge(PreHeader, Header, &DT, LI); BasicBlock *InsertBot = - SplitBlock(InsertTop, InsertTop->getTerminator(), DT, LI); + SplitBlock(InsertTop, InsertTop->getTerminator(), &DT, LI); BasicBlock *NewPreHeader = - SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI); + SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI); InsertTop->setName(Header->getName() + ".peel.begin"); InsertBot->setName(Header->getName() + ".peel.next"); @@ -852,23 +850,21 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ValueToValueMapTy VMap; cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks, - LoopBlocks, VMap, LVMap, DT, LI, + LoopBlocks, VMap, LVMap, &DT, LI, LoopLocalNoAliasDeclScopes); // Remap to use values from the current iteration instead of the // previous one. remapInstructionsInBlocks(NewBlocks, VMap); - if (DT) { - // Update IDoms of the blocks reachable through exits. - if (Iter == 0) - for (auto BBIDom : NonLoopBlocksIDom) - DT->changeImmediateDominator(BBIDom.first, - cast<BasicBlock>(LVMap[BBIDom.second])); + // Update IDoms of the blocks reachable through exits. + if (Iter == 0) + for (auto BBIDom : NonLoopBlocksIDom) + DT.changeImmediateDominator(BBIDom.first, + cast<BasicBlock>(LVMap[BBIDom.second])); #ifdef EXPENSIVE_CHECKS - assert(DT->verify(DominatorTree::VerificationLevel::Fast)); + assert(DT.verify(DominatorTree::VerificationLevel::Fast)); #endif - } auto *LatchBRCopy = cast<BranchInst>(VMap[LatchBR]); updateBranchWeights(InsertBot, LatchBRCopy, ExitWeight, FallThroughWeight); @@ -877,7 +873,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, LatchBRCopy->setMetadata(LLVMContext::MD_loop, nullptr); InsertTop = InsertBot; - InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI); + InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI); InsertBot->setName(Header->getName() + ".peel.next"); F->getBasicBlockList().splice(InsertTop->getIterator(), @@ -912,10 +908,10 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, SE->forgetTopmostLoop(L); // Finally DomTree must be correct.
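// peelLoop now takes the DominatorTree by reference, so a valid tree is
// mandatory at every call site. A minimal, hypothetical caller sketch
// (tryPeel and the analysis objects are illustrative names; the entry point
// is declared in llvm/Transforms/Utils/LoopPeel.h):
#include "llvm/Transforms/Utils/LoopPeel.h"
using namespace llvm;

static bool tryPeel(Loop *L, LoopInfo &LI, ScalarEvolution &SE,
                    DominatorTree &DT, AssumptionCache &AC) {
  if (!canPeel(L))
    return false;
  // DT is passed by reference now; LI, SE and AC remain pointers.
  return peelLoop(L, /*PeelCount=*/2, &LI, &SE, DT, &AC,
                  /*PreserveLCSSA=*/true);
}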
- assert(DT->verify(DominatorTree::VerificationLevel::Fast)); + assert(DT.verify(DominatorTree::VerificationLevel::Fast)); // FIXME: Incrementally update loop-simplify - simplifyLoop(L, DT, LI, SE, AC, nullptr, PreserveLCSSA); + simplifyLoop(L, &DT, LI, SE, AC, nullptr, PreserveLCSSA); NumPeeled++; diff --git a/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/llvm/lib/Transforms/Utils/ModuleUtils.cpp index 7c9ab7f6ca2c..d6a6be2762c7 100644 --- a/llvm/lib/Transforms/Utils/ModuleUtils.cpp +++ b/llvm/lib/Transforms/Utils/ModuleUtils.cpp @@ -264,3 +264,16 @@ void VFABI::setVectorVariantNames( CI->addFnAttr( Attribute::get(M->getContext(), MappingsAttrName, Buffer.str())); } + +void llvm::embedBufferInModule(Module &M, MemoryBufferRef Buf, + StringRef SectionName) { + // Embed the buffer into the module. + Constant *ModuleConstant = ConstantDataArray::get( + M.getContext(), makeArrayRef(Buf.getBufferStart(), Buf.getBufferSize())); + GlobalVariable *GV = new GlobalVariable( + M, ModuleConstant->getType(), true, GlobalValue::PrivateLinkage, + ModuleConstant, "llvm.embedded.object"); + GV->setSection(SectionName); + + appendToCompilerUsed(M, GV); +} diff --git a/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp b/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp index 7083789267d9..deaee467531d 100644 --- a/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp +++ b/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" +#include "llvm/Pass.h" #include "llvm/Support/MD5.h" #include "llvm/Transforms/Utils/ModuleUtils.h" diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index b35ab57e0d87..01b433b4782a 100644 --- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -25,13 +25,13 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/IteratedDominanceFrontier.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" @@ -45,6 +45,7 @@ #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/Support/Casting.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include <algorithm> #include <cassert> diff --git a/llvm/lib/Transforms/Utils/Utils.cpp b/llvm/lib/Transforms/Utils/Utils.cpp index 3ca36a1cad91..43eb5c87acee 100644 --- a/llvm/lib/Transforms/Utils/Utils.cpp +++ b/llvm/lib/Transforms/Utils/Utils.cpp @@ -16,6 +16,7 @@ #include "llvm-c/Transforms/Utils.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/InitializePasses.h" +#include "llvm/Pass.h" #include "llvm/PassRegistry.h" using namespace llvm; diff --git a/llvm/lib/Transforms/Utils/VNCoercion.cpp b/llvm/lib/Transforms/Utils/VNCoercion.cpp index bbe6b3dc23b3..637181722f63 100644 --- a/llvm/lib/Transforms/Utils/VNCoercion.cpp +++ b/llvm/lib/Transforms/Utils/VNCoercion.cpp @@ -2,6 +2,7 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/Debug.h" #define DEBUG_TYPE "vncoerce" diff --git 
a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index d11f4146b590..3290439ecd07 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -632,13 +632,6 @@ protected: Instruction *EntryVal, VPValue *Def, VPTransformState &State); - /// Returns true if an instruction \p I should be scalarized instead of - /// vectorized for the chosen vectorization factor. - bool shouldScalarizeInstruction(Instruction *I) const; - - /// Returns true if we should generate a scalar version of \p IV. - bool needsScalarInduction(Instruction *IV) const; - /// Returns (and creates if needed) the original loop trip count. Value *getOrCreateTripCount(Loop *NewLoop); @@ -2479,21 +2472,6 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI( VecInd->addIncoming(LastInduction, LoopVectorLatch); } -bool InnerLoopVectorizer::shouldScalarizeInstruction(Instruction *I) const { - return Cost->isScalarAfterVectorization(I, VF) || - Cost->isProfitableToScalarize(I, VF); -} - -bool InnerLoopVectorizer::needsScalarInduction(Instruction *IV) const { - if (shouldScalarizeInstruction(IV)) - return true; - auto isScalarInst = [&](User *U) -> bool { - auto *I = cast<Instruction>(U); - return (OrigLoop->contains(I) && shouldScalarizeInstruction(I)); - }; - return llvm::any_of(IV->users(), isScalarInst); -} - void InnerLoopVectorizer::widenIntOrFpInduction( PHINode *IV, VPWidenIntOrFpInductionRecipe *Def, VPTransformState &State, Value *CanonicalIV) { @@ -2549,27 +2527,6 @@ void InnerLoopVectorizer::widenIntOrFpInduction( return ScalarIV; }; - // Create the vector values from the scalar IV, in the absence of creating a - // vector IV. - auto CreateSplatIV = [&](Value *ScalarIV, Value *Step) { - Value *Broadcasted = getBroadcastInstrs(ScalarIV); - for (unsigned Part = 0; Part < UF; ++Part) { - Value *StartIdx; - if (Step->getType()->isFloatingPointTy()) - StartIdx = - getRuntimeVFAsFloat(Builder, Step->getType(), State.VF * Part); - else - StartIdx = getRuntimeVF(Builder, Step->getType(), State.VF * Part); - - Value *EntryPart = - getStepVector(Broadcasted, StartIdx, Step, ID.getInductionOpcode(), - State.VF, State.Builder); - State.set(Def, EntryPart, Part); - if (Trunc) - addMetadata(EntryPart, Trunc); - } - }; - // Fast-math-flags propagate from the original induction instruction. IRBuilder<>::FastMathFlagGuard FMFG(Builder); if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp())) @@ -2605,36 +2562,18 @@ void InnerLoopVectorizer::widenIntOrFpInduction( return; } - // Determine if we want a scalar version of the induction variable. This is - // true if the induction variable itself is not widened, or if it has at - // least one user in the loop that is not widened. - auto NeedsScalarIV = needsScalarInduction(EntryVal); - if (!NeedsScalarIV) { + // Create a new independent vector induction variable, if one is needed. + if (Def->needsVectorIV()) createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State); - return; - } - // Try to create a new independent vector induction variable. If we can't - // create the phi node, we will splat the scalar induction variable in each - // loop iteration. - if (!shouldScalarizeInstruction(EntryVal)) { - createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State); - Value *ScalarIV = CreateScalarIV(Step); + if (Def->needsScalarIV()) { // Create scalar steps that can be used by instructions we will later // scalarize. 
Note that the addition of the scalar steps will not increase // the number of instructions in the loop in the common case prior to // InstCombine. We will be trading one vector extract for each scalar step. + Value *ScalarIV = CreateScalarIV(Step); buildScalarSteps(ScalarIV, Step, EntryVal, ID, Def, State); - return; } - - // All IV users are scalar instructions, so only emit a scalar IV, not a - // vectorised IV. Except when we tail-fold, then the splat IV feeds the - // predicate used by the masked loads/stores. - Value *ScalarIV = CreateScalarIV(Step); - if (!Cost->isScalarEpilogueAllowed()) - CreateSplatIV(ScalarIV, Step); - buildScalarSteps(ScalarIV, Step, EntryVal, ID, Def, State); } void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step, @@ -2663,17 +2602,15 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step, } // Determine the number of scalars we need to generate for each unroll - // iteration. If EntryVal is uniform, we only need to generate the first - // lane. Otherwise, we generate all VF values. - bool IsUniform = - Cost->isUniformAfterVectorization(cast<Instruction>(EntryVal), State.VF); - unsigned Lanes = IsUniform ? 1 : State.VF.getKnownMinValue(); + // iteration. + bool FirstLaneOnly = vputils::onlyFirstLaneUsed(Def); + unsigned Lanes = FirstLaneOnly ? 1 : State.VF.getKnownMinValue(); // Compute the scalar steps and save the results in State. Type *IntStepTy = IntegerType::get(ScalarIVTy->getContext(), ScalarIVTy->getScalarSizeInBits()); Type *VecIVTy = nullptr; Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr; - if (!IsUniform && State.VF.isScalable()) { + if (!FirstLaneOnly && State.VF.isScalable()) { VecIVTy = VectorType::get(ScalarIVTy, State.VF); UnitStepVec = Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF)); @@ -2684,7 +2621,7 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step, for (unsigned Part = 0; Part < State.UF; ++Part) { Value *StartIdx0 = createStepForVF(Builder, IntStepTy, State.VF, Part); - if (!IsUniform && State.VF.isScalable()) { + if (!FirstLaneOnly && State.VF.isScalable()) { auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0); auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec); if (ScalarIVTy->isFloatingPointTy()) @@ -4565,7 +4502,7 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, // Determine the number of scalars we need to generate for each unroll // iteration. If the instruction is uniform, we only need to generate the // first lane. Otherwise, we generate all VF values. - bool IsUniform = Cost->isUniformAfterVectorization(P, State.VF); + bool IsUniform = vputils::onlyFirstLaneUsed(PhiR); assert((IsUniform || !State.VF.isScalable()) && "Cannot scalarize a scalable VF"); unsigned Lanes = IsUniform ? 1 : State.VF.getFixedValue(); @@ -5889,7 +5826,9 @@ bool LoopVectorizationCostModel::isEpilogueVectorizationProfitable( // consider interleaving beneficial (eg. MVE). if (TTI.getMaxInterleaveFactor(VF.getKnownMinValue()) <= 1) return false; - if (VF.getFixedValue() >= EpilogueVectorizationMinVF) + // FIXME: We should consider changing the threshold for scalable + // vectors to take VScaleForTuning into account. 
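// (Illustration of the FIXME above: with a scalable main VF such as
// "vscale x 4", the check below compares only the known minimum, 4, against
// EpilogueVectorizationMinVF, even though the effective runtime width is
// 4 * vscale; a VScaleForTuning-aware threshold could refine this later.)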
+ if (VF.getKnownMinValue() >= EpilogueVectorizationMinVF) return true; return false; } @@ -5940,29 +5879,21 @@ LoopVectorizationCostModel::selectEpilogueVectorizationFactor( return Result; } - auto FixedMainLoopVF = ElementCount::getFixed(MainLoopVF.getKnownMinValue()); - if (MainLoopVF.isScalable()) - LLVM_DEBUG( - dbgs() << "LEV: Epilogue vectorization using scalable vectors not " - "yet supported. Converting to fixed-width (VF=" - << FixedMainLoopVF << ") instead\n"); - - if (!isEpilogueVectorizationProfitable(FixedMainLoopVF)) { + if (!isEpilogueVectorizationProfitable(MainLoopVF)) { LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization is not profitable for " "this loop\n"); return Result; } for (auto &NextVF : ProfitableVFs) - if (ElementCount::isKnownLT(NextVF.Width, FixedMainLoopVF) && - (Result.Width.getFixedValue() == 1 || - isMoreProfitable(NextVF, Result)) && + if (ElementCount::isKnownLT(NextVF.Width, MainLoopVF) && + (Result.Width.isScalar() || isMoreProfitable(NextVF, Result)) && LVP.hasPlanWithVF(NextVF.Width)) Result = NextVF; if (Result != VectorizationFactor::Disabled()) LLVM_DEBUG(dbgs() << "LEV: Vectorizing epilogue loop with VF = " - << Result.Width.getFixedValue() << "\n";); + << Result.Width << "\n";); return Result; } @@ -8546,16 +8477,54 @@ VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I, Mask, Consecutive, Reverse); } -VPWidenIntOrFpInductionRecipe * -VPRecipeBuilder::tryToOptimizeInductionPHI(PHINode *Phi, - ArrayRef<VPValue *> Operands) const { +static VPWidenIntOrFpInductionRecipe * +createWidenInductionRecipe(PHINode *Phi, Instruction *PhiOrTrunc, + VPValue *Start, const InductionDescriptor &IndDesc, + LoopVectorizationCostModel &CM, Loop &OrigLoop, + VFRange &Range) { + // Returns true if an instruction \p I should be scalarized instead of + // vectorized for the chosen vectorization factor. + auto ShouldScalarizeInstruction = [&CM](Instruction *I, ElementCount VF) { + return CM.isScalarAfterVectorization(I, VF) || + CM.isProfitableToScalarize(I, VF); + }; + + bool NeedsScalarIV = LoopVectorizationPlanner::getDecisionAndClampRange( + [&](ElementCount VF) { + // Returns true if we should generate a scalar version of \p IV. + if (ShouldScalarizeInstruction(PhiOrTrunc, VF)) + return true; + auto isScalarInst = [&](User *U) -> bool { + auto *I = cast<Instruction>(U); + return OrigLoop.contains(I) && ShouldScalarizeInstruction(I, VF); + }; + return any_of(PhiOrTrunc->users(), isScalarInst); + }, + Range); + bool NeedsScalarIVOnly = LoopVectorizationPlanner::getDecisionAndClampRange( + [&](ElementCount VF) { + return ShouldScalarizeInstruction(PhiOrTrunc, VF); + }, + Range); + assert(IndDesc.getStartValue() == + Phi->getIncomingValueForBlock(OrigLoop.getLoopPreheader())); + if (auto *TruncI = dyn_cast<TruncInst>(PhiOrTrunc)) { + return new VPWidenIntOrFpInductionRecipe(Phi, Start, IndDesc, TruncI, + NeedsScalarIV, !NeedsScalarIVOnly); + } + assert(isa<PHINode>(PhiOrTrunc) && "must be a phi node here"); + return new VPWidenIntOrFpInductionRecipe(Phi, Start, IndDesc, NeedsScalarIV, + !NeedsScalarIVOnly); +} + +VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI( + PHINode *Phi, ArrayRef<VPValue *> Operands, VFRange &Range) const { + // Check if this is an integer or fp induction. If so, build the recipe that // produces its scalar and vector values. 
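// Cheat-sheet for the two flags on the recipe built above (a summary of this
// diff, not verbatim source): the recipe is constructed with
// (NeedsScalarIV, /*NeedsVectorIV=*/!NeedsScalarIVOnly), and execution in
// widenIntOrFpInduction then does:
//   needsVectorIV() -> createVectorIntOrFpInductionPHI (wide phi per part)
//   needsScalarIV() -> buildScalarSteps (scalar steps for in-loop users)
// So an induction whose every user is scalarized gets scalar steps only,
// while one with no scalar users gets just the vector phi.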
- if (auto *II = Legal->getIntOrFpInductionDescriptor(Phi)) { - assert(II->getStartValue() == - Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader())); - return new VPWidenIntOrFpInductionRecipe(Phi, Operands[0], *II); - } + if (auto *II = Legal->getIntOrFpInductionDescriptor(Phi)) + return createWidenInductionRecipe(Phi, Phi, Operands[0], *II, CM, *OrigLoop, + Range); return nullptr; } @@ -8583,7 +8552,7 @@ VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate( auto *Phi = cast<PHINode>(I->getOperand(0)); const InductionDescriptor &II = *Legal->getIntOrFpInductionDescriptor(Phi); VPValue *Start = Plan.getOrAddVPValue(II.getStartValue()); - return new VPWidenIntOrFpInductionRecipe(Phi, Start, II, I); + return createWidenInductionRecipe(Phi, I, Start, II, CM, *OrigLoop, Range); } return nullptr; } @@ -8865,7 +8834,7 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr, if (auto Phi = dyn_cast<PHINode>(Instr)) { if (Phi->getParent() != OrigLoop->getHeader()) return tryToBlend(Phi, Operands, Plan); - if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands))) + if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, Range))) return toVPRecipeResult(Recipe); VPHeaderPHIRecipe *PhiRecipe = nullptr; diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 99c265fc5101..15b349f53fd9 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -471,17 +471,36 @@ static bool isValidForAlternation(unsigned Opcode) { return true; } +static InstructionsState getSameOpcode(ArrayRef<Value *> VL, + unsigned BaseIndex = 0); + +/// Checks if the provided operands of 2 cmp instructions are compatible, i.e. +/// compatible instructions or constants, or just some other regular values. +static bool areCompatibleCmpOps(Value *BaseOp0, Value *BaseOp1, Value *Op0, + Value *Op1) { + return (isConstant(BaseOp0) && isConstant(Op0)) || + (isConstant(BaseOp1) && isConstant(Op1)) || + (!isa<Instruction>(BaseOp0) && !isa<Instruction>(Op0) && + !isa<Instruction>(BaseOp1) && !isa<Instruction>(Op1)) || + getSameOpcode({BaseOp0, Op0}).getOpcode() || + getSameOpcode({BaseOp1, Op1}).getOpcode(); +} + /// \returns analysis of the Instructions in \p VL described in /// InstructionsState, the Opcode that we suppose the whole list /// could be vectorized even if its structure is diverse. static InstructionsState getSameOpcode(ArrayRef<Value *> VL, - unsigned BaseIndex = 0) { + unsigned BaseIndex) { // Make sure these are all Instructions. if (llvm::any_of(VL, [](Value *V) { return !isa<Instruction>(V); })) return InstructionsState(VL[BaseIndex], nullptr, nullptr); bool IsCastOp = isa<CastInst>(VL[BaseIndex]); bool IsBinOp = isa<BinaryOperator>(VL[BaseIndex]); + bool IsCmpOp = isa<CmpInst>(VL[BaseIndex]); + CmpInst::Predicate BasePred = + IsCmpOp ? 
cast<CmpInst>(VL[BaseIndex])->getPredicate() : CmpInst::BAD_ICMP_PREDICATE; unsigned Opcode = cast<Instruction>(VL[BaseIndex])->getOpcode(); unsigned AltOpcode = Opcode; unsigned AltIndex = BaseIndex; @@ -514,6 +533,57 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL, continue; } } + } else if (IsCmpOp && isa<CmpInst>(VL[Cnt])) { + auto *BaseInst = cast<Instruction>(VL[BaseIndex]); + auto *Inst = cast<Instruction>(VL[Cnt]); + Type *Ty0 = BaseInst->getOperand(0)->getType(); + Type *Ty1 = Inst->getOperand(0)->getType(); + if (Ty0 == Ty1) { + Value *BaseOp0 = BaseInst->getOperand(0); + Value *BaseOp1 = BaseInst->getOperand(1); + Value *Op0 = Inst->getOperand(0); + Value *Op1 = Inst->getOperand(1); + CmpInst::Predicate CurrentPred = + cast<CmpInst>(VL[Cnt])->getPredicate(); + CmpInst::Predicate SwappedCurrentPred = + CmpInst::getSwappedPredicate(CurrentPred); + // Check for compatible operands. If the corresponding operands are not + // compatible, we need to perform alternate vectorization. + if (InstOpcode == Opcode) { + if (BasePred == CurrentPred && + areCompatibleCmpOps(BaseOp0, BaseOp1, Op0, Op1)) + continue; + if (BasePred == SwappedCurrentPred && + areCompatibleCmpOps(BaseOp0, BaseOp1, Op1, Op0)) + continue; + if (E == 2 && + (BasePred == CurrentPred || BasePred == SwappedCurrentPred)) + continue; + auto *AltInst = cast<CmpInst>(VL[AltIndex]); + CmpInst::Predicate AltPred = AltInst->getPredicate(); + Value *AltOp0 = AltInst->getOperand(0); + Value *AltOp1 = AltInst->getOperand(1); + // Check if operands are compatible with alternate operands. + if (AltPred == CurrentPred && + areCompatibleCmpOps(AltOp0, AltOp1, Op0, Op1)) + continue; + if (AltPred == SwappedCurrentPred && + areCompatibleCmpOps(AltOp0, AltOp1, Op1, Op0)) + continue; + } + if (BaseIndex == AltIndex) { + assert(isValidForAlternation(Opcode) && + isValidForAlternation(InstOpcode) && + "Cast isn't safe for alternation, logic needs to be updated!"); + AltIndex = Cnt; + continue; + } + auto *AltInst = cast<CmpInst>(VL[AltIndex]); + CmpInst::Predicate AltPred = AltInst->getPredicate(); + if (BasePred == CurrentPred || BasePred == SwappedCurrentPred || + AltPred == CurrentPred || AltPred == SwappedCurrentPred) + continue; + } } else if (InstOpcode == Opcode || InstOpcode == AltOpcode) continue; return InstructionsState(VL[BaseIndex], nullptr, nullptr); @@ -3307,9 +3377,14 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) { MapVector<OrdersType, unsigned, DenseMap<OrdersType, unsigned, OrdersTypeDenseMapInfo>> OrdersUses; + // Do the analysis for each tree entry only once, otherwise the order of + // the same node may be considered several times, though it might not be + // profitable.
SmallPtrSet<const TreeEntry *, 4> VisitedOps; for (const auto &Op : Data.second) { TreeEntry *OpTE = Op.second; + if (!VisitedOps.insert(OpTE).second) + continue; if (!OpTE->ReuseShuffleIndices.empty() || (IgnoreReorder && OpTE == VectorizableTree.front().get())) continue; @@ -3333,9 +3408,8 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) { } else { ++OrdersUses.insert(std::make_pair(Order, 0)).first->second; } - if (VisitedOps.insert(OpTE).second) - OrdersUses.insert(std::make_pair(OrdersType(), 0)).first->second += - OpTE->UserTreeIndices.size(); + OrdersUses.insert(std::make_pair(OrdersType(), 0)).first->second += + OpTE->UserTreeIndices.size(); assert(OrdersUses[{}] > 0 && "Counter cannot be less than 0."); --OrdersUses[{}]; } @@ -4350,9 +4424,41 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, LLVM_DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n"); // Reorder operands if reordering would enable vectorization. - if (isa<BinaryOperator>(VL0)) { + auto *CI = dyn_cast<CmpInst>(VL0); + if (isa<BinaryOperator>(VL0) || CI) { ValueList Left, Right; - reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this); + if (!CI || all_of(VL, [](Value *V) { + return cast<CmpInst>(V)->isCommutative(); + })) { + reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this); + } else { + CmpInst::Predicate P0 = CI->getPredicate(); + CmpInst::Predicate AltP0 = cast<CmpInst>(S.AltOp)->getPredicate(); + CmpInst::Predicate AltP0Swapped = CmpInst::getSwappedPredicate(AltP0); + Value *BaseOp0 = VL0->getOperand(0); + Value *BaseOp1 = VL0->getOperand(1); + // Collect operands - commute if it uses the swapped predicate or + // alternate operation. + for (Value *V : VL) { + auto *Cmp = cast<CmpInst>(V); + Value *LHS = Cmp->getOperand(0); + Value *RHS = Cmp->getOperand(1); + CmpInst::Predicate CurrentPred = CI->getPredicate(); + CmpInst::Predicate CurrentPredSwapped = + CmpInst::getSwappedPredicate(CurrentPred); + if (P0 == AltP0 || P0 == AltP0Swapped) { + if ((P0 == CurrentPred && + !areCompatibleCmpOps(BaseOp0, BaseOp1, LHS, RHS)) || + (P0 == CurrentPredSwapped && + !areCompatibleCmpOps(BaseOp0, BaseOp1, RHS, LHS))) + std::swap(LHS, RHS); + } else if (!areCompatibleCmpOps(BaseOp0, BaseOp1, LHS, RHS)) { + std::swap(LHS, RHS); + } + Left.push_back(LHS); + Right.push_back(RHS); + } + } TE->setOperand(0, Left); TE->setOperand(1, Right); buildTree_rec(Left, Depth + 1, {TE, 0}); @@ -5284,7 +5390,8 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E, ((Instruction::isBinaryOp(E->getOpcode()) && Instruction::isBinaryOp(E->getAltOpcode())) || (Instruction::isCast(E->getOpcode()) && - Instruction::isCast(E->getAltOpcode()))) && + Instruction::isCast(E->getAltOpcode())) || + (isa<CmpInst>(VL0) && isa<CmpInst>(E->getAltOp()))) && "Invalid Shuffle Vector Operand"); InstructionCost ScalarCost = 0; if (NeedToShuffleReuses) { @@ -5332,6 +5439,14 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E, VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind); VecCost += TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy, CostKind); + } else if (auto *CI0 = dyn_cast<CmpInst>(VL0)) { + VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy, + Builder.getInt1Ty(), + CI0->getPredicate(), CostKind, VL0); + VecCost += TTI->getCmpSelInstrCost( + E->getOpcode(), ScalarTy, Builder.getInt1Ty(), + cast<CmpInst>(E->getAltOp())->getPredicate(), CostKind, + E->getAltOp()); } else { Type *Src0SclTy = E->getMainOp()->getOperand(0)->getType(); Type *Src1SclTy = 
E->getAltOp()->getOperand(0)->getType(); @@ -5348,6 +5463,29 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E, E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices, [E](Instruction *I) { assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode"); + if (auto *CI0 = dyn_cast<CmpInst>(E->getMainOp())) { + auto *AltCI0 = cast<CmpInst>(E->getAltOp()); + auto *CI = cast<CmpInst>(I); + CmpInst::Predicate P0 = CI0->getPredicate(); + CmpInst::Predicate AltP0 = AltCI0->getPredicate(); + CmpInst::Predicate AltP0Swapped = + CmpInst::getSwappedPredicate(AltP0); + CmpInst::Predicate CurrentPred = CI->getPredicate(); + CmpInst::Predicate CurrentPredSwapped = + CmpInst::getSwappedPredicate(CurrentPred); + if (P0 == AltP0 || P0 == AltP0Swapped) { + // Alternate cmps have same/swapped predicate as main cmps but + // different order of compatible operands. + return !( + (P0 == CurrentPred && + areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1), + I->getOperand(0), I->getOperand(1))) || + (P0 == CurrentPredSwapped && + areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1), + I->getOperand(1), I->getOperand(0)))); + } + return CurrentPred != P0 && CurrentPredSwapped != P0; + } return I->getOpcode() == E->getAltOpcode(); }, Mask); @@ -6830,11 +6968,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { ((Instruction::isBinaryOp(E->getOpcode()) && Instruction::isBinaryOp(E->getAltOpcode())) || (Instruction::isCast(E->getOpcode()) && - Instruction::isCast(E->getAltOpcode()))) && + Instruction::isCast(E->getAltOpcode())) || + (isa<CmpInst>(VL0) && isa<CmpInst>(E->getAltOp()))) && "Invalid Shuffle Vector Operand"); Value *LHS = nullptr, *RHS = nullptr; - if (Instruction::isBinaryOp(E->getOpcode())) { + if (Instruction::isBinaryOp(E->getOpcode()) || isa<CmpInst>(VL0)) { setInsertPointAfterBundle(E); LHS = vectorizeTree(E->getOperand(0)); RHS = vectorizeTree(E->getOperand(1)); @@ -6854,6 +6993,15 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS, RHS); V1 = Builder.CreateBinOp( static_cast<Instruction::BinaryOps>(E->getAltOpcode()), LHS, RHS); + } else if (auto *CI0 = dyn_cast<CmpInst>(VL0)) { + V0 = Builder.CreateCmp(CI0->getPredicate(), LHS, RHS); + auto *AltCI = cast<CmpInst>(E->getAltOp()); + CmpInst::Predicate AltPred = AltCI->getPredicate(); + unsigned AltIdx = + std::distance(E->Scalars.begin(), find(E->Scalars, AltCI)); + if (AltCI->getOperand(0) != E->getOperand(0)[AltIdx]) + AltPred = CmpInst::getSwappedPredicate(AltPred); + V1 = Builder.CreateCmp(AltPred, LHS, RHS); } else { V0 = Builder.CreateCast( static_cast<Instruction::CastOps>(E->getOpcode()), LHS, VecTy); @@ -6878,6 +7026,29 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices, [E](Instruction *I) { assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode"); + if (auto *CI0 = dyn_cast<CmpInst>(E->getMainOp())) { + auto *AltCI0 = cast<CmpInst>(E->getAltOp()); + auto *CI = cast<CmpInst>(I); + CmpInst::Predicate P0 = CI0->getPredicate(); + CmpInst::Predicate AltP0 = AltCI0->getPredicate(); + CmpInst::Predicate AltP0Swapped = + CmpInst::getSwappedPredicate(AltP0); + CmpInst::Predicate CurrentPred = CI->getPredicate(); + CmpInst::Predicate CurrentPredSwapped = + CmpInst::getSwappedPredicate(CurrentPred); + if (P0 == AltP0 || P0 == AltP0Swapped) { + // Alternate cmps have same/swapped predicate as main cmps but + // different order of compatible operands. 
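// Worked example of the operand-order rule above (IR names illustrative):
// with a main cmp
//   %c0 = icmp slt i32 %a, %b
// an alternate candidate
//   %c1 = icmp sgt i32 %x, %y
// matches through the swapped predicate, since sgt is slt with its operands
// exchanged, and its operands are therefore checked for compatibility in the
// (%y, %x) order rather than (%x, %y).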
+ return !( + (P0 == CurrentPred && + areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1), + I->getOperand(0), I->getOperand(1))) || + (P0 == CurrentPredSwapped && + areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1), + I->getOperand(1), I->getOperand(0)))); + } + return CurrentPred != P0 && CurrentPredSwapped != P0; + } return I->getOpcode() == E->getAltOpcode(); }, Mask, &OpScalars, &AltScalars); @@ -7676,11 +7847,8 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) { for (ScheduleData *BundleMember = picked; BundleMember; BundleMember = BundleMember->NextInBundle) { Instruction *pickedInst = BundleMember->Inst; - if (pickedInst->getNextNode() != LastScheduledInst) { - BS->BB->getInstList().remove(pickedInst); - BS->BB->getInstList().insert(LastScheduledInst->getIterator(), - pickedInst); - } + if (pickedInst->getNextNode() != LastScheduledInst) + pickedInst->moveBefore(LastScheduledInst); LastScheduledInst = pickedInst; } @@ -8444,7 +8612,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R, if (R.isTreeTinyAndNotFullyVectorizable()) continue; R.reorderTopToBottom(); - R.reorderBottomToTop(); + R.reorderBottomToTop(!isa<InsertElementInst>(Ops.front())); R.buildExternalUses(); R.computeMinimumValueSizes(); diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h index e5dded3c0f1e..8822c0004eb2 100644 --- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -75,7 +75,8 @@ class VPRecipeBuilder { /// Check if an induction recipe should be constructed for \I. If so build and /// return it. If not, return null. VPWidenIntOrFpInductionRecipe * - tryToOptimizeInductionPHI(PHINode *Phi, ArrayRef<VPValue *> Operands) const; + tryToOptimizeInductionPHI(PHINode *Phi, ArrayRef<VPValue *> Operands, + VFRange &Range) const; /// Optimize the special case where the operand of \p I is a constant integer /// induction variable. diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index a96c122db2a9..342d4a074e10 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -1649,3 +1649,9 @@ void VPSlotTracker::assignSlots(const VPlan &Plan) { for (VPValue *Def : Recipe.definedValues()) assignSlot(Def); } + +bool vputils::onlyFirstLaneUsed(VPValue *Def) { + return all_of(Def->users(), [Def](VPUser *U) { + return cast<VPRecipeBase>(U)->onlyFirstLaneUsed(Def); + }); +} diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 824440f98a8b..bcaabca692cc 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -759,6 +759,14 @@ public: bool mayReadOrWriteMemory() const { return mayReadFromMemory() || mayWriteToMemory(); } + + /// Returns true if the recipe only uses the first lane of operand \p Op. + /// Conservatively returns false. + virtual bool onlyFirstLaneUsed(const VPValue *Op) const { + assert(is_contained(operands(), Op) && + "Op must be an operand of the recipe"); + return false; + } }; inline bool VPUser::classof(const VPDef *Def) { @@ -893,6 +901,24 @@ public: /// Set the fast-math flags. void setFastMathFlags(FastMathFlags FMFNew); + + /// Returns true if the recipe only uses the first lane of operand \p Op. 
+ bool onlyFirstLaneUsed(const VPValue *Op) const override { + assert(is_contained(operands(), Op) && + "Op must be an operand of the recipe"); + if (getOperand(0) != Op) + return false; + switch (getOpcode()) { + default: + return false; + case VPInstruction::ActiveLaneMask: + case VPInstruction::CanonicalIVIncrement: + case VPInstruction::CanonicalIVIncrementNUW: + case VPInstruction::BranchOnCount: + return true; + }; + llvm_unreachable("switch should return"); + } }; /// VPWidenRecipe is a recipe for producing a copy of vector type its @@ -1027,18 +1053,24 @@ public: class VPWidenIntOrFpInductionRecipe : public VPRecipeBase, public VPValue { PHINode *IV; const InductionDescriptor &IndDesc; + bool NeedsScalarIV; + bool NeedsVectorIV; public: VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, - const InductionDescriptor &IndDesc) + const InductionDescriptor &IndDesc, + bool NeedsScalarIV, bool NeedsVectorIV) : VPRecipeBase(VPWidenIntOrFpInductionSC, {Start}), VPValue(IV, this), - IV(IV), IndDesc(IndDesc) {} + IV(IV), IndDesc(IndDesc), NeedsScalarIV(NeedsScalarIV), + NeedsVectorIV(NeedsVectorIV) {} VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, const InductionDescriptor &IndDesc, - TruncInst *Trunc) + TruncInst *Trunc, bool NeedsScalarIV, + bool NeedsVectorIV) : VPRecipeBase(VPWidenIntOrFpInductionSC, {Start}), VPValue(Trunc, this), - IV(IV), IndDesc(IndDesc) {} + IV(IV), IndDesc(IndDesc), NeedsScalarIV(NeedsScalarIV), + NeedsVectorIV(NeedsVectorIV) {} ~VPWidenIntOrFpInductionRecipe() override = default; @@ -1082,6 +1114,12 @@ public: const TruncInst *TruncI = getTruncInst(); return TruncI ? TruncI->getType() : IV->getType(); } + + /// Returns true if a scalar phi needs to be created for the induction. + bool needsScalarIV() const { return NeedsScalarIV; } + + /// Returns true if a vector phi needs to be created for the induction. + bool needsVectorIV() const { return NeedsVectorIV; } }; /// A pure virtual base class for all recipes modeling header phis, including @@ -1318,6 +1356,17 @@ public: void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override; #endif + + /// Returns true if the recipe only uses the first lane of operand \p Op. + bool onlyFirstLaneUsed(const VPValue *Op) const override { + assert(is_contained(operands(), Op) && + "Op must be an operand of the recipe"); + // Recursing through Blend recipes only, must terminate at header phi's the + // latest. + return all_of(users(), [this](VPUser *U) { + return cast<VPRecipeBase>(U)->onlyFirstLaneUsed(this); + }); + } }; /// VPInterleaveRecipe is a recipe for transforming an interleave group of load @@ -1495,6 +1544,13 @@ public: bool isPacked() const { return AlsoPack; } bool isPredicated() const { return IsPredicated; } + + /// Returns true if the recipe only uses the first lane of operand \p Op. + bool onlyFirstLaneUsed(const VPValue *Op) const override { + assert(is_contained(operands(), Op) && + "Op must be an operand of the recipe"); + return isUniform(); + } }; /// A recipe for generating conditional branches on the bits of a mask. @@ -1651,6 +1707,16 @@ public: void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override; #endif + + /// Returns true if the recipe only uses the first lane of operand \p Op. + bool onlyFirstLaneUsed(const VPValue *Op) const override { + assert(is_contained(operands(), Op) && + "Op must be an operand of the recipe"); + + // Widened, consecutive memory operations only demand the first lane of + // their address. 
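// Illustration (typed-pointer IR of this era, names illustrative): a widened
// consecutive load such as
//   %vp = bitcast i32* %addr to <4 x i32>*
//   %wide.load = load <4 x i32>, <4 x i32>* %vp
// only ever materializes the lane-0 address; the remaining lanes follow from
// consecutiveness. That is why the address operand reports first-lane-only
// use here, while non-consecutive gathers and scatters conservatively do not.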
+ return Op == getAddr() && isConsecutive(); + } }; /// Canonical scalar induction phi of the vector loop. Starting at the specified @@ -1686,6 +1752,13 @@ public: const Type *getScalarType() const { return getOperand(0)->getLiveInIRValue()->getType(); } + + /// Returns true if the recipe only uses the first lane of operand \p Op. + bool onlyFirstLaneUsed(const VPValue *Op) const override { + assert(is_contained(operands(), Op) && + "Op must be an operand of the recipe"); + return true; + } }; /// A Recipe for widening the canonical induction variable of the vector loop. @@ -2766,6 +2839,14 @@ public: /// Return true if all visited instruction can be combined. bool isCompletelySLP() const { return CompletelySLP; } }; + +namespace vputils { + +/// Returns true if only the first lane of \p Def is used. +bool onlyFirstLaneUsed(VPValue *Def); + +} // end namespace vputils + } // end namespace llvm #endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index fb5f3d428189..70ce773a8a85 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -47,7 +47,8 @@ void VPlanTransforms::VPInstructionsToVPRecipes( auto *Phi = cast<PHINode>(VPPhi->getUnderlyingValue()); if (const auto *II = GetIntOrFpInductionDescriptor(Phi)) { VPValue *Start = Plan->getOrAddVPValue(II->getStartValue()); - NewRecipe = new VPWidenIntOrFpInductionRecipe(Phi, Start, *II); + NewRecipe = + new VPWidenIntOrFpInductionRecipe(Phi, Start, *II, false, true); } else { Plan->addVPValue(Phi, VPPhi); continue; @@ -341,10 +342,16 @@ void VPlanTransforms::removeRedundantCanonicalIVs(VPlan &Plan) { for (VPRecipeBase &Phi : HeaderVPBB->phis()) { auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi); - // If the induction recipe is canonical and the types match, use it - // directly. - if (WidenOriginalIV && WidenOriginalIV->isCanonical() && - WidenOriginalIV->getScalarType() == WidenNewIV->getScalarType()) { + if (!WidenOriginalIV || !WidenOriginalIV->isCanonical() || + WidenOriginalIV->getScalarType() != WidenNewIV->getScalarType()) + continue; + + // Replace WidenNewIV with WidenOriginalIV if WidenOriginalIV provides + // everything WidenNewIV's users need. That is, WidenOriginalIV will + // generate a vector phi or all users of WidenNewIV demand the first lane + // only. 
+ if (WidenOriginalIV->needsVectorIV() || + vputils::onlyFirstLaneUsed(WidenNewIV)) { WidenNewIV->replaceAllUsesWith(WidenOriginalIV); WidenNewIV->eraseFromParent(); return; diff --git a/llvm/lib/Transforms/Vectorize/Vectorize.cpp b/llvm/lib/Transforms/Vectorize/Vectorize.cpp index 0296a995ad29..010ca28fc237 100644 --- a/llvm/lib/Transforms/Vectorize/Vectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/Vectorize.cpp @@ -18,6 +18,7 @@ #include "llvm/Analysis/Passes.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/InitializePasses.h" +#include "llvm/PassRegistry.h" using namespace llvm; diff --git a/llvm/tools/llvm-ar/llvm-ar.cpp b/llvm/tools/llvm-ar/llvm-ar.cpp index f7b29b884027..8842162f5216 100644 --- a/llvm/tools/llvm-ar/llvm-ar.cpp +++ b/llvm/tools/llvm-ar/llvm-ar.cpp @@ -90,6 +90,7 @@ OPTIONS: --rsp-quoting - quoting style for response files =posix - posix =windows - windows + --thin - create a thin archive --version - print the version and exit @<file> - read options from <file> @@ -118,7 +119,7 @@ MODIFIERS: [P] - use full names when matching (implied for thin archives) [s] - create an archive index (cf. ranlib) [S] - do not build a symbol table - [T] - create a thin archive + [T] - deprecated, use --thin instead [u] - update only [files] newer than archive contents [U] - use actual timestamps and uids/gids [v] - be verbose about actions taken @@ -390,8 +391,6 @@ static ArchiveOperation parseCommandLine() { break; case 'T': Thin = true; - // Thin archives store path names, so P should be forced. - CompareFullPath = true; break; case 'L': AddLibrary = true; @@ -407,6 +406,10 @@ static ArchiveOperation parseCommandLine() { } } + // Thin archives store path names, so P should be forced. + if (Thin) + CompareFullPath = true; + // At this point, the next thing on the command line must be // the archive name. getArchive(); @@ -965,6 +968,8 @@ static void createSymbolTable(object::Archive *OldArchive) { if (OldArchive->hasSymbolTable()) return; + if (OldArchive->isThin()) + Thin = true; performWriteOperation(CreateSymTab, OldArchive, nullptr, nullptr); } @@ -1202,6 +1207,11 @@ static int ar_main(int argc, char **argv) { continue; } + if (strcmp(*ArgIt, "--thin") == 0) { + Thin = true; + continue; + } + Match = matchFlagWithArg("format", ArgIt, Argv); if (Match) { FormatType = StringSwitch<Format>(Match) diff --git a/llvm/tools/llvm-as/llvm-as.cpp b/llvm/tools/llvm-as/llvm-as.cpp index 307a7f9b7999..11dad0d9c369 100644 --- a/llvm/tools/llvm-as/llvm-as.cpp +++ b/llvm/tools/llvm-as/llvm-as.cpp @@ -106,7 +106,7 @@ static void WriteOutputFile(const Module *M, const ModuleSummaryIndex *Index) { else // Otherwise, with an empty Module but non-empty Index, we write a // combined index. - WriteIndexToFile(*IndexToWrite, Out->os()); + writeIndexToFile(*IndexToWrite, Out->os()); } // Declare success. 
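// The WriteIndexToFile -> writeIndexToFile rename appears at several call
// sites in this import. A minimal caller sketch (emitIndex and Path are
// illustrative names; the API is declared in llvm/Bitcode/BitcodeWriter.h):
#include "llvm/ADT/StringRef.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

static std::error_code emitIndex(const ModuleSummaryIndex &Index,
                                 StringRef Path) {
  std::error_code EC;
  raw_fd_ostream OS(Path, EC, sys::fs::OF_None);
  if (EC)
    return EC;
  writeIndexToFile(Index, OS); // lowerCamelCase spelling after this import
  return std::error_code();
}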
diff --git a/llvm/tools/llvm-extract/llvm-extract.cpp b/llvm/tools/llvm-extract/llvm-extract.cpp index cb1c4116ff19..3cdef529504e 100644 --- a/llvm/tools/llvm-extract/llvm-extract.cpp +++ b/llvm/tools/llvm-extract/llvm-extract.cpp @@ -21,6 +21,7 @@ #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" #include "llvm/IRReader/IRReader.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" diff --git a/llvm/tools/llvm-lto/llvm-lto.cpp b/llvm/tools/llvm-lto/llvm-lto.cpp index d78c4dff7db4..8fc3a5d68500 100644 --- a/llvm/tools/llvm-lto/llvm-lto.cpp +++ b/llvm/tools/llvm-lto/llvm-lto.cpp @@ -497,7 +497,7 @@ static void createCombinedModuleSummaryIndex() { raw_fd_ostream OS(OutputFilename + ".thinlto.bc", EC, sys::fs::OpenFlags::OF_None); error(EC, "error opening the file '" + OutputFilename + ".thinlto.bc'"); - WriteIndexToFile(CombinedIndex, OS); + writeIndexToFile(CombinedIndex, OS); OS.close(); } @@ -660,7 +660,7 @@ private: std::error_code EC; raw_fd_ostream OS(OutputFilename, EC, sys::fs::OpenFlags::OF_None); error(EC, "error opening the file '" + OutputFilename + "'"); - WriteIndexToFile(*CombinedIndex, OS); + writeIndexToFile(*CombinedIndex, OS); } /// Load the combined index from disk, then compute and generate @@ -698,7 +698,7 @@ private: std::error_code EC; raw_fd_ostream OS(OutputName, EC, sys::fs::OpenFlags::OF_None); error(EC, "error opening the file '" + OutputName + "'"); - WriteIndexToFile(*Index, OS, &ModuleToSummariesForIndex); + writeIndexToFile(*Index, OS, &ModuleToSummariesForIndex); } } diff --git a/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp b/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp index 3cac77411845..6b731abd9ed9 100644 --- a/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp +++ b/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp @@ -417,6 +417,7 @@ Error MachOLayoutBuilder::layoutTail(uint64_t Offset) { case MachO::LC_SUB_UMBRELLA: case MachO::LC_SUB_CLIENT: case MachO::LC_SUB_LIBRARY: + case MachO::LC_LINKER_OPTION: // Nothing to update. 
break; default: diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp index 5e58c1365d80..6000460d3c23 100644 --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -255,9 +255,7 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper, } auto Reader = std::move(ReaderOrErr.get()); - bool IsIRProfile = Reader->isIRLevelProfile(); - bool HasCSIRProfile = Reader->hasCSIRLevelProfile(); - if (Error E = WC->Writer.setIsIRLevelProfile(IsIRProfile, HasCSIRProfile)) { + if (Error E = WC->Writer.mergeProfileKind(Reader->getProfileKind())) { consumeError(std::move(E)); WC->Errors.emplace_back( make_error<StringError>( @@ -266,7 +264,6 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper, Filename); return; } - WC->Writer.setInstrEntryBBEnabled(Reader->instrEntryBBEnabled()); for (auto &I : *Reader) { if (Remapper) @@ -2095,7 +2092,8 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts, bool ShowAllFunctions, bool ShowCS, uint64_t ValueCutoff, bool OnlyListBelow, const std::string &ShowFunction, bool TextFormat, - bool ShowBinaryIds, raw_fd_ostream &OS) { + bool ShowBinaryIds, bool ShowCovered, + raw_fd_ostream &OS) { auto ReaderOrErr = InstrProfReader::create(Filename); std::vector<uint32_t> Cutoffs = std::move(DetailedSummaryCutoffs); if (ShowDetailedSummary && Cutoffs.empty()) { @@ -2152,6 +2150,13 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts, assert(Func.Counts.size() > 0 && "function missing entry counter"); Builder.addRecord(Func); + if (ShowCovered) { + if (std::any_of(Func.Counts.begin(), Func.Counts.end(), + [](uint64_t C) { return C; })) + OS << Func.Name << "\n"; + continue; + } + uint64_t FuncMax = 0; uint64_t FuncSum = 0; for (size_t I = 0, E = Func.Counts.size(); I < E; ++I) { @@ -2228,7 +2233,7 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts, if (Reader->hasError()) exitWithError(Reader->getError(), Filename); - if (TextFormat) + if (TextFormat || ShowCovered) return 0; std::unique_ptr<ProfileSummary> PS(Builder.getSummary()); bool IsIR = Reader->isIRLevelProfile(); @@ -2579,6 +2584,9 @@ static int show_main(int argc, const char *argv[]) { "debug-info", cl::init(""), cl::desc("Read and extract profile metadata from debug info and show " "the functions it found.")); + cl::opt<bool> ShowCovered( + "covered", cl::init(false), + cl::desc("Show only the functions that have been executed.")); cl::ParseCommandLineOptions(argc, argv, "LLVM profile data summary\n"); @@ -2610,7 +2618,7 @@ static int show_main(int argc, const char *argv[]) { Filename, ShowCounts, TopNFunctions, ShowIndirectCallTargets, ShowMemOPSizes, ShowDetailedSummary, DetailedSummaryCutoffs, ShowAllFunctions, ShowCS, ValueCutoff, OnlyListBelow, ShowFunction, - TextFormat, ShowBinaryIds, OS); + TextFormat, ShowBinaryIds, ShowCovered, OS); if (ProfileKind == sample) return showSampleProfile(Filename, ShowCounts, TopNFunctions, ShowAllFunctions, ShowDetailedSummary, diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index cfb618117d2b..04a67225401f 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -6393,6 +6393,7 @@ template <class ELFT> void LLVMELFDumper<ELFT>::printFileHeaders() { unsigned(ELF::EF_AMDGPU_MACH)); break; case ELF::ELFABIVERSION_AMDGPU_HSA_V4: + case ELF::ELFABIVERSION_AMDGPU_HSA_V5: W.printFlags("Flags", 
E.e_flags, makeArrayRef(ElfHeaderAMDGPUFlagsABIVersion4), unsigned(ELF::EF_AMDGPU_MACH), diff --git a/llvm/tools/llvm-readobj/WasmDumper.cpp b/llvm/tools/llvm-readobj/WasmDumper.cpp index d76332d1ba36..b4d726016437 100644 --- a/llvm/tools/llvm-readobj/WasmDumper.cpp +++ b/llvm/tools/llvm-readobj/WasmDumper.cpp @@ -183,7 +183,10 @@ void WasmDumper::printSectionHeaders() { W.printNumber("Offset", Seg.Offset.Value.Int32); else if (Seg.Offset.Opcode == wasm::WASM_OPCODE_I64_CONST) W.printNumber("Offset", Seg.Offset.Value.Int64); - else + else if (Seg.Offset.Opcode == wasm::WASM_OPCODE_GLOBAL_GET) { + ListScope Group(W, "Offset"); + W.printNumber("Global", Seg.Offset.Value.Global); + } else llvm_unreachable("unknown init expr opcode"); } break; diff --git a/llvm/tools/llvm-stress/llvm-stress.cpp b/llvm/tools/llvm-stress/llvm-stress.cpp index 941b529da9b2..9135d60fdf92 100644 --- a/llvm/tools/llvm-stress/llvm-stress.cpp +++ b/llvm/tools/llvm-stress/llvm-stress.cpp @@ -34,14 +34,15 @@ #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/IR/Verifier.h" +#include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/InitLLVM.h" #include "llvm/Support/ToolOutputFile.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/WithColor.h" +#include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cassert> #include <cstddef> diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp index 4de619df5b5f..a1f8f4809d5f 100644 --- a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp +++ b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp @@ -4645,39 +4645,33 @@ static void GenerateVariantsOf(TreePatternNodePtr N, // If this node is commutative, consider the commuted order. bool isCommIntrinsic = N->isCommutativeIntrinsic(CDP); if (NodeInfo.hasProperty(SDNPCommutative) || isCommIntrinsic) { - assert((N->getNumChildren()>=2 || isCommIntrinsic) && + unsigned Skip = isCommIntrinsic ? 1 : 0; // First operand is intrinsic id. + assert(N->getNumChildren() >= (2 + Skip) && "Commutative but doesn't have 2 children!"); - // Don't count children which are actually register references. - unsigned NC = 0; - for (unsigned i = 0, e = N->getNumChildren(); i != e; ++i) { + // Don't allow commuting children which are actually register references. + bool NoRegisters = true; + unsigned i = 0 + Skip; + unsigned e = 2 + Skip; + for (; i != e; ++i) { TreePatternNode *Child = N->getChild(i); if (Child->isLeaf()) if (DefInit *DI = dyn_cast<DefInit>(Child->getLeafValue())) { Record *RR = DI->getDef(); if (RR->isSubClassOf("Register")) - continue; + NoRegisters = false; } - NC++; } // Consider the commuted order. - if (isCommIntrinsic) { - // Commutative intrinsic. First operand is the intrinsic id, 2nd and 3rd - // operands are the commutative operands, and there might be more operands - // after those. - assert(NC >= 3 && - "Commutative intrinsic should have at least 3 children!"); - std::vector<std::vector<TreePatternNodePtr>> Variants; - Variants.push_back(std::move(ChildVariants[0])); // Intrinsic id. 
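// Example of what the unified path below produces: for a commutative node,
//   (smax x, y)  also yields the variant  (smax y, x);
// for a commutative intrinsic, operand 0 (the intrinsic id) stays in place
// and only the two operands after it are exchanged:
//   (intrinsic_wo_chain ID, x, y)  ->  (intrinsic_wo_chain ID, y, x)
// Any further operands keep their positions, and no variant is emitted when
// one of the commuted children is a register reference.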
- Variants.push_back(std::move(ChildVariants[2])); - Variants.push_back(std::move(ChildVariants[1])); - for (unsigned i = 3; i != NC; ++i) - Variants.push_back(std::move(ChildVariants[i])); - CombineChildVariants(N, Variants, OutVariants, CDP, DepVars); - } else if (NC == N->getNumChildren()) { + if (NoRegisters) { std::vector<std::vector<TreePatternNodePtr>> Variants; - Variants.push_back(std::move(ChildVariants[1])); - Variants.push_back(std::move(ChildVariants[0])); - for (unsigned i = 2; i != NC; ++i) + unsigned i = 0; + if (isCommIntrinsic) + Variants.push_back(std::move(ChildVariants[i++])); // Intrinsic id. + Variants.push_back(std::move(ChildVariants[i + 1])); + Variants.push_back(std::move(ChildVariants[i])); + i += 2; + // Remaining operands are not commuted. + for (; i != N->getNumChildren(); ++i) Variants.push_back(std::move(ChildVariants[i])); CombineChildVariants(N, Variants, OutVariants, CDP, DepVars); } diff --git a/llvm/utils/TableGen/CodeGenSchedule.cpp b/llvm/utils/TableGen/CodeGenSchedule.cpp index 7c1c37f7b370..e47bda725a17 100644 --- a/llvm/utils/TableGen/CodeGenSchedule.cpp +++ b/llvm/utils/TableGen/CodeGenSchedule.cpp @@ -521,6 +521,15 @@ void CodeGenSchedModels::collectProcModels() { RecVec ProcRecords = Records.getAllDerivedDefinitions("Processor"); llvm::sort(ProcRecords, LessRecordFieldName()); + // Check for duplicated names. + auto I = std::adjacent_find(ProcRecords.begin(), ProcRecords.end(), + [](const Record *Rec1, const Record *Rec2) { + return Rec1->getValueAsString("Name") == Rec2->getValueAsString("Name"); + }); + if (I != ProcRecords.end()) + PrintFatalError((*I)->getLoc(), "Duplicate processor name " + + (*I)->getValueAsString("Name")); + // Reserve space because we can. Reallocation would be ok. ProcModels.reserve(ProcRecords.size()+1); @@ -1973,7 +1982,6 @@ void CodeGenSchedModels::collectProcResources() { void CodeGenSchedModels::checkCompleteness() { bool Complete = true; - bool HadCompleteModel = false; for (const CodeGenProcModel &ProcModel : procModels()) { const bool HasItineraries = ProcModel.hasItineraries(); if (!ProcModel.ModelDef->getValueAsBit("CompleteModel")) @@ -1985,7 +1993,7 @@ void CodeGenSchedModels::checkCompleteness() { continue; unsigned SCIdx = getSchedClassIdx(*Inst); if (!SCIdx) { - if (Inst->TheDef->isValueUnset("SchedRW") && !HadCompleteModel) { + if (Inst->TheDef->isValueUnset("SchedRW")) { PrintError(Inst->TheDef->getLoc(), "No schedule information for instruction '" + Inst->TheDef->getName() + "' in SchedMachineModel '" + @@ -2013,7 +2021,6 @@ void CodeGenSchedModels::checkCompleteness() { Complete = false; } } - HadCompleteModel = true; } if (!Complete) { errs() << "\n\nIncomplete schedule models found.\n" diff --git a/llvm/utils/TableGen/CompressInstEmitter.cpp b/llvm/utils/TableGen/CompressInstEmitter.cpp index 94ad6ee285d4..1fd85939e74e 100644 --- a/llvm/utils/TableGen/CompressInstEmitter.cpp +++ b/llvm/utils/TableGen/CompressInstEmitter.cpp @@ -72,7 +72,6 @@ #include "CodeGenTarget.h" #include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" diff --git a/llvm/utils/TableGen/GICombinerEmitter.cpp b/llvm/utils/TableGen/GICombinerEmitter.cpp index 63a9ed682d4f..0dea1ef00e4b 100644 --- a/llvm/utils/TableGen/GICombinerEmitter.cpp +++ b/llvm/utils/TableGen/GICombinerEmitter.cpp @@ -11,21 +11,21 @@ /// 
//===----------------------------------------------------------------------===//
+#include "CodeGenTarget.h"
+#include "GlobalISel/CodeExpander.h"
+#include "GlobalISel/CodeExpansions.h"
+#include "GlobalISel/GIMatchDag.h"
+#include "GlobalISel/GIMatchDagPredicate.h"
+#include "GlobalISel/GIMatchTree.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ScopedPrinter.h"
-#include "llvm/Support/Timer.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/StringMatcher.h"
#include "llvm/TableGen/TableGenBackend.h"
-#include "CodeGenTarget.h"
-#include "GlobalISel/CodeExpander.h"
-#include "GlobalISel/CodeExpansions.h"
-#include "GlobalISel/GIMatchDag.h"
-#include "GlobalISel/GIMatchTree.h"
#include <cstdint>
using namespace llvm;
diff --git a/llvm/utils/TableGen/GlobalISel/CodeExpander.cpp b/llvm/utils/TableGen/GlobalISel/CodeExpander.cpp
index 3ebb293f466e..42b4aabf2755 100644
--- a/llvm/utils/TableGen/GlobalISel/CodeExpander.cpp
+++ b/llvm/utils/TableGen/GlobalISel/CodeExpander.cpp
@@ -12,7 +12,6 @@
#include "CodeExpander.h"
#include "CodeExpansions.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
diff --git a/llvm/utils/TableGen/GlobalISel/CodeExpander.h b/llvm/utils/TableGen/GlobalISel/CodeExpander.h
index bd6946de5925..1291eb1ad940 100644
--- a/llvm/utils/TableGen/GlobalISel/CodeExpander.h
+++ b/llvm/utils/TableGen/GlobalISel/CodeExpander.h
@@ -15,10 +15,10 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/SMLoc.h"
namespace llvm {
class CodeExpansions;
+class SMLoc;
class raw_ostream;
/// Emit the given code with all '${foo}' placeholders expanded to their
diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchDag.h b/llvm/utils/TableGen/GlobalISel/GIMatchDag.h
index 37570648cad1..4c3c610aff74 100644
--- a/llvm/utils/TableGen/GlobalISel/GIMatchDag.h
+++ b/llvm/utils/TableGen/GlobalISel/GIMatchDag.h
@@ -16,7 +16,6 @@
#include "GIMatchDagPredicateDependencyEdge.h"
namespace llvm {
-class GIMatchDag;
/// This class manages lifetimes for data associated with the GIMatchDag object.
class GIMatchDagContext {
diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchDagEdge.cpp b/llvm/utils/TableGen/GlobalISel/GIMatchDagEdge.cpp
index e59cb3aae49a..796479467df7 100644
--- a/llvm/utils/TableGen/GlobalISel/GIMatchDagEdge.cpp
+++ b/llvm/utils/TableGen/GlobalISel/GIMatchDagEdge.cpp
@@ -8,6 +8,7 @@
#include "GIMatchDagEdge.h"
#include "GIMatchDagInstr.h"
+#include "GIMatchDagOperands.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.h b/llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.h
index 0c39b50442b4..5e60448b30c1 100644
--- a/llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.h
+++ b/llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.h
@@ -9,11 +9,14 @@
#ifndef LLVM_UTILS_TABLEGEN_GIMATCHDAGINSTR_H
#define LLVM_UTILS_TABLEGEN_GIMATCHDAGINSTR_H
-#include "GIMatchDagOperands.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/raw_ostream.h"
namespace llvm {
+class CodeGenInstruction;
class GIMatchDag;
+class GIMatchDagOperandList;
/// Represents an instruction in the match DAG. This object knows very little
/// about the actual instruction to be matched as the bulk of that is in
diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.cpp b/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.cpp
index 1aca2f9dc135..6a9e33ac515e 100644
--- a/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.cpp
+++ b/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.cpp
@@ -10,8 +10,8 @@
#include "llvm/TableGen/Record.h"
-#include "GIMatchDagOperands.h"
#include "../CodeGenInstruction.h"
+#include "GIMatchDag.h"
using namespace llvm;
diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.h b/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.h
index 9b030d6edb13..08e541b76a5a 100644
--- a/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.h
+++ b/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.h
@@ -9,8 +9,12 @@
#ifndef LLVM_UTILS_TABLEGEN_GIMATCHDAGPREDICATE_H
#define LLVM_UTILS_TABLEGEN_GIMATCHDAGPREDICATE_H
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "GIMatchDag.h"
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+#include "llvm/Support/raw_ostream.h"
+#endif
namespace llvm {
class CodeExpansions;
diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.cpp b/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.cpp
index 2e804de1cd4e..921cbaf9c408 100644
--- a/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.cpp
+++ b/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.cpp
@@ -9,6 +9,7 @@
#include "GIMatchDagPredicateDependencyEdge.h"
#include "GIMatchDagInstr.h"
+#include "GIMatchDagOperands.h"
#include "GIMatchDagPredicate.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.h b/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.h
index 9552adc5c625..af91afc6073d 100644
--- a/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.h
+++ b/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.h
@@ -9,12 +9,14 @@
#ifndef LLVM_UTILS_TABLEGEN_GIMATCHDAGPREDICATEEDGE_H
#define LLVM_UTILS_TABLEGEN_GIMATCHDAGPREDICATEEDGE_H
-#include "GIMatchDagOperands.h"
+#include "llvm/Support/Compiler.h"
namespace llvm {
-class GIMatchDag;
class GIMatchDagInstr;
class GIMatchDagPredicate;
+class GIMatchDagOperand;
+
+class raw_ostream;
/// Represents a dependency that must be met to evaluate a predicate.
///
diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchTree.cpp b/llvm/utils/TableGen/GlobalISel/GIMatchTree.cpp
index 00d57404b069..42055ad4f608 100644
--- a/llvm/utils/TableGen/GlobalISel/GIMatchTree.cpp
+++ b/llvm/utils/TableGen/GlobalISel/GIMatchTree.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "GIMatchTree.h"
+#include "GIMatchDagPredicate.h"
#include "../CodeGenInstruction.h"
diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp
index 25bc0adc2a81..018aa7ee2f71 100644
--- a/llvm/utils/TableGen/GlobalISelEmitter.cpp
+++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp
@@ -32,7 +32,6 @@
#include "CodeGenDAGPatterns.h"
#include "SubtargetFeatureInfo.h"
#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CodeGenCoverage.h"
#include "llvm/Support/CommandLine.h"
@@ -668,7 +667,6 @@ MatchTable &operator<<(MatchTable &Table, const MatchTableRecord &Value) {
class OperandMatcher;
class MatchAction;
class PredicateMatcher;
-class RuleMatcher;
class Matcher {
public:
diff --git a/llvm/utils/TableGen/InfoByHwMode.cpp b/llvm/utils/TableGen/InfoByHwMode.cpp
index 3d236b828032..73c4fbf0a5eb 100644
--- a/llvm/utils/TableGen/InfoByHwMode.cpp
+++ b/llvm/utils/TableGen/InfoByHwMode.cpp
@@ -18,7 +18,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include <set>
#include <string>
using namespace llvm;
diff --git a/llvm/utils/TableGen/InfoByHwMode.h b/llvm/utils/TableGen/InfoByHwMode.h
index c97add687ca2..44927d0bf0df 100644
--- a/llvm/utils/TableGen/InfoByHwMode.h
+++ b/llvm/utils/TableGen/InfoByHwMode.h
@@ -20,11 +20,9 @@
#include <map>
#include <string>
-#include <vector>
namespace llvm {
-struct CodeGenHwModes;
class Record;
class raw_ostream;
diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp
index f4e5eb59cb80..a5aa4069e60f 100644
--- a/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -18,7 +18,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
-#include "llvm/TableGen/StringMatcher.h"
#include "llvm/TableGen/StringToOffsetTable.h"
#include "llvm/TableGen/TableGenBackend.h"
#include <algorithm>
diff --git a/llvm/utils/TableGen/OptParserEmitter.cpp b/llvm/utils/TableGen/OptParserEmitter.cpp
index 0809432dfd0d..d54132f3190b 100644
--- a/llvm/utils/TableGen/OptParserEmitter.cpp
+++ b/llvm/utils/TableGen/OptParserEmitter.cpp
@@ -13,7 +13,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
-#include <cctype>
#include <cstring>
#include <map>
#include <memory>
diff --git a/llvm/utils/TableGen/OptRSTEmitter.cpp b/llvm/utils/TableGen/OptRSTEmitter.cpp
index 5e44d033109a..11d896229f5b 100644
--- a/llvm/utils/TableGen/OptRSTEmitter.cpp
+++ b/llvm/utils/TableGen/OptRSTEmitter.cpp
@@ -8,15 +8,8 @@
#include "OptEmitter.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
-#include "llvm/TableGen/TableGenBackend.h"
-#include <cctype>
-#include <cstring>
-#include <map>
using namespace llvm;
diff --git a/llvm/utils/TableGen/PredicateExpander.h b/llvm/utils/TableGen/PredicateExpander.h
index 9e7a4a3925ac..27f049a715aa 100644
--- a/llvm/utils/TableGen/PredicateExpander.h
+++ b/llvm/utils/TableGen/PredicateExpander.h
@@ -17,12 +17,12 @@
#define LLVM_UTILS_TABLEGEN_PREDICATEEXPANDER_H
#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/TableGen/Record.h"
+#include <vector>
namespace llvm {
class raw_ostream;
+class Record;
class PredicateExpander {
bool EmitCallsByRef;
diff --git a/llvm/utils/TableGen/RegisterBankEmitter.cpp b/llvm/utils/TableGen/RegisterBankEmitter.cpp
index 61f71309b6fb..d97d7acb87a7 100644
--- a/llvm/utils/TableGen/RegisterBankEmitter.cpp
+++ b/llvm/utils/TableGen/RegisterBankEmitter.cpp
@@ -17,7 +17,6 @@
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
-#include "CodeGenHwModes.h"
#include "CodeGenRegisters.h"
#include "CodeGenTarget.h"
diff --git a/llvm/utils/TableGen/SearchableTableEmitter.cpp b/llvm/utils/TableGen/SearchableTableEmitter.cpp
index 327b90d59db6..dc5c96c662be 100644
--- a/llvm/utils/TableGen/SearchableTableEmitter.cpp
+++ b/llvm/utils/TableGen/SearchableTableEmitter.cpp
@@ -16,9 +16,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/SourceMgr.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include <algorithm>
@@ -32,8 +29,6 @@ using namespace llvm;
namespace {
-struct GenericTable;
-
int getAsInt(Init *B) {
return cast<IntInit>(B->convertInitializerTo(IntRecTy::get()))->getValue();
}
diff --git a/llvm/utils/TableGen/TableGen.cpp b/llvm/utils/TableGen/TableGen.cpp
index 24c11c8bc831..2d4a45f889be 100644
--- a/llvm/utils/TableGen/TableGen.cpp
+++ b/llvm/utils/TableGen/TableGen.cpp
@@ -289,7 +289,8 @@ int main(int argc, char **argv) {
#define __has_feature(x) 0
#endif
-#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__) || \
+#if __has_feature(address_sanitizer) || \
+ (defined(__SANITIZE_ADDRESS__) && defined(__GNUC__)) || \
__has_feature(leak_sanitizer)
#include <sanitizer/lsan_interface.h>
diff --git a/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp b/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
index 7518b262e6e9..74969053f095 100644
--- a/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
+++ b/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
@@ -14,6 +14,9 @@
//===----------------------------------------------------------------------===//
#include "WebAssemblyDisassemblerEmitter.h"
+#include "CodeGenInstruction.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Record.h"
namespace llvm {
diff --git a/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.h b/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.h
index 60d3d9433eca..aba3a4bfd302 100644
--- a/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.h
+++ b/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.h
@@ -14,12 +14,13 @@
#ifndef LLVM_UTILS_TABLEGEN_WEBASSEMBLYDISASSEMBLEREMITTER_H
#define LLVM_UTILS_TABLEGEN_WEBASSEMBLYDISASSEMBLEREMITTER_H
-#include "CodeGenInstruction.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/Support/raw_ostream.h"
namespace llvm {
+class CodeGenInstruction;
+class raw_ostream;
+
void emitWebAssemblyDisassemblerTables(
raw_ostream &OS,
const ArrayRef<const CodeGenInstruction *> &NumberedInstructions);
diff --git a/llvm/utils/TableGen/X86DisassemblerTables.cpp b/llvm/utils/TableGen/X86DisassemblerTables.cpp
index 90e71a354d17..81ddea99740d 100644
--- a/llvm/utils/TableGen/X86DisassemblerTables.cpp
+++ b/llvm/utils/TableGen/X86DisassemblerTables.cpp
@@ -15,9 +15,12 @@
#include "X86DisassemblerTables.h"
#include "X86DisassemblerShared.h"
-#include "llvm/ADT/STLExtras.h"
+#include "X86ModRMFilters.h"
+#include "llvm/ADT/STLArrayExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
#include <map>
using namespace llvm;
diff --git a/llvm/utils/TableGen/X86DisassemblerTables.h b/llvm/utils/TableGen/X86DisassemblerTables.h
index 2e4ff1e2ce08..966f7406efec 100644
--- a/llvm/utils/TableGen/X86DisassemblerTables.h
+++ b/llvm/utils/TableGen/X86DisassemblerTables.h
@@ -17,15 +17,18 @@
#define LLVM_UTILS_TABLEGEN_X86DISASSEMBLERTABLES_H
#include "X86DisassemblerShared.h"
-#include "X86ModRMFilters.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/X86DisassemblerDecoderCommon.h"
#include <map>
+#include <memory>
#include <vector>
namespace llvm {
+class raw_ostream;
namespace X86Disassembler {
+class ModRMFilter;
+
/// DisassemblerTables - Encapsulates all the decode tables being generated by
/// the table emitter. Contains functions to populate the tables as well as
/// to emit them as hierarchical C structures suitable for consumption by the
diff --git a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
index 0a8d0750cf13..2a29331eb7e8 100644
--- a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
+++ b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
@@ -40,8 +40,6 @@ struct ManualMapEntry {
: RegInstStr(RegInstStr), MemInstStr(MemInstStr), Strategy(Strategy) {}
};
-class IsMatch;
-
// List of instructions requiring explicitly aligned memory.
const char *ExplicitAlign[] = {"MOVDQA", "MOVAPS", "MOVAPD", "MOVNTPS",
"MOVNTPD", "MOVNTDQ", "MOVNTDQA"};
diff --git a/llvm/utils/TableGen/X86RecognizableInstr.cpp b/llvm/utils/TableGen/X86RecognizableInstr.cpp
index a9b384155965..4023d8f57318 100644
--- a/llvm/utils/TableGen/X86RecognizableInstr.cpp
+++ b/llvm/utils/TableGen/X86RecognizableInstr.cpp
@@ -15,8 +15,10 @@
#include "X86RecognizableInstr.h"
#include "X86DisassemblerShared.h"
+#include "X86DisassemblerTables.h"
#include "X86ModRMFilters.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/TableGen/Record.h"
#include <string>
using namespace llvm;
diff --git a/llvm/utils/TableGen/X86RecognizableInstr.h b/llvm/utils/TableGen/X86RecognizableInstr.h
index d4fad2cc3f0f..8f557d9ee5f5 100644
--- a/llvm/utils/TableGen/X86RecognizableInstr.h
+++ b/llvm/utils/TableGen/X86RecognizableInstr.h
@@ -16,13 +16,16 @@
#ifndef LLVM_UTILS_TABLEGEN_X86RECOGNIZABLEINSTR_H
#define LLVM_UTILS_TABLEGEN_X86RECOGNIZABLEINSTR_H
-#include "CodeGenTarget.h"
-#include "X86DisassemblerTables.h"
+#include "CodeGenInstruction.h"
#include "llvm/Support/DataTypes.h"
-#include "llvm/TableGen/Record.h"
+#include "llvm/Support/X86DisassemblerDecoderCommon.h"
+
+struct InstructionSpecifier;
namespace llvm {
+class Record;
+
#define X86_INSTR_MRM_MAPPING \
MAP(C0, 64) \
MAP(C1, 65) \
@@ -153,6 +156,8 @@ namespace X86Local {
namespace X86Disassembler {
+class DisassemblerTables;
+
/// RecognizableInstr - Encapsulates all information required to decode a single
/// instruction, as extracted from the LLVM instruction tables. Has methods
/// to interpret the information available in the LLVM tables, and to emit the
diff --git a/openmp/runtime/src/dllexports b/openmp/runtime/src/dllexports
index 6c0e86e3aab2..6e446658607a 100644
--- a/openmp/runtime/src/dllexports
+++ b/openmp/runtime/src/dllexports
@@ -553,6 +553,9 @@ kmp_set_disp_num_buffers 890
omp_realloc 777
omp_aligned_alloc 778
omp_aligned_calloc 806
+ omp_get_interop_int 807
+ omp_get_interop_ptr 808
+ omp_get_interop_str 809
omp_null_allocator DATA
omp_default_mem_alloc DATA
diff --git a/openmp/runtime/src/kmp_ftn_entry.h b/openmp/runtime/src/kmp_ftn_entry.h
index 0786ed3c119a..53802b7a9c3c 100644
--- a/openmp/runtime/src/kmp_ftn_entry.h
+++ b/openmp/runtime/src/kmp_ftn_entry.h
@@ -1446,6 +1446,120 @@ int FTN_STDCALL FTN_GET_TEAMS_THREAD_LIMIT(void) {
#endif
}
+/// TODO: Include the `omp.h` of the current build
+/* OpenMP 5.1 interop */
+typedef intptr_t omp_intptr_t;
+
+/* 0..omp_get_num_interop_properties()-1 are reserved for implementation-defined
+ * properties */
+typedef enum omp_interop_property {
+ omp_ipr_fr_id = -1,
+ omp_ipr_fr_name = -2,
+ omp_ipr_vendor = -3,
+ omp_ipr_vendor_name = -4,
+ omp_ipr_device_num = -5,
+ omp_ipr_platform = -6,
+ omp_ipr_device = -7,
+ omp_ipr_device_context = -8,
+ omp_ipr_targetsync = -9,
+ omp_ipr_first = -9
+} omp_interop_property_t;
+
+#define omp_interop_none 0
+
+typedef enum omp_interop_rc {
+ omp_irc_no_value = 1,
+ omp_irc_success = 0,
+ omp_irc_empty = -1,
+ omp_irc_out_of_range = -2,
+ omp_irc_type_int = -3,
+ omp_irc_type_ptr = -4,
+ omp_irc_type_str = -5,
+ omp_irc_other = -6
+} omp_interop_rc_t;
+
+typedef enum omp_interop_fr {
+ omp_ifr_cuda = 1,
+ omp_ifr_cuda_driver = 2,
+ omp_ifr_opencl = 3,
+ omp_ifr_sycl = 4,
+ omp_ifr_hip = 5,
+ omp_ifr_level_zero = 6,
+ omp_ifr_last = 7
+} omp_interop_fr_t;
+
+typedef void *omp_interop_t;
+
+// libomptarget, if loaded, provides this function
+int FTN_STDCALL FTN_GET_NUM_INTEROP_PROPERTIES(const omp_interop_t interop) {
+#if KMP_MIC || KMP_OS_DARWIN || defined(KMP_STUB)
+ return 0;
+#else
+ int (*fptr)(const omp_interop_t);
+ if ((*(void **)(&fptr) = KMP_DLSYM_NEXT("omp_get_num_interop_properties")))
+ return (*fptr)(interop);
+ return 0;
+#endif // KMP_MIC || KMP_OS_DARWIN || KMP_OS_WINDOWS || defined(KMP_STUB)
+}
+
+/// TODO Convert FTN_GET_INTEROP_XXX functions into a macro like interop.cpp
+// libomptarget, if loaded, provides this function
+intptr_t FTN_STDCALL FTN_GET_INTEROP_INT(const omp_interop_t interop,
+ omp_interop_property_t property_id,
+ int *err) {
+ intptr_t (*fptr)(const omp_interop_t, omp_interop_property_t, int *);
+ if ((*(void **)(&fptr) = KMP_DLSYM_NEXT("omp_get_interop_int")))
+ return (*fptr)(interop, property_id, err);
+ return 0;
+}
+
+// libomptarget, if loaded, provides this function
+void *FTN_STDCALL FTN_GET_INTEROP_PTR(const omp_interop_t interop,
+ omp_interop_property_t property_id,
+ int *err) {
+ void *(*fptr)(const omp_interop_t, omp_interop_property_t, int *);
+ if ((*(void **)(&fptr) = KMP_DLSYM_NEXT("omp_get_interop_ptr")))
+ return (*fptr)(interop, property_id, err);
+ return nullptr;
+}
+
+// libomptarget, if loaded, provides this function
+const char *FTN_STDCALL FTN_GET_INTEROP_STR(const omp_interop_t interop,
+ omp_interop_property_t property_id,
+ int *err) {
+ const char *(*fptr)(const omp_interop_t, omp_interop_property_t, int *);
+ if ((*(void **)(&fptr) = KMP_DLSYM_NEXT("omp_get_interop_str")))
+ return (*fptr)(interop, property_id, err);
+ return nullptr;
+}
+
+// libomptarget, if loaded, provides this function
+const char *FTN_STDCALL FTN_GET_INTEROP_NAME(
+ const omp_interop_t interop, omp_interop_property_t property_id) {
+ const char *(*fptr)(const omp_interop_t, omp_interop_property_t);
+ if ((*(void **)(&fptr) = KMP_DLSYM_NEXT("omp_get_interop_name")))
+ return (*fptr)(interop, property_id);
+ return nullptr;
+}
+
+// libomptarget, if loaded, provides this function
+const char *FTN_STDCALL FTN_GET_INTEROP_TYPE_DESC(
+ const omp_interop_t interop, omp_interop_property_t property_id) {
+ const char *(*fptr)(const omp_interop_t, omp_interop_property_t);
+ if ((*(void **)(&fptr) = KMP_DLSYM_NEXT("omp_get_interop_type_desc")))
+ return (*fptr)(interop, property_id);
+ return nullptr;
+}
+
+// libomptarget, if loaded, provides this function
+const char *FTN_STDCALL FTN_GET_INTEROP_RC_DESC(
+ const omp_interop_t interop, omp_interop_property_t property_id) {
+ const char *(*fptr)(const omp_interop_t, omp_interop_property_t);
+ if ((*(void **)(&fptr) = KMP_DLSYM_NEXT("omp_get_interop_rec_desc")))
+ return (*fptr)(interop, property_id);
+ return nullptr;
+}
+
// display environment variables when requested
void FTN_STDCALL FTN_DISPLAY_ENV(int verbose) {
#ifndef KMP_STUB
diff --git a/openmp/runtime/src/kmp_ftn_os.h b/openmp/runtime/src/kmp_ftn_os.h
index fc2bff595d7c..66e1e1ecd2e6 100644
--- a/openmp/runtime/src/kmp_ftn_os.h
+++ b/openmp/runtime/src/kmp_ftn_os.h
@@ -140,6 +140,14 @@
#define FTN_SET_TEAMS_THREAD_LIMIT omp_set_teams_thread_limit
#define FTN_GET_TEAMS_THREAD_LIMIT omp_get_teams_thread_limit
+#define FTN_GET_NUM_INTEROP_PROPERTIES omp_get_num_interop_properties
+#define FTN_GET_INTEROP_INT omp_get_interop_int
+#define FTN_GET_INTEROP_PTR omp_get_interop_ptr
+#define FTN_GET_INTEROP_STR omp_get_interop_str
+#define FTN_GET_INTEROP_NAME omp_get_interop_name
+#define FTN_GET_INTEROP_TYPE_DESC omp_get_interop_type_desc
+#define FTN_GET_INTEROP_RC_DESC omp_get_interop_rc_desc
+
#endif /* KMP_FTN_PLAIN */
/* ------------------------------------------------------------------------ */
@@ -268,6 +276,14 @@
#define FTN_SET_TEAMS_THREAD_LIMIT omp_set_teams_thread_limit_
#define FTN_GET_TEAMS_THREAD_LIMIT omp_get_teams_thread_limit_
+#define FTN_GET_NUM_INTEROP_PROPERTIES omp_get_num_interop_properties_
+#define FTN_GET_INTEROP_INT omp_get_interop_int_
+#define FTN_GET_INTEROP_PTR omp_get_interop_ptr_
+#define FTN_GET_INTEROP_STR omp_get_interop_str_
+#define FTN_GET_INTEROP_NAME omp_get_interop_name_
+#define FTN_GET_INTEROP_TYPE_DESC omp_get_interop_type_desc_
+#define FTN_GET_INTEROP_RC_DESC omp_get_interop_rc_desc_
+
#endif /* KMP_FTN_APPEND */
/* ------------------------------------------------------------------------ */
@@ -394,6 +410,14 @@
#define FTN_SET_TEAMS_THREAD_LIMIT OMP_SET_TEAMS_THREAD_LIMIT
#define FTN_GET_TEAMS_THREAD_LIMIT OMP_GET_TEAMS_THREAD_LIMIT
+#define FTN_GET_NUM_INTEROP_PROPERTIES OMP_GET_NUM_INTEROP_PROPERTIES
+#define FTN_GET_INTEROP_INT OMP_GET_INTEROP_INT
+#define FTN_GET_INTEROP_PTR OMP_GET_INTEROP_PTR
+#define FTN_GET_INTEROP_STR OMP_GET_INTEROP_STR
+#define FTN_GET_INTEROP_NAME OMP_GET_INTEROP_NAME
+#define FTN_GET_INTEROP_TYPE_DESC OMP_GET_INTEROP_TYPE_DESC
+#define FTN_GET_INTEROP_RC_DESC OMP_GET_INTEROP_RC_DESC
+
#endif /* KMP_FTN_UPPER */
/* ------------------------------------------------------------------------ */
@@ -522,6 +546,14 @@
#define FTN_SET_TEAMS_THREAD_LIMIT OMP_SET_TEAMS_THREAD_LIMIT_
#define FTN_GET_TEAMS_THREAD_LIMIT OMP_GET_TEAMS_THREAD_LIMIT_
+#define FTN_GET_NUM_INTEROP_PROPERTIES OMP_GET_NUM_INTEROP_PROPERTIES_
+#define FTN_GET_INTEROP_INT OMP_GET_INTEROP_INT_
+#define FTN_GET_INTEROP_PTR OMP_GET_INTEROP_PTR_
+#define FTN_GET_INTEROP_STR OMP_GET_INTEROP_STR_
+#define FTN_GET_INTEROP_NAME OMP_GET_INTEROP_NAME_
+#define FTN_GET_INTEROP_TYPE_DESC OMP_GET_INTEROP_TYPE_DESC_
+#define FTN_GET_INTEROP_RC_DESC OMP_GET_INTEROP_RC_DESC_
+
#endif /* KMP_FTN_UAPPEND */
/* -------------------------- GOMP API NAMES ------------------------ */
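Note on the kmp_ftn_entry.h hunk above: each FTN_GET_INTEROP_* entry point resolves the matching omp_get_interop_* symbol at call time, so libomp keeps working whether or not libomptarget is loaded. Below is a minimal standalone sketch of that pattern, assuming POSIX dlsym(RTLD_NEXT, ...) semantics (which KMP_DLSYM_NEXT appears to wrap on non-Windows hosts); the wrapper name my_get_interop_int is hypothetical and not part of the runtime.

/* Sketch only, not the runtime's actual code. Build with a C compiler;
 * older glibc also needs -ldl. */
#define _GNU_SOURCE /* RTLD_NEXT is a GNU extension on glibc */
#include <dlfcn.h>
#include <stdint.h>

typedef void *omp_interop_t; /* mirrors the typedef added in the hunk */

intptr_t my_get_interop_int(const omp_interop_t interop, int property_id,
                            int *err) {
  /* Look past the current object for a definition later in link order
   * (e.g. one provided by an offload runtime, if it was loaded). */
  intptr_t (*fptr)(const omp_interop_t, int, int *);
  *(void **)(&fptr) = dlsym(RTLD_NEXT, "omp_get_interop_int");
  if (fptr)
    return (*fptr)(interop, property_id, err); /* forward the call */
  return 0; /* no provider loaded: fall back to a neutral value */
}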