author     Dimitry Andric <dim@FreeBSD.org>   2022-07-24 15:03:44 +0000
committer  Dimitry Andric <dim@FreeBSD.org>   2022-07-24 15:03:44 +0000
commit     4b4fe385e49bd883fd183b5f21c1ea486c722e61
tree       c3d8fdb355c9c73e57723718c22103aaf7d15aa6
parent     1f917f69ff07f09b6dbb670971f57f8efe718b84
Vendor import of llvm-project main llvmorg-15-init-17485-ga3e38b4a206b.
(vendor/llvm-project/llvmorg-15-init-17485-ga3e38b4a206b)
975 files changed, 23780 insertions, 10386 deletions
diff --git a/clang/include/clang/APINotes/Types.h b/clang/include/clang/APINotes/Types.h index 0e5b43080e4b..f155d6a06327 100644 --- a/clang/include/clang/APINotes/Types.h +++ b/clang/include/clang/APINotes/Types.h @@ -77,7 +77,7 @@ public: void setSwiftPrivate(llvm::Optional<bool> Private) { SwiftPrivateSpecified = Private.has_value(); - SwiftPrivate = Private ? *Private : 0; + SwiftPrivate = Private.value_or(0); } friend bool operator==(const CommonEntityInfo &, const CommonEntityInfo &); diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index 85eba45e4de6..9536b3faa02d 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -473,6 +473,9 @@ class ASTContext : public RefCountedBase<ASTContext> { }; llvm::DenseMap<Module*, PerModuleInitializers*> ModuleInitializers; + /// For module code-gen cases, this is the top-level module we are building. + Module *TopLevelModule = nullptr; + static constexpr unsigned ConstantArrayTypesLog2InitSize = 8; static constexpr unsigned GeneralTypesLog2InitSize = 9; static constexpr unsigned FunctionProtoTypesLog2InitSize = 12; @@ -1076,6 +1079,12 @@ public: /// Get the initializations to perform when importing a module, if any. ArrayRef<Decl*> getModuleInitializers(Module *M); + /// Set the (C++20) module we are building. + void setModuleForCodeGen(Module *M) { TopLevelModule = M; } + + /// Get module under construction, nullptr if this is not a C++20 module. + Module *getModuleForCodeGen() const { return TopLevelModule; } + TranslationUnitDecl *getTranslationUnitDecl() const { return TUDecl->getMostRecentDecl(); } diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h index 66fab94b45b8..fb87a75a1241 100644 --- a/clang/include/clang/AST/Decl.h +++ b/clang/include/clang/AST/Decl.h @@ -1890,7 +1890,10 @@ public: TK_FunctionTemplateSpecialization, // A function template specialization that hasn't yet been resolved to a // particular specialized function template. - TK_DependentFunctionTemplateSpecialization + TK_DependentFunctionTemplateSpecialization, + // A non-template function which is in a dependent scope. + TK_DependentNonTemplate + }; /// Stashed information about a defaulted function definition whose body has @@ -1939,20 +1942,21 @@ private: /// The template or declaration that this declaration /// describes or was instantiated from, respectively. /// - /// For non-templates, this value will be NULL. For function - /// declarations that describe a function template, this will be a - /// pointer to a FunctionTemplateDecl. For member functions - /// of class template specializations, this will be a MemberSpecializationInfo + /// For non-templates this value will be NULL, unless this declaration was + /// declared directly inside of a function template, in which case it will + /// have a pointer to a FunctionDecl, stored in the NamedDecl. For function + /// declarations that describe a function template, this will be a pointer to + /// a FunctionTemplateDecl, stored in the NamedDecl. For member functions of + /// class template specializations, this will be a MemberSpecializationInfo /// pointer containing information about the specialization. /// For function template specializations, this will be a /// FunctionTemplateSpecializationInfo, which contains information about /// the template being specialized and the template arguments involved in /// that specialization. 
- llvm::PointerUnion<FunctionTemplateDecl *, - MemberSpecializationInfo *, + llvm::PointerUnion<NamedDecl *, MemberSpecializationInfo *, FunctionTemplateSpecializationInfo *, DependentFunctionTemplateSpecializationInfo *> - TemplateOrSpecialization; + TemplateOrSpecialization; /// Provides source/type location info for the declaration name embedded in /// the DeclaratorDecl base class. @@ -2695,6 +2699,13 @@ public: setInstantiationOfMemberFunction(getASTContext(), FD, TSK); } + /// Specify that this function declaration was instantiated from a + /// FunctionDecl FD. This is only used if this is a function declaration + /// declared locally inside of a function template. + void setInstantiatedFromDecl(FunctionDecl *FD); + + FunctionDecl *getInstantiatedFromDecl() const; + /// Retrieves the function template that is described by this /// function declaration. /// diff --git a/clang/include/clang/AST/DeclBase.h b/clang/include/clang/AST/DeclBase.h index 52fe8dd6b1e5..d1193161fd75 100644 --- a/clang/include/clang/AST/DeclBase.h +++ b/clang/include/clang/AST/DeclBase.h @@ -920,10 +920,12 @@ public: /// If this decl is defined inside a function/method/block it returns /// the corresponding DeclContext, otherwise it returns null. - const DeclContext *getParentFunctionOrMethod() const; - DeclContext *getParentFunctionOrMethod() { - return const_cast<DeclContext*>( - const_cast<const Decl*>(this)->getParentFunctionOrMethod()); + const DeclContext * + getParentFunctionOrMethod(bool LexicalParent = false) const; + DeclContext *getParentFunctionOrMethod(bool LexicalParent = false) { + return const_cast<DeclContext *>( + const_cast<const Decl *>(this)->getParentFunctionOrMethod( + LexicalParent)); } /// Retrieves the "canonical" declaration of the given declaration. diff --git a/clang/include/clang/ASTMatchers/ASTMatchers.h b/clang/include/clang/ASTMatchers/ASTMatchers.h index ae5502d7af71..9f4d807c232d 100644 --- a/clang/include/clang/ASTMatchers/ASTMatchers.h +++ b/clang/include/clang/ASTMatchers/ASTMatchers.h @@ -3838,8 +3838,9 @@ AST_MATCHER_P(CallExpr, callee, internal::Matcher<Stmt>, InnerMatcher.matches(*ExprNode, Finder, Builder)); } -/// Matches if the call expression's callee's declaration matches the -/// given matcher. +/// Matches 1) if the call expression's callee's declaration matches the +/// given matcher; or 2) if the Obj-C message expression's callee's method +/// declaration matches the given matcher. /// /// Example matches y.x() (matcher = callExpr(callee( /// cxxMethodDecl(hasName("x"))))) @@ -3847,9 +3848,31 @@ AST_MATCHER_P(CallExpr, callee, internal::Matcher<Stmt>, /// class Y { public: void x(); }; /// void z() { Y y; y.x(); } /// \endcode -AST_MATCHER_P_OVERLOAD(CallExpr, callee, internal::Matcher<Decl>, InnerMatcher, - 1) { - return callExpr(hasDeclaration(InnerMatcher)).matches(Node, Finder, Builder); +/// +/// Example 2. Matches [I foo] with +/// objcMessageExpr(callee(objcMethodDecl(hasName("foo")))) +/// +/// \code +/// @interface I: NSObject +/// +(void)foo; +/// @end +/// ... +/// [I foo] +/// \endcode +AST_POLYMORPHIC_MATCHER_P_OVERLOAD( + callee, AST_POLYMORPHIC_SUPPORTED_TYPES(ObjCMessageExpr, CallExpr), + internal::Matcher<Decl>, InnerMatcher, 1) { + if (const auto *CallNode = dyn_cast<CallExpr>(&Node)) + return callExpr(hasDeclaration(InnerMatcher)) + .matches(Node, Finder, Builder); + else { + // The dynamic cast below is guaranteed to succeed as there are only 2 + // supported return types. 
+ const auto *MsgNode = cast<ObjCMessageExpr>(&Node); + const Decl *DeclNode = MsgNode->getMethodDecl(); + return (DeclNode != nullptr && + InnerMatcher.matches(*DeclNode, Finder, Builder)); + } } /// Matches if the expression's or declaration's type matches a type diff --git a/clang/include/clang/Analysis/Analyses/ThreadSafetyTIL.h b/clang/include/clang/Analysis/Analyses/ThreadSafetyTIL.h index 77a800c28754..65556c8d584c 100644 --- a/clang/include/clang/Analysis/Analyses/ThreadSafetyTIL.h +++ b/clang/include/clang/Analysis/Analyses/ThreadSafetyTIL.h @@ -75,7 +75,7 @@ namespace til { class BasicBlock; /// Enum for the different distinct classes of SExpr -enum TIL_Opcode { +enum TIL_Opcode : unsigned char { #define TIL_OPCODE_DEF(X) COP_##X, #include "ThreadSafetyOps.def" #undef TIL_OPCODE_DEF @@ -278,7 +278,7 @@ class SExpr { public: SExpr() = delete; - TIL_Opcode opcode() const { return static_cast<TIL_Opcode>(Opcode); } + TIL_Opcode opcode() const { return Opcode; } // Subclasses of SExpr must define the following: // @@ -321,7 +321,7 @@ protected: SExpr(TIL_Opcode Op) : Opcode(Op) {} SExpr(const SExpr &E) : Opcode(E.Opcode), Flags(E.Flags) {} - const unsigned char Opcode; + const TIL_Opcode Opcode; unsigned char Reserved = 0; unsigned short Flags = 0; unsigned SExprID = 0; @@ -332,7 +332,7 @@ protected: namespace ThreadSafetyTIL { inline bool isTrivial(const SExpr *E) { - unsigned Op = E->opcode(); + TIL_Opcode Op = E->opcode(); return Op == COP_Variable || Op == COP_Literal || Op == COP_LiteralPtr; } diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h index 40ac95b3abdd..ef8f7a51496c 100644 --- a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h @@ -63,9 +63,15 @@ public: using Lattice = LatticeT; explicit DataflowAnalysis(ASTContext &Context) : Context(Context) {} + + /// Deprecated. Use the `DataflowAnalysisOptions` constructor instead. explicit DataflowAnalysis(ASTContext &Context, bool ApplyBuiltinTransfer) : TypeErasedDataflowAnalysis(ApplyBuiltinTransfer), Context(Context) {} + explicit DataflowAnalysis(ASTContext &Context, + DataflowAnalysisOptions Options) + : TypeErasedDataflowAnalysis(Options), Context(Context) {} + ASTContext &getASTContext() final { return Context; } TypeErasedLattice typeErasedInitialElement() final { diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h index 358ace0430f6..abc3183e1b0b 100644 --- a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h @@ -23,6 +23,7 @@ #include "clang/Analysis/FlowSensitive/Value.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/Support/Compiler.h" #include <cassert> #include <memory> #include <type_traits> @@ -251,6 +252,8 @@ public: /// `Val2` imposed by the flow condition. 
bool equivalentBoolValues(BoolValue &Val1, BoolValue &Val2); + LLVM_DUMP_METHOD void dumpFlowCondition(AtomicBoolValue &Token); + private: struct NullableQualTypeDenseMapInfo : private llvm::DenseMapInfo<QualType> { static QualType getEmptyKey() { diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h index ce195f0662f5..f17df36f6a4a 100644 --- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h @@ -19,7 +19,6 @@ #include "clang/AST/DeclBase.h" #include "clang/AST/Expr.h" #include "clang/AST/Type.h" -#include "clang/AST/TypeOrdering.h" #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" #include "clang/Analysis/FlowSensitive/DataflowLattice.h" #include "clang/Analysis/FlowSensitive/StorageLocation.h" @@ -325,6 +324,8 @@ public: /// imply that `Val` is true. bool flowConditionImplies(BoolValue &Val) const; + LLVM_DUMP_METHOD void dump() const; + private: /// Creates a value appropriate for `Type`, if `Type` is supported, otherwise /// return null. diff --git a/clang/include/clang/Analysis/FlowSensitive/DebugSupport.h b/clang/include/clang/Analysis/FlowSensitive/DebugSupport.h index ef903d807e12..b8efdeb61d28 100644 --- a/clang/include/clang/Analysis/FlowSensitive/DebugSupport.h +++ b/clang/include/clang/Analysis/FlowSensitive/DebugSupport.h @@ -23,6 +23,13 @@ namespace clang { namespace dataflow { + +/// Returns a string representation of a boolean assignment to true or false. +std::string debugString(Solver::Result::Assignment Assignment); + +/// Returns a string representation of the result status of a SAT check. +std::string debugString(Solver::Result::Status Status); + /// Returns a string representation for the boolean value `B`. /// /// Atomic booleans appearing in the boolean value `B` are assigned to labels @@ -36,6 +43,20 @@ std::string debugString( llvm::DenseMap<const AtomicBoolValue *, std::string> AtomNames = {{}}); /// Returns a string representation for `Constraints` - a collection of boolean +/// formulas. +/// +/// Atomic booleans appearing in the boolean value `Constraints` are assigned to +/// labels either specified in `AtomNames` or created by default rules as B0, +/// B1, ... +/// +/// Requirements: +/// +/// Names assigned to atoms should not be repeated in `AtomNames`. +std::string debugString( + const llvm::DenseSet<BoolValue *> &Constraints, + llvm::DenseMap<const AtomicBoolValue *, std::string> AtomNames = {{}}); + +/// Returns a string representation for `Constraints` - a collection of boolean /// formulas and the `Result` of satisfiability checking. /// /// Atomic booleans appearing in `Constraints` and `Result` are assigned to @@ -46,7 +67,7 @@ std::string debugString( /// /// Names assigned to atoms should not be repeated in `AtomNames`. 
std::string debugString( - const std::vector<BoolValue *> &Constraints, const Solver::Result &Result, + ArrayRef<BoolValue *> Constraints, const Solver::Result &Result, llvm::DenseMap<const AtomicBoolValue *, std::string> AtomNames = {{}}); inline std::string debugString( const llvm::DenseSet<BoolValue *> &Constraints, diff --git a/clang/include/clang/Analysis/FlowSensitive/NoopAnalysis.h b/clang/include/clang/Analysis/FlowSensitive/NoopAnalysis.h new file mode 100644 index 000000000000..4f05f5f4554b --- /dev/null +++ b/clang/include/clang/Analysis/FlowSensitive/NoopAnalysis.h @@ -0,0 +1,47 @@ +//===-- NoopAnalysis.h ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a NoopAnalysis class that just uses the builtin transfer. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_NOOPANALYSIS_H +#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_NOOPANALYSIS_H + +#include "clang/AST/ASTContext.h" +#include "clang/AST/Stmt.h" +#include "clang/Analysis/FlowSensitive/DataflowAnalysis.h" +#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" +#include "clang/Analysis/FlowSensitive/NoopLattice.h" + +namespace clang { +namespace dataflow { + +class NoopAnalysis : public DataflowAnalysis<NoopAnalysis, NoopLattice> { +public: + /// Deprecated. Use the `DataflowAnalysisOptions` constructor instead. + NoopAnalysis(ASTContext &Context, bool ApplyBuiltinTransfer) + : DataflowAnalysis<NoopAnalysis, NoopLattice>(Context, + ApplyBuiltinTransfer) {} + + /// `ApplyBuiltinTransfer` controls whether to run the built-in transfer + /// functions that model memory during the analysis. Their results are not + /// used by `NoopAnalysis`, but tests that need to inspect the environment + /// should enable them. + NoopAnalysis(ASTContext &Context, DataflowAnalysisOptions Options) + : DataflowAnalysis<NoopAnalysis, NoopLattice>(Context, Options) {} + + static NoopLattice initialElement() { return {}; } + + void transfer(const Stmt *S, NoopLattice &E, Environment &Env) {} +}; + +} // namespace dataflow +} // namespace clang + +#endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_NOOPANALYSIS_H diff --git a/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h b/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h index 5e168194064f..b043062459e4 100644 --- a/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h +++ b/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h @@ -30,6 +30,14 @@ namespace clang { namespace dataflow { +struct DataflowAnalysisOptions { + /// Determines whether to apply the built-in transfer functions. + // FIXME: Remove this option once the framework supports composing analyses + // (at which point the built-in transfer functions can be simply a standalone + // analysis). + bool ApplyBuiltinTransfer = true; +}; + /// Type-erased lattice element container. /// /// Requirements: @@ -42,16 +50,17 @@ struct TypeErasedLattice { /// Type-erased base class for dataflow analyses built on a single lattice type. class TypeErasedDataflowAnalysis : public Environment::ValueModel { - /// Determines whether to apply the built-in transfer functions. 
- // FIXME: Remove this option once the framework supports composing analyses - // (at which point the built-in transfer functions can be simply a standalone - // analysis). - bool ApplyBuiltinTransfer; + DataflowAnalysisOptions Options; public: - TypeErasedDataflowAnalysis() : ApplyBuiltinTransfer(true) {} + TypeErasedDataflowAnalysis() : Options({}) {} + + /// Deprecated. Use the `DataflowAnalysisOptions` constructor instead. TypeErasedDataflowAnalysis(bool ApplyBuiltinTransfer) - : ApplyBuiltinTransfer(ApplyBuiltinTransfer) {} + : Options({ApplyBuiltinTransfer}) {} + + TypeErasedDataflowAnalysis(DataflowAnalysisOptions Options) + : Options(Options) {} virtual ~TypeErasedDataflowAnalysis() {} @@ -80,7 +89,7 @@ public: /// Determines whether to apply the built-in transfer functions, which model /// the heap and stack in the `Environment`. - bool applyBuiltinTransfer() const { return ApplyBuiltinTransfer; } + bool applyBuiltinTransfer() const { return Options.ApplyBuiltinTransfer; } }; /// Type-erased model of the program at a given program point. diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 78e0fce917a0..d61f3583281d 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -2703,7 +2703,7 @@ def Target : InheritableAttr { template<class Compare> ParsedTargetAttr parse(Compare cmp) const { ParsedTargetAttr Attrs = parse(); - llvm::sort(std::begin(Attrs.Features), std::end(Attrs.Features), cmp); + llvm::sort(Attrs.Features, cmp); return Attrs; } diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 68bcf546d177..cdf5f5a85418 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -339,12 +339,37 @@ TARGET_BUILTIN(__builtin_amdgcn_mfma_i32_16x16x32_i8, "V4iWiWiV4iIiIiIi", "nc", TARGET_BUILTIN(__builtin_amdgcn_mfma_i32_32x32x16_i8, "V16iWiWiV16iIiIiIi", "nc", "mai-insts") TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x8_xf32, "V4fV2fV2fV4fIiIiIi", "nc", "mai-insts") TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x4_xf32, "V16fV2fV2fV16fIiIiIi", "nc", "mai-insts") +TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x32_bf8_bf8, "V4fWiWiV4fIiIiIi", "nc", "fp8-insts") +TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x32_bf8_fp8, "V4fWiWiV4fIiIiIi", "nc", "fp8-insts") +TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x32_fp8_bf8, "V4fWiWiV4fIiIiIi", "nc", "fp8-insts") +TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x32_fp8_fp8, "V4fWiWiV4fIiIiIi", "nc", "fp8-insts") +TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x16_bf8_bf8, "V16fWiWiV16fIiIiIi", "nc", "fp8-insts") +TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x16_bf8_fp8, "V16fWiWiV16fIiIiIi", "nc", "fp8-insts") +TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x16_fp8_bf8, "V16fWiWiV16fIiIiIi", "nc", "fp8-insts") +TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x16_fp8_fp8, "V16fWiWiV16fIiIiIi", "nc", "fp8-insts") TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x32_f16, "V4fV4hV8hV4fiIiIi", "nc", "mai-insts") TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x16_f16, "V16fV4hV8hV16fiIiIi", "nc", "mai-insts") TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x32_bf16, "V4fV4sV8sV4fiIiIi", "nc", "mai-insts") TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x16_bf16, "V16fV4sV8sV16fiIiIi", "nc", "mai-insts") TARGET_BUILTIN(__builtin_amdgcn_smfmac_i32_16x16x64_i8, "V4iV2iV4iV4iiIiIi", "nc", "mai-insts") TARGET_BUILTIN(__builtin_amdgcn_smfmac_i32_32x32x32_i8, "V16iV2iV4iV16iiIiIi", 
"nc", "mai-insts") +TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x64_bf8_bf8, "V4fV2iV4iV4fiIiIi", "nc", "fp8-insts") +TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x64_bf8_fp8, "V4fV2iV4iV4fiIiIi", "nc", "fp8-insts") +TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x64_fp8_bf8, "V4fV2iV4iV4fiIiIi", "nc", "fp8-insts") +TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x64_fp8_fp8, "V4fV2iV4iV4fiIiIi", "nc", "fp8-insts") +TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x32_bf8_bf8, "V16fV2iV4iV16fiIiIi", "nc", "fp8-insts") +TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x32_bf8_fp8, "V16fV2iV4iV16fiIiIi", "nc", "fp8-insts") +TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x32_fp8_bf8, "V16fV2iV4iV16fiIiIi", "nc", "fp8-insts") +TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x32_fp8_fp8, "V16fV2iV4iV16fiIiIi", "nc", "fp8-insts") + +TARGET_BUILTIN(__builtin_amdgcn_cvt_f32_bf8, "fiIi", "nc", "fp8-insts") +TARGET_BUILTIN(__builtin_amdgcn_cvt_f32_fp8, "fiIi", "nc", "fp8-insts") +TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f32_bf8, "V2fiIb", "nc", "fp8-insts") +TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f32_fp8, "V2fiIb", "nc", "fp8-insts") +TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_bf8_f32, "iffiIb", "nc", "fp8-insts") +TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", "nc", "fp8-insts") +TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts") +TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts") #undef BUILTIN #undef TARGET_BUILTIN diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index b1d394edd04a..ef7957979dcc 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -213,6 +213,10 @@ CODEGENOPT(AtomicProfileUpdate , 1, 0) ///< Set -fprofile-update=atomic ENUM_CODEGENOPT(ProfileInstr, ProfileInstrKind, 2, ProfileNone) /// Choose profile kind for PGO use compilation. ENUM_CODEGENOPT(ProfileUse, ProfileInstrKind, 2, ProfileNone) +/// Partition functions into N groups and select only functions in group i to be +/// instrumented. Selected group numbers can be 0 to N-1 inclusive. +VALUE_CODEGENOPT(ProfileTotalFunctionGroups, 32, 1) +VALUE_CODEGENOPT(ProfileSelectedFunctionGroup, 32, 0) CODEGENOPT(CoverageMapping , 1, 0) ///< Generate coverage mapping regions to ///< enable code coverage analysis. 
CODEGENOPT(DumpCoverageMapping , 1, 0) ///< Dump the generated coverage mapping diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 68685baf7633..2f600d28fea0 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -678,4 +678,11 @@ def err_drv_invalid_format_dxil_validator_version : Error< def err_drv_invalid_empty_dxil_validator_version : Error< "invalid validator version : %0\n" "If validator major version is 0, minor version must also be 0.">; + +def warn_drv_sarif_format_unstable : Warning< + "diagnostic formatting in SARIF mode is currently unstable">, + InGroup<DiagGroup<"sarif-format-unstable">>; + +def err_drv_riscv_unsupported_with_linker_relaxation : Error< + "%0 is unsupported with RISC-V linker relaxation (-mrelax)">; } diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 53e246a39ed8..4412c93683ed 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -35,7 +35,7 @@ def ArrayParameter : DiagGroup<"array-parameter">; def AutoDisableVptrSanitizer : DiagGroup<"auto-disable-vptr-sanitizer">; def Availability : DiagGroup<"availability">; def Section : DiagGroup<"section">; -def AutoImport : DiagGroup<"auto-import">; +def : DiagGroup<"auto-import">; def FrameworkHdrQuotedInclude : DiagGroup<"quoted-include-in-framework-header">; def FrameworkIncludePrivateFromPublic : DiagGroup<"framework-include-private-from-public">; @@ -490,6 +490,7 @@ def ModuleBuild : DiagGroup<"module-build">; def ModuleImport : DiagGroup<"module-import">; def ModuleConflict : DiagGroup<"module-conflict">; def ModuleFileExtension : DiagGroup<"module-file-extension">; +def ModuleIncludeDirectiveTranslation : DiagGroup<"module-include-translation">; def RoundTripCC1Args : DiagGroup<"round-trip-cc1-args">; def NewlineEOF : DiagGroup<"newline-eof">; def Nullability : DiagGroup<"nullability">; diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index dd0909704492..6032fbd18d56 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -387,7 +387,15 @@ def ext_pp_include_search_ms : ExtWarn< def ext_pp_ident_directive : Extension<"#ident is a language extension">; def ext_pp_include_next_directive : Extension< "#include_next is a language extension">, InGroup<GNUIncludeNext>; -def ext_pp_warning_directive : Extension<"#warning is a language extension">; + +def ext_pp_warning_directive : Extension< + "#warning is a %select{C2x|C++2b}0 extension">; +def warn_cxx2b_compat_warning_directive : Warning< + "#warning is incompatible with C++ standards before C++2b">, + InGroup<CXXPre2bCompat>, DefaultIgnore; +def warn_c2x_compat_warning_directive : Warning< + "#warning is incompatible with C standards before C2x">, + InGroup<CPre2xCompat>, DefaultIgnore; def ext_pp_extra_tokens_at_eol : ExtWarn< "extra tokens at end of #%0 directive">, InGroup<ExtraTokens>; @@ -851,9 +859,9 @@ def warn_framework_include_private_from_public : Warning< "public framework header includes private framework header '%0'" >, InGroup<FrameworkIncludePrivateFromPublic>; -def warn_auto_module_import : Warning< +def remark_pp_include_directive_modular_translation : Remark< "treating #%select{include|import|include_next|__include_macros}0 as an " - "import of module '%1'">, 
InGroup<AutoImport>, DefaultIgnore; + "import of module '%1'">, InGroup<ModuleIncludeDirectiveTranslation>; def note_implicit_top_level_module_import_here : Note< "submodule of top-level module '%0' implicitly imported here">; def warn_uncovered_module_header : Warning< diff --git a/clang/include/clang/Basic/DiagnosticOptions.h b/clang/include/clang/Basic/DiagnosticOptions.h index 17533b38ff5f..c4134835b5de 100644 --- a/clang/include/clang/Basic/DiagnosticOptions.h +++ b/clang/include/clang/Basic/DiagnosticOptions.h @@ -74,7 +74,7 @@ class DiagnosticOptions : public RefCountedBase<DiagnosticOptions>{ friend class CompilerInvocation; public: - enum TextDiagnosticFormat { Clang, MSVC, Vi }; + enum TextDiagnosticFormat { Clang, MSVC, Vi, SARIF }; // Default values. enum { diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 550029f58b54..756102720049 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -8037,24 +8037,6 @@ def err_incompatible_qualified_id : Error< "sending type to parameter of incompatible type}0,1" "|%diff{casting $ to incompatible type $|" "casting type to incompatible type}0,1}2">; -def ext_typecheck_convert_pointer_int : ExtWarn< - "incompatible pointer to integer conversion " - "%select{%diff{assigning to $ from $|assigning to different types}0,1" - "|%diff{passing $ to parameter of type $|" - "passing to parameter of different type}0,1" - "|%diff{returning $ from a function with result type $|" - "returning from function with different return type}0,1" - "|%diff{converting $ to type $|converting between types}0,1" - "|%diff{initializing $ with an expression of type $|" - "initializing with expression of different type}0,1" - "|%diff{sending $ to parameter of type $|" - "sending to parameter of different type}0,1" - "|%diff{casting $ to type $|casting between types}0,1}2" - "%select{|; dereference with *|" - "; take the address with &|" - "; remove *|" - "; remove &}3">, - InGroup<IntConversion>; def err_typecheck_convert_pointer_int : Error< "incompatible pointer to integer conversion " "%select{%diff{assigning to $ from $|assigning to different types}0,1" @@ -8072,24 +8054,9 @@ def err_typecheck_convert_pointer_int : Error< "; take the address with &|" "; remove *|" "; remove &}3">; -def ext_typecheck_convert_int_pointer : ExtWarn< - "incompatible integer to pointer conversion " - "%select{%diff{assigning to $ from $|assigning to different types}0,1" - "|%diff{passing $ to parameter of type $|" - "passing to parameter of different type}0,1" - "|%diff{returning $ from a function with result type $|" - "returning from function with different return type}0,1" - "|%diff{converting $ to type $|converting between types}0,1" - "|%diff{initializing $ with an expression of type $|" - "initializing with expression of different type}0,1" - "|%diff{sending $ to parameter of type $|" - "sending to parameter of different type}0,1" - "|%diff{casting $ to type $|casting between types}0,1}2" - "%select{|; dereference with *|" - "; take the address with &|" - "; remove *|" - "; remove &}3">, - InGroup<IntConversion>, SFINAEFailure; +def ext_typecheck_convert_pointer_int : ExtWarn< + err_typecheck_convert_pointer_int.Text>, + InGroup<IntConversion>, DefaultError; def err_typecheck_convert_int_pointer : Error< "incompatible integer to pointer conversion " "%select{%diff{assigning to $ from $|assigning to different types}0,1" @@ -8107,6 +8074,9 @@ def 
err_typecheck_convert_int_pointer : Error< "; take the address with &|" "; remove *|" "; remove &}3">; +def ext_typecheck_convert_int_pointer : ExtWarn< + err_typecheck_convert_int_pointer.Text>, + InGroup<IntConversion>, DefaultError; def ext_typecheck_convert_pointer_void_func : Extension< "%select{%diff{assigning to $ from $|assigning to different types}0,1" "|%diff{passing $ to parameter of type $|" diff --git a/clang/include/clang/Basic/Features.def b/clang/include/clang/Basic/Features.def index a7f0ae03e0be..7151e923ae9c 100644 --- a/clang/include/clang/Basic/Features.def +++ b/clang/include/clang/Basic/Features.def @@ -179,7 +179,6 @@ FEATURE(cxx_thread_local, FEATURE(cxx_trailing_return, LangOpts.CPlusPlus11) FEATURE(cxx_unicode_literals, LangOpts.CPlusPlus11) FEATURE(cxx_unrestricted_unions, LangOpts.CPlusPlus11) -FEATURE(cxx_unstable, LangOpts.Unstable) FEATURE(cxx_user_literals, LangOpts.CPlusPlus11) FEATURE(cxx_variadic_templates, LangOpts.CPlusPlus11) // C++14 features @@ -235,6 +234,7 @@ FEATURE(shadow_call_stack, LangOpts.Sanitize.has(SanitizerKind::ShadowCallStack)) FEATURE(tls, PP.getTargetInfo().isTLSSupported()) FEATURE(underlying_type, LangOpts.CPlusPlus) +FEATURE(experimental_library, LangOpts.ExperimentalLibrary) // C11 features supported by other languages as extensions. EXTENSION(c_alignas, true) diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index c41b5ddc7fa1..6fb31c5655ab 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -155,7 +155,7 @@ LANGOPT(GNUAsm , 1, 1, "GNU-style inline assembly") LANGOPT(Coroutines , 1, 0, "C++20 coroutines") LANGOPT(DllExportInlines , 1, 1, "dllexported classes dllexport inline methods") LANGOPT(RelaxedTemplateTemplateArgs, 1, 0, "C++17 relaxed matching of template template arguments") -LANGOPT(Unstable , 1, 0, "Enable unstable and experimental features") +LANGOPT(ExperimentalLibrary, 1, 0, "enable unstable and experimental library features") LANGOPT(DoubleSquareBracketAttributes, 1, 0, "'[[]]' attributes extension for all language standard modes") @@ -424,6 +424,7 @@ LANGOPT(PaddingOnUnsignedFixedPoint, 1, 0, LANGOPT(RegisterStaticDestructors, 1, 1, "Register C++ static destructors") LANGOPT(MatrixTypes, 1, 0, "Enable or disable the builtin matrix type") +LANGOPT(StrictFlexArrays, 2, 0, "Rely on strict definition of flexible arrays") COMPATIBLE_VALUE_LANGOPT(MaxTokens, 32, 0, "Max number of tokens per TU or 0") diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h index a1778baa0453..47d736a3b455 100644 --- a/clang/include/clang/Basic/Module.h +++ b/clang/include/clang/Basic/Module.h @@ -665,6 +665,18 @@ public: Module *findSubmodule(StringRef Name) const; Module *findOrInferSubmodule(StringRef Name); + /// Get the Global Module Fragment (sub-module) for this module, it there is + /// one. + /// + /// \returns The GMF sub-module if found, or NULL otherwise. + Module *getGlobalModuleFragment() { return findSubmodule("<global>"); } + + /// Get the Private Module Fragment (sub-module) for this module, it there is + /// one. + /// + /// \returns The PMF sub-module if found, or NULL otherwise. + Module *getPrivateModuleFragment() { return findSubmodule("<private>"); } + /// Determine whether the specified module would be visible to /// a lookup at the end of this module. 
/// diff --git a/clang/include/clang/Basic/NoSanitizeList.h b/clang/include/clang/Basic/NoSanitizeList.h index 3f80e0fdedda..43415859fcd5 100644 --- a/clang/include/clang/Basic/NoSanitizeList.h +++ b/clang/include/clang/Basic/NoSanitizeList.h @@ -41,6 +41,8 @@ public: bool containsFunction(SanitizerMask Mask, StringRef FunctionName) const; bool containsFile(SanitizerMask Mask, StringRef FileName, StringRef Category = StringRef()) const; + bool containsMainFile(SanitizerMask Mask, StringRef FileName, + StringRef Category = StringRef()) const; bool containsLocation(SanitizerMask Mask, SourceLocation Loc, StringRef Category = StringRef()) const; }; diff --git a/clang/include/clang/Basic/Sarif.h b/clang/include/clang/Basic/Sarif.h new file mode 100644 index 000000000000..818d78668ff1 --- /dev/null +++ b/clang/include/clang/Basic/Sarif.h @@ -0,0 +1,440 @@ +//== clang/Basic/Sarif.h - SARIF Diagnostics Object Model -------*- C++ -*--==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// Defines clang::SarifDocumentWriter, clang::SarifRule, clang::SarifResult. +/// +/// The document built can be accessed as a JSON Object. +/// Several value semantic types are also introduced which represent properties +/// of the SARIF standard, such as 'artifact', 'result', 'rule'. +/// +/// A SARIF (Static Analysis Results Interchange Format) document is JSON +/// document that describes in detail the results of running static analysis +/// tools on a project. Each (non-trivial) document consists of at least one +/// "run", which are themselves composed of details such as: +/// * Tool: The tool that was run +/// * Rules: The rules applied during the tool run, represented by +/// \c reportingDescriptor objects in SARIF +/// * Results: The matches for the rules applied against the project(s) being +/// evaluated, represented by \c result objects in SARIF +/// +/// Reference: +/// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html">The SARIF standard</a> +/// 2. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317836">SARIF<pre>reportingDescriptor</pre></a> +/// 3. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317638">SARIF<pre>result</pre></a> +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_BASIC_SARIF_H +#define LLVM_CLANG_BASIC_SARIF_H + +#include "clang/Basic/SourceLocation.h" +#include "clang/Basic/Version.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/JSON.h" +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <initializer_list> +#include <string> + +namespace clang { + +class SarifDocumentWriter; +class SourceManager; + +namespace detail { + +/// \internal +/// An artifact location is SARIF's way of describing the complete location +/// of an artifact encountered during analysis. The \c artifactLocation object +/// typically consists of a URI, and/or an index to reference the artifact it +/// locates. 
+/// +/// This builder makes an additional assumption: that every artifact encountered +/// by \c clang will be a physical, top-level artifact. Which is why the static +/// creation method \ref SarifArtifactLocation::create takes a mandatory URI +/// parameter. The official standard states that either a \c URI or \c Index +/// must be available in the object, \c clang picks the \c URI as a reasonable +/// default, because it intends to deal in physical artifacts for now. +/// +/// Reference: +/// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317427">artifactLocation object</a> +/// 2. \ref SarifArtifact +class SarifArtifactLocation { +private: + friend class clang::SarifDocumentWriter; + + llvm::Optional<uint32_t> Index; + std::string URI; + + SarifArtifactLocation() = delete; + explicit SarifArtifactLocation(const std::string &URI) : URI(URI) {} + +public: + static SarifArtifactLocation create(llvm::StringRef URI) { + return SarifArtifactLocation{URI.str()}; + } + + SarifArtifactLocation setIndex(uint32_t Idx) { + Index = Idx; + return *this; + } +}; + +/// \internal +/// An artifact in SARIF is any object (a sequence of bytes) addressable by +/// a URI (RFC 3986). The most common type of artifact for clang's use-case +/// would be source files. SARIF's artifact object is described in detail in +/// section 3.24. +// +/// Since every clang artifact MUST have a location (there being no nested +/// artifacts), the creation method \ref SarifArtifact::create requires a +/// \ref SarifArtifactLocation object. +/// +/// Reference: +/// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317611">artifact object</a> +class SarifArtifact { +private: + friend class clang::SarifDocumentWriter; + + llvm::Optional<uint32_t> Offset; + llvm::Optional<size_t> Length; + std::string MimeType; + SarifArtifactLocation Location; + llvm::SmallVector<std::string, 4> Roles; + + SarifArtifact() = delete; + + explicit SarifArtifact(const SarifArtifactLocation &Loc) : Location(Loc) {} + +public: + static SarifArtifact create(const SarifArtifactLocation &Loc) { + return SarifArtifact{Loc}; + } + + SarifArtifact setOffset(uint32_t ArtifactOffset) { + Offset = ArtifactOffset; + return *this; + } + + SarifArtifact setLength(size_t NumBytes) { + Length = NumBytes; + return *this; + } + + SarifArtifact setRoles(std::initializer_list<llvm::StringRef> ArtifactRoles) { + Roles.assign(ArtifactRoles.begin(), ArtifactRoles.end()); + return *this; + } + + SarifArtifact setMimeType(llvm::StringRef ArtifactMimeType) { + MimeType = ArtifactMimeType.str(); + return *this; + } +}; + +} // namespace detail + +enum class ThreadFlowImportance { Important, Essential, Unimportant }; + +/// A thread flow is a sequence of code locations that specify a possible path +/// through a single thread of execution. +/// A thread flow in SARIF is related to a code flow which describes +/// the progress of one or more programs through one or more thread flows. +/// +/// Reference: +/// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317744">threadFlow object</a> +/// 2. 
<a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317740">codeFlow object</a> +class ThreadFlow { + friend class SarifDocumentWriter; + + CharSourceRange Range; + ThreadFlowImportance Importance; + std::string Message; + + ThreadFlow() = default; + +public: + static ThreadFlow create() { return {}; } + + ThreadFlow setRange(const CharSourceRange &ItemRange) { + assert(ItemRange.isCharRange() && + "ThreadFlows require a character granular source range!"); + Range = ItemRange; + return *this; + } + + ThreadFlow setImportance(const ThreadFlowImportance &ItemImportance) { + Importance = ItemImportance; + return *this; + } + + ThreadFlow setMessage(llvm::StringRef ItemMessage) { + Message = ItemMessage.str(); + return *this; + } +}; + +/// A SARIF rule (\c reportingDescriptor object) contains information that +/// describes a reporting item generated by a tool. A reporting item is +/// either a result of analysis or notification of a condition encountered by +/// the tool. Rules are arbitrary but are identifiable by a hierarchical +/// rule-id. +/// +/// This builder provides an interface to create SARIF \c reportingDescriptor +/// objects via the \ref SarifRule::create static method. +/// +/// Reference: +/// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317836">reportingDescriptor object</a> +class SarifRule { + friend class clang::SarifDocumentWriter; + + std::string Name; + std::string Id; + std::string Description; + std::string HelpURI; + + SarifRule() = default; + +public: + static SarifRule create() { return {}; } + + SarifRule setName(llvm::StringRef RuleName) { + Name = RuleName.str(); + return *this; + } + + SarifRule setRuleId(llvm::StringRef RuleId) { + Id = RuleId.str(); + return *this; + } + + SarifRule setDescription(llvm::StringRef RuleDesc) { + Description = RuleDesc.str(); + return *this; + } + + SarifRule setHelpURI(llvm::StringRef RuleHelpURI) { + HelpURI = RuleHelpURI.str(); + return *this; + } +}; + +/// A SARIF result (also called a "reporting item") is a unit of output +/// produced when one of the tool's \c reportingDescriptor encounters a match +/// on the file being analysed by the tool. +/// +/// This builder provides a \ref SarifResult::create static method that can be +/// used to create an empty shell onto which attributes can be added using the +/// \c setX(...) methods. +/// +/// For example: +/// \code{.cpp} +/// SarifResult result = SarifResult::create(...) +/// .setRuleId(...) +/// .setDiagnosticMessage(...); +/// \endcode +/// +/// Reference: +/// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317638">SARIF<pre>result</pre></a> +class SarifResult { + friend class clang::SarifDocumentWriter; + + // NOTE: + // This type cannot fit all possible indexes representable by JSON, but is + // chosen because it is the largest unsigned type that can be safely + // converted to an \c int64_t. 
+ uint32_t RuleIdx; + std::string RuleId; + std::string DiagnosticMessage; + llvm::SmallVector<CharSourceRange, 8> Locations; + llvm::SmallVector<ThreadFlow, 8> ThreadFlows; + + SarifResult() = delete; + explicit SarifResult(uint32_t RuleIdx) : RuleIdx(RuleIdx) {} + +public: + static SarifResult create(uint32_t RuleIdx) { return SarifResult{RuleIdx}; } + + SarifResult setIndex(uint32_t Idx) { + RuleIdx = Idx; + return *this; + } + + SarifResult setRuleId(llvm::StringRef Id) { + RuleId = Id.str(); + return *this; + } + + SarifResult setDiagnosticMessage(llvm::StringRef Message) { + DiagnosticMessage = Message.str(); + return *this; + } + + SarifResult setLocations(llvm::ArrayRef<CharSourceRange> DiagLocs) { +#ifndef NDEBUG + for (const auto &Loc : DiagLocs) { + assert(Loc.isCharRange() && + "SARIF Results require character granular source ranges!"); + } +#endif + Locations.assign(DiagLocs.begin(), DiagLocs.end()); + return *this; + } + SarifResult setThreadFlows(llvm::ArrayRef<ThreadFlow> ThreadFlowResults) { + ThreadFlows.assign(ThreadFlowResults.begin(), ThreadFlowResults.end()); + return *this; + } +}; + +/// This class handles creating a valid SARIF document given various input +/// attributes. However, it requires an ordering among certain method calls: +/// +/// 1. Because every SARIF document must contain at least 1 \c run, callers +/// must ensure that \ref SarifDocumentWriter::createRun is is called before +/// any other methods. +/// 2. If SarifDocumentWriter::endRun is called, callers MUST call +/// SarifDocumentWriter::createRun, before invoking any of the result +/// aggregation methods such as SarifDocumentWriter::appendResult etc. +class SarifDocumentWriter { +private: + const llvm::StringRef SchemaURI{ + "https://docs.oasis-open.org/sarif/sarif/v2.1.0/cos02/schemas/" + "sarif-schema-2.1.0.json"}; + const llvm::StringRef SchemaVersion{"2.1.0"}; + + /// \internal + /// Return a pointer to the current tool. Asserts that a run exists. + llvm::json::Object &getCurrentTool(); + + /// \internal + /// Checks if there is a run associated with this document. + /// + /// \return true on success + bool hasRun() const; + + /// \internal + /// Reset portions of the internal state so that the document is ready to + /// receive data for a new run. + void reset(); + + /// \internal + /// Return a mutable reference to the current run, after asserting it exists. + /// + /// \note It is undefined behavior to call this if a run does not exist in + /// the SARIF document. + llvm::json::Object &getCurrentRun(); + + /// Create a code flow object for the given threadflows. + /// See \ref ThreadFlow. + /// + /// \note It is undefined behavior to call this if a run does not exist in + /// the SARIF document. + llvm::json::Object + createCodeFlow(const llvm::ArrayRef<ThreadFlow> ThreadFlows); + + /// Add the given threadflows to the ones this SARIF document knows about. + llvm::json::Array + createThreadFlows(const llvm::ArrayRef<ThreadFlow> ThreadFlows); + + /// Add the given \ref CharSourceRange to the SARIF document as a physical + /// location, with its corresponding artifact. + llvm::json::Object createPhysicalLocation(const CharSourceRange &R); + +public: + SarifDocumentWriter() = delete; + + /// Create a new empty SARIF document with the given source manager. + SarifDocumentWriter(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {} + + /// Release resources held by this SARIF document. 
+ ~SarifDocumentWriter() = default; + + /// Create a new run with which any upcoming analysis will be associated. + /// Each run requires specifying the tool that is generating reporting items. + void createRun(const llvm::StringRef ShortToolName, + const llvm::StringRef LongToolName, + const llvm::StringRef ToolVersion = CLANG_VERSION_STRING); + + /// If there is a current run, end it. + /// + /// This method collects various book-keeping required to clear and close + /// resources associated with the current run, but may also allocate some + /// for the next run. + /// + /// Calling \ref endRun before associating a run through \ref createRun leads + /// to undefined behaviour. + void endRun(); + + /// Associate the given rule with the current run. + /// + /// Returns an integer rule index for the created rule that is unique within + /// the current run, which can then be used to create a \ref SarifResult + /// to add to the current run. Note that a rule must exist before being + /// referenced by a result. + /// + /// \pre + /// There must be a run associated with the document, failing to do so will + /// cause undefined behaviour. + size_t createRule(const SarifRule &Rule); + + /// Append a new result to the currently in-flight run. + /// + /// \pre + /// There must be a run associated with the document, failing to do so will + /// cause undefined behaviour. + /// \pre + /// \c RuleIdx used to create the result must correspond to a rule known by + /// the SARIF document. It must be the value returned by a previous call + /// to \ref createRule. + void appendResult(const SarifResult &SarifResult); + + /// Return the SARIF document in its current state. + /// Calling this will trigger a copy of the internal state including all + /// reported diagnostics, resulting in an expensive call. + llvm::json::Object createDocument(); + +private: + /// Source Manager to use for the current SARIF document. + const SourceManager &SourceMgr; + + /// Flag to track the state of this document: + /// A closed document is one on which a new runs must be created. + /// This could be a document that is freshly created, or has recently + /// finished writing to a previous run. + bool Closed = true; + + /// A sequence of SARIF runs. + /// Each run object describes a single run of an analysis tool and contains + /// the output of that run. + /// + /// Reference: <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317484">run object</a> + llvm::json::Array Runs; + + /// The list of rules associated with the most recent active run. These are + /// defined using the diagnostics passed to the SarifDocument. Each rule + /// need not be unique through the result set. E.g. there may be several + /// 'syntax' errors throughout code under analysis, each of which has its + /// own specific diagnostic message (and consequently, RuleId). Rules are + /// also known as "reportingDescriptor" objects in SARIF. + /// + /// Reference: <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317556">rules property</a> + llvm::SmallVector<SarifRule, 32> CurrentRules; + + /// The list of artifacts that have been encountered on the most recent active + /// run. An artifact is defined in SARIF as a sequence of bytes addressable + /// by a URI. A common example for clang's case would be files named by + /// filesystem paths. 
+ llvm::StringMap<detail::SarifArtifact> CurrentArtifacts; +}; +} // namespace clang + +#endif // LLVM_CLANG_BASIC_SARIF_H diff --git a/clang/include/clang/Driver/Options.h b/clang/include/clang/Driver/Options.h index f9b9632ee7cb..f7ee154b7a7a 100644 --- a/clang/include/clang/Driver/Options.h +++ b/clang/include/clang/Driver/Options.h @@ -36,7 +36,8 @@ enum ClangFlags { FC1Option = (1 << 15), FlangOnlyOption = (1 << 16), DXCOption = (1 << 17), - Ignored = (1 << 18), + CLDXCOption = (1 << 18), + Ignored = (1 << 19), }; enum ID { diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 532d7780c529..b9c2e4d528e4 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -53,6 +53,10 @@ def CC1AsOption : OptionFlag; // are made available when the driver is running in DXC compatibility mode. def DXCOption : OptionFlag; +// CLDXCOption - This is a cl.exe/dxc.exe compatibility option. Options with this flag +// are made available when the driver is running in CL/DXC compatibility mode. +def CLDXCOption : OptionFlag; + // NoDriverOption - This option should not be accepted by the driver. def NoDriverOption : OptionFlag; @@ -1140,6 +1144,12 @@ def fallow_unsupported : Flag<["-"], "fallow-unsupported">, Group<f_Group>; def fapple_kext : Flag<["-"], "fapple-kext">, Group<f_Group>, Flags<[CC1Option]>, HelpText<"Use Apple's kernel extensions ABI">, MarshallingInfoFlag<LangOpts<"AppleKext">>; +def fstrict_flex_arrays_EQ : Joined<["-"], "fstrict-flex-arrays=">,Group<f_Group>, + MetaVarName<"<n>">, Values<"0,1,2">, + LangOpts<"StrictFlexArrays">, + Flags<[CC1Option]>, + HelpText<"Enable optimizations based on the strict definition of flexible arrays">, + MarshallingInfoInt<LangOpts<"StrictFlexArrays">>; defm apple_pragma_pack : BoolFOption<"apple-pragma-pack", LangOpts<"ApplePragmaPack">, DefaultFalse, PosFlag<SetTrue, [CC1Option], "Enable Apple gcc-compatible #pragma pack handling">, @@ -1183,9 +1193,13 @@ defm coroutines_ts : BoolFOption<"coroutines-ts", PosFlag<SetTrue, [CC1Option], "Enable support for the C++ Coroutines TS">, NegFlag<SetFalse>>; -defm unstable : BoolFOption<"unstable", - LangOpts<"Unstable">, DefaultFalse, - PosFlag<SetTrue, [CC1Option, CoreOption], "Enable unstable and experimental features">, +defm experimental_library : BoolFOption<"experimental-library", + LangOpts<"ExperimentalLibrary">, DefaultFalse, + PosFlag<SetTrue, [CC1Option, CoreOption], "Control whether unstable and experimental library features are enabled. " + "This option enables various library features that are either experimental (also known as TSes), or have been " + "but are not stable yet in the selected Standard Library implementation. It is not recommended to use this option " + "in production code, since neither ABI nor API stability are guaranteed. 
This is intended to provide a preview " + "of features that will ship in the future for experimentation purposes">, NegFlag<SetFalse>>; def fembed_offload_object_EQ : Joined<["-"], "fembed-offload-object=">, @@ -1333,6 +1347,15 @@ def fprofile_list_EQ : Joined<["-"], "fprofile-list=">, Group<f_Group>, Flags<[CC1Option, CoreOption]>, HelpText<"Filename defining the list of functions/files to instrument">, MarshallingInfoStringVector<LangOpts<"ProfileListFiles">>; +def fprofile_function_groups : Joined<["-"], "fprofile-function-groups=">, + Group<f_Group>, Flags<[CC1Option]>, MetaVarName<"<N>">, + HelpText<"Partition functions into N groups and select only functions in group i to be instrumented using -fprofile-selected-function-group">, + MarshallingInfoInt<CodeGenOpts<"ProfileTotalFunctionGroups">, "1">; +def fprofile_selected_function_group : + Joined<["-"], "fprofile-selected-function-group=">, Group<f_Group>, + Flags<[CC1Option]>, MetaVarName<"<i>">, + HelpText<"Partition functions into N groups using -fprofile-function-groups and select only functions in group i to be instrumented. The valid range is 0 to N-1 inclusive">, + MarshallingInfoInt<CodeGenOpts<"ProfileSelectedFunctionGroup">>; def fswift_async_fp_EQ : Joined<["-"], "fswift-async-fp=">, Group<f_Group>, Flags<[CC1Option, CC1AsOption, CoreOption]>, MetaVarName<"<option>">, HelpText<"Control emission of Swift async extended frame info">, @@ -2848,6 +2871,15 @@ def ftime_trace_granularity_EQ : Joined<["-"], "ftime-trace-granularity=">, Grou HelpText<"Minimum time granularity (in microseconds) traced by time profiler">, Flags<[CC1Option, CoreOption]>, MarshallingInfoInt<FrontendOpts<"TimeTraceGranularity">, "500u">; +def ftime_trace_EQ : Joined<["-"], "ftime-trace=">, Group<f_Group>, + HelpText<"Turn on time profiler. Generates JSON file based on output filename. " + "Specify the path which stores the tracing output file.">, + DocBrief<[{ + Turn on time profiler. Generates JSON file based on output filename. 
Results + can be analyzed with chrome://tracing or `Speedscope App + <https://www.speedscope.app>`_ for flamegraph visualization.}]>, + Flags<[CC1Option, CoreOption]>, + MarshallingInfoString<FrontendOpts<"TimeTracePath">>; def fproc_stat_report : Joined<["-"], "fproc-stat-report">, Group<f_Group>, HelpText<"Print subprocess statistics">; def fproc_stat_report_EQ : Joined<["-"], "fproc-stat-report=">, Group<f_Group>, @@ -5556,8 +5588,8 @@ def diagnostic_serialized_file : Separate<["-"], "serialize-diagnostic-file">, def fdiagnostics_format : Separate<["-"], "fdiagnostics-format">, HelpText<"Change diagnostic formatting to match IDE and command line tools">, - Values<"clang,msvc,vi">, - NormalizedValuesScope<"DiagnosticOptions">, NormalizedValues<["Clang", "MSVC", "Vi"]>, + Values<"clang,msvc,vi,sarif,SARIF">, + NormalizedValuesScope<"DiagnosticOptions">, NormalizedValues<["Clang", "MSVC", "Vi", "SARIF", "SARIF"]>, MarshallingInfoEnum<DiagnosticOpts<"Format">, "Clang">; def fdiagnostics_show_category : Separate<["-"], "fdiagnostics-show-category">, HelpText<"Print diagnostic category">, @@ -6327,7 +6359,7 @@ def defsym : Separate<["-"], "defsym">, // clang-cl Options //===----------------------------------------------------------------------===// -def cl_Group : OptionGroup<"<clang-cl options>">, Flags<[CLOption]>, +def cl_Group : OptionGroup<"<clang-cl options>">, Flags<[CLDXCOption]>, HelpText<"CL.EXE COMPATIBILITY OPTIONS">; def cl_compile_Group : OptionGroup<"<clang-cl compile-only options>">, @@ -6357,6 +6389,9 @@ class CLIgnoredJoined<string name> : Option<["/", "-"], name, KIND_JOINED>, class CLJoinedOrSeparate<string name> : Option<["/", "-"], name, KIND_JOINED_OR_SEPARATE>, Group<cl_Group>, Flags<[CLOption, NoXarchOption]>; +class CLDXCJoinedOrSeparate<string name> : Option<["/", "-"], name, + KIND_JOINED_OR_SEPARATE>, Group<cl_Group>, Flags<[CLDXCOption, NoXarchOption]>; + class CLCompileJoinedOrSeparate<string name> : Option<["/", "-"], name, KIND_JOINED_OR_SEPARATE>, Group<cl_compile_Group>, Flags<[CLOption, NoXarchOption]>; @@ -6434,7 +6469,7 @@ def _SLASH_help : CLFlag<"help">, Alias<help>, def _SLASH_HELP : CLFlag<"HELP">, Alias<help>; def _SLASH_hotpatch : CLFlag<"hotpatch">, Alias<fms_hotpatch>, HelpText<"Create hotpatchable image">; -def _SLASH_I : CLJoinedOrSeparate<"I">, +def _SLASH_I : CLDXCJoinedOrSeparate<"I">, HelpText<"Add directory to include search path">, MetaVarName<"<dir>">, Alias<I>; def _SLASH_J : CLFlag<"J">, HelpText<"Make char type unsigned">, diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h index f8a4b069b2e7..77ff9a863429 100644 --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -786,7 +786,7 @@ struct FormatStyle { }; /// The template declaration breaking style to use. - /// \version 7 + /// \version 3.4 BreakTemplateDeclarationsStyle AlwaysBreakTemplateDeclarations; /// A vector of strings that should be interpreted as attributes/qualifiers diff --git a/clang/include/clang/Frontend/FrontendOptions.h b/clang/include/clang/Frontend/FrontendOptions.h index ff5a9c5c77f4..b0e719ffcacf 100644 --- a/clang/include/clang/Frontend/FrontendOptions.h +++ b/clang/include/clang/Frontend/FrontendOptions.h @@ -499,6 +499,9 @@ public: /// Minimum time granularity (in microseconds) traced by time profiler. 
unsigned TimeTraceGranularity; + /// Path which stores the output files for -ftime-trace + std::string TimeTracePath; + public: FrontendOptions() : DisableFree(false), RelocatablePCH(false), ShowHelp(false), diff --git a/clang/include/clang/Sema/CodeCompleteConsumer.h b/clang/include/clang/Sema/CodeCompleteConsumer.h index 3869fb5b8398..c725c2ae3f96 100644 --- a/clang/include/clang/Sema/CodeCompleteConsumer.h +++ b/clang/include/clang/Sema/CodeCompleteConsumer.h @@ -1019,6 +1019,10 @@ public: /// for which we only have a function prototype. CK_FunctionType, + /// The candidate is a variable or expression of function type + /// for which we have the location of the prototype declaration. + CK_FunctionProtoTypeLoc, + /// The candidate is a template, template arguments are being completed. CK_Template, @@ -1043,6 +1047,10 @@ public: /// when Kind == CK_FunctionType. const FunctionType *Type; + /// The location of the function prototype that describes the entity being + /// called, when Kind == CK_FunctionProtoTypeLoc. + FunctionProtoTypeLoc ProtoTypeLoc; + /// The template overload candidate, available when /// Kind == CK_Template. const TemplateDecl *Template; @@ -1068,6 +1076,11 @@ public: assert(Type != nullptr); } + OverloadCandidate(FunctionProtoTypeLoc Prototype) + : Kind(CK_FunctionProtoTypeLoc), ProtoTypeLoc(Prototype) { + assert(!Prototype.isNull()); + } + OverloadCandidate(const RecordDecl *Aggregate) : Kind(CK_Aggregate), AggregateType(Aggregate) { assert(Aggregate != nullptr); @@ -1093,6 +1106,11 @@ public: /// function is stored. const FunctionType *getFunctionType() const; + /// Retrieve the function ProtoTypeLoc candidate. + /// This can be called for any Kind, but returns null for kinds + /// other than CK_FunctionProtoTypeLoc. + const FunctionProtoTypeLoc getFunctionProtoTypeLoc() const; + const TemplateDecl *getTemplate() const { assert(getKind() == CK_Template && "Not a template"); return Template; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index e51b9daef7d3..a33d85cc954d 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -2281,6 +2281,11 @@ public: return ModuleScopes.empty() ? nullptr : ModuleScopes.back().Module; } + /// Is the module scope we are an interface? + bool currentModuleIsInterface() const { + return ModuleScopes.empty() ? false : ModuleScopes.back().ModuleInterface; + } + /// Get the module owning an entity. Module *getOwningModule(const Decl *Entity) { return Entity->getOwningModule(); diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h index 22b405919bc1..ca6d7849d621 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h @@ -119,6 +119,9 @@ public: const char *NL, unsigned int Space, bool IsDot) const = 0; + virtual void printValue(raw_ostream &Out, ProgramStateRef State, + SymbolRef Sym) {} + /// Convenience method to query the state to see if a symbol is null or /// not null, or if neither assumption can be made. 
ConditionTruthVal isNull(ProgramStateRef State, SymbolRef Sym) { diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h index 415fa05586ed..116a5970c341 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h @@ -617,6 +617,11 @@ public: return svalBuilder.evalBinOp(ST, Op, LHS, RHS, T); } + /// Retrieves which element is being constructed in a non-POD type array. + static Optional<unsigned> + getIndexOfElementToConstruct(ProgramStateRef State, const CXXConstructExpr *E, + const LocationContext *LCtx); + /// By looking at a certain item that may be potentially part of an object's /// ConstructionContext, retrieve such object's location. A particular /// statement can be transparently passed as \p Item in most cases. @@ -708,10 +713,19 @@ public: /// fully implemented it sometimes indicates that it failed via its /// out-parameter CallOpts; in such cases a fake temporary region is /// returned, which is better than nothing but does not represent - /// the actual behavior of the program. - SVal computeObjectUnderConstruction( - const Expr *E, ProgramStateRef State, const LocationContext *LCtx, - const ConstructionContext *CC, EvalCallOptions &CallOpts); + /// the actual behavior of the program. The Idx parameter is used if we + /// construct an array of objects. In that case it points to the index + /// of the continuous memory region. + /// E.g.: + /// For `int arr[4]` this index can be 0,1,2,3. + /// For `int arr2[3][3]` this index can be 0,1,...,7,8. + /// A multi-dimensional array is also a continuous memory location in a + /// row-major order, so for arr[0][0] Idx is 0 and for arr[2][2] Idx is 8. + SVal computeObjectUnderConstruction(const Expr *E, ProgramStateRef State, + const LocationContext *LCtx, + const ConstructionContext *CC, + EvalCallOptions &CallOpts, + unsigned Idx = 0); /// Update the program state with all the path-sensitive information + /// that's necessary to perform construction of an object with a given @@ -724,12 +738,16 @@ public: /// A convenient wrapper around computeObjectUnderConstruction /// and updateObjectsUnderConstruction. - std::pair<ProgramStateRef, SVal> handleConstructionContext( - const Expr *E, ProgramStateRef State, const LocationContext *LCtx, - const ConstructionContext *CC, EvalCallOptions &CallOpts) { - SVal V = computeObjectUnderConstruction(E, State, LCtx, CC, CallOpts); - return std::make_pair( - updateObjectsUnderConstruction(V, E, State, LCtx, CC, CallOpts), V); + std::pair<ProgramStateRef, SVal> + handleConstructionContext(const Expr *E, ProgramStateRef State, + const LocationContext *LCtx, + const ConstructionContext *CC, + EvalCallOptions &CallOpts, unsigned Idx = 0) { + + SVal V = computeObjectUnderConstruction(E, State, LCtx, CC, CallOpts, Idx); + State = updateObjectsUnderConstruction(V, E, State, LCtx, CC, CallOpts); + + return std::make_pair(State, V); } private: @@ -796,6 +814,15 @@ private: const ExplodedNode *Pred, const EvalCallOptions &CallOpts = {}); + /// Checks whether our policies allow us to inline a non-POD type array + /// construction. + bool shouldInlineArrayConstruction(const ArrayType *Type); + + /// Checks whether we construct an array of non-POD type, and decides if the + /// constructor should be invoked once again.
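+  /// E.g. for `S arr[4];` the same construct-expression is expected to be
+  /// evaluated four times, once per element, for as long as this returns true.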
+ bool shouldRepeatCtorCall(ProgramStateRef State, const CXXConstructExpr *E, + const LocationContext *LCtx); + void inlineCall(WorkList *WList, const CallEvent &Call, const Decl *D, NodeBuilder &Bldr, ExplodedNode *Pred, ProgramStateRef State); @@ -838,7 +865,7 @@ private: const Expr *InitWithAdjustments, const Expr *Result = nullptr, const SubRegion **OutRegionWithAdjustments = nullptr); - /// Returns a region representing the first element of a (possibly + /// Returns a region representing the `Idx`th element of a (possibly /// multi-dimensional) array, for the purposes of element construction or /// destruction. /// @@ -846,8 +873,8 @@ private: /// /// If the type is not an array type at all, the original value is returned. /// Otherwise the "IsArray" flag is set. - static SVal makeZeroElementRegion(ProgramStateRef State, SVal LValue, - QualType &Ty, bool &IsArray); + static SVal makeElementRegion(ProgramStateRef State, SVal LValue, + QualType &Ty, bool &IsArray, unsigned Idx = 0); /// For a DeclStmt or CXXInitCtorInitializer, walk backward in the current CFG /// block to find the constructor expression that directly constructed into @@ -878,6 +905,17 @@ public: const ObjCForCollectionStmt *O, const LocationContext *LC); private: + /// Assuming we construct an array of non-POD types, this method allows us + /// to store which element is to be constructed next. + static ProgramStateRef + setIndexOfElementToConstruct(ProgramStateRef State, const CXXConstructExpr *E, + const LocationContext *LCtx, unsigned Idx); + + static ProgramStateRef + removeIndexOfElementToConstruct(ProgramStateRef State, + const CXXConstructExpr *E, + const LocationContext *LCtx); + /// Store the location of a C++ object corresponding to a statement /// until the statement is actually encountered. For example, if a DeclStmt /// has CXXConstructExpr as its initializer, the object would be considered diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h index 2cb9a6a0a0ed..34d44f709883 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h @@ -1364,6 +1364,7 @@ public: ~MemRegionManager(); ASTContext &getContext() { return Ctx; } + const ASTContext &getContext() const { return Ctx; } llvm::BumpPtrAllocator &getAllocator() { return A; } diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h index 3d247e7887d7..1b9526324086 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h @@ -75,39 +75,6 @@ protected: /// The width of the scalar type used for array indices. 
const unsigned ArrayIndexWidth; - SVal evalCastKind(UndefinedVal V, QualType CastTy, QualType OriginalTy); - SVal evalCastKind(UnknownVal V, QualType CastTy, QualType OriginalTy); - SVal evalCastKind(Loc V, QualType CastTy, QualType OriginalTy); - SVal evalCastKind(NonLoc V, QualType CastTy, QualType OriginalTy); - SVal evalCastSubKind(loc::ConcreteInt V, QualType CastTy, - QualType OriginalTy); - SVal evalCastSubKind(loc::GotoLabel V, QualType CastTy, QualType OriginalTy); - SVal evalCastSubKind(loc::MemRegionVal V, QualType CastTy, - QualType OriginalTy); - SVal evalCastSubKind(nonloc::CompoundVal V, QualType CastTy, - QualType OriginalTy); - SVal evalCastSubKind(nonloc::ConcreteInt V, QualType CastTy, - QualType OriginalTy); - SVal evalCastSubKind(nonloc::LazyCompoundVal V, QualType CastTy, - QualType OriginalTy); - SVal evalCastSubKind(nonloc::LocAsInteger V, QualType CastTy, - QualType OriginalTy); - SVal evalCastSubKind(nonloc::SymbolVal V, QualType CastTy, - QualType OriginalTy); - SVal evalCastSubKind(nonloc::PointerToMember V, QualType CastTy, - QualType OriginalTy); - /// Reduce cast expression by removing redundant intermediate casts. - /// E.g. - /// - (char)(short)(int x) -> (char)(int x) - /// - (int)(int x) -> int x - /// - /// \param V -- SymbolVal, which pressumably contains SymbolCast or any symbol - /// that is applicable for cast operation. - /// \param CastTy -- QualType, which `V` shall be cast to. - /// \return SVal with simplified cast expression. - /// \note: Currently only support integral casts. - nonloc::SymbolVal simplifySymbolCast(nonloc::SymbolVal V, QualType CastTy); - public: SValBuilder(llvm::BumpPtrAllocator &alloc, ASTContext &context, ProgramStateManager &stateMgr); diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h index 69f19f7d8565..c9c21fcf230e 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h @@ -169,6 +169,11 @@ public: /// should continue to the base regions if the region is not symbolic. SymbolRef getAsSymbol(bool IncludeBaseRegions = false) const; + /// If this SVal is loc::ConcreteInt or nonloc::ConcreteInt, + /// return a pointer to APSInt which is held in it. + /// Otherwise, return nullptr. + const llvm::APSInt *getAsInteger() const; + const MemRegion *getAsRegion() const; /// printJson - Pretty-prints in JSON format. diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h index a85d333ba6b1..209cc81e38dd 100644 --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h @@ -68,7 +68,9 @@ struct FullDependenciesResult { class DependencyScanningTool { public: /// Construct a dependency scanning tool. 
- DependencyScanningTool(DependencyScanningService &Service); + DependencyScanningTool(DependencyScanningService &Service, + llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS = + llvm::vfs::createPhysicalFileSystem()); /// Print out the dependency information into a string using the dependency /// file format that is specified in the options (-MD is the default) and diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h index 337bba2e72da..d6c0f2f1c6d6 100644 --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h @@ -52,7 +52,8 @@ public: /// using the regular processing run. class DependencyScanningWorker { public: - DependencyScanningWorker(DependencyScanningService &Service); + DependencyScanningWorker(DependencyScanningService &Service, + llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS); /// Run the dependency scanning tool for a given clang driver command-line, /// and report the discovered dependencies to the provided consumer. If \p diff --git a/clang/include/clang/Tooling/Inclusions/IncludeStyle.h b/clang/include/clang/Tooling/Inclusions/IncludeStyle.h index d5638642d017..d6b2b0192477 100644 --- a/clang/include/clang/Tooling/Inclusions/IncludeStyle.h +++ b/clang/include/clang/Tooling/Inclusions/IncludeStyle.h @@ -50,7 +50,7 @@ struct IncludeStyle { /// Dependent on the value, multiple ``#include`` blocks can be sorted /// as one and divided based on category. - /// \version 7 + /// \version 6 IncludeBlocksStyle IncludeBlocks; /// See documentation of ``IncludeCategories``. @@ -114,7 +114,7 @@ struct IncludeStyle { /// Priority: 1 /// SortPriority: 0 /// \endcode - /// \version 7 + /// \version 3.8 std::vector<IncludeCategory> IncludeCategories; /// Specify a regular expression of suffixes that are allowed in the @@ -128,7 +128,7 @@ struct IncludeStyle { /// /// For example, if configured to "(_test)?$", then a header a.h would be seen /// as the "main" include in both a.cc and a_test.cc. - /// \version 7 + /// \version 3.9 std::string IncludeIsMainRegex; /// Specify a regular expression for files being formatted @@ -149,7 +149,7 @@ struct IncludeStyle { /// also being respected in later phase). Without this option set, /// ``ClassImpl.hpp`` would not have the main include file put on top /// before any other include. - /// \version 7 + /// \version 10 std::string IncludeIsMainSourceRegex; }; diff --git a/clang/include/clang/Tooling/Syntax/BuildTree.h b/clang/include/clang/Tooling/Syntax/BuildTree.h index d6235797fd7a..273d03ddc233 100644 --- a/clang/include/clang/Tooling/Syntax/BuildTree.h +++ b/clang/include/clang/Tooling/Syntax/BuildTree.h @@ -13,6 +13,7 @@ #include "clang/AST/Decl.h" #include "clang/Basic/TokenKinds.h" #include "clang/Tooling/Syntax/Nodes.h" +#include "clang/Tooling/Syntax/TokenBufferTokenManager.h" #include "clang/Tooling/Syntax/Tree.h" namespace clang { @@ -21,19 +22,21 @@ namespace syntax { /// Build a syntax tree for the main file. /// This usually covers the whole TranslationUnitDecl, but can be restricted by /// the ASTContext's traversal scope. -syntax::TranslationUnit *buildSyntaxTree(Arena &A, ASTContext &Context); +syntax::TranslationUnit * +buildSyntaxTree(Arena &A, TokenBufferTokenManager &TBTM, ASTContext &Context); // Create syntax trees from subtrees not backed by the source code. 
// Synthesis of Leafs /// Create `Leaf` from token with `Spelling` and assert it has the desired /// `TokenKind`. -syntax::Leaf *createLeaf(syntax::Arena &A, tok::TokenKind K, - StringRef Spelling); +syntax::Leaf *createLeaf(syntax::Arena &A, TokenBufferTokenManager &TBTM, + tok::TokenKind K, StringRef Spelling); /// Infer the token spelling from its `TokenKind`, then create `Leaf` from /// this token -syntax::Leaf *createLeaf(syntax::Arena &A, tok::TokenKind K); +syntax::Leaf *createLeaf(syntax::Arena &A, TokenBufferTokenManager &TBTM, + tok::TokenKind K); // Synthesis of Trees /// Creates the concrete syntax node according to the specified `NodeKind` `K`. @@ -44,7 +47,8 @@ createTree(syntax::Arena &A, syntax::NodeKind K); // Synthesis of Syntax Nodes -syntax::EmptyStatement *createEmptyStatement(syntax::Arena &A); +syntax::EmptyStatement *createEmptyStatement(syntax::Arena &A, + TokenBufferTokenManager &TBTM); /// Creates a completely independent copy of `N` with its macros expanded. /// @@ -52,7 +56,9 @@ syntax::EmptyStatement *createEmptyStatement(syntax::Arena &A); /// * Detached, i.e. `Parent == NextSibling == nullptr` and /// `Role == Detached`. /// * Synthesized, i.e. `Original == false`. -syntax::Node *deepCopyExpandingMacros(syntax::Arena &A, const syntax::Node *N); +syntax::Node *deepCopyExpandingMacros(syntax::Arena &A, + TokenBufferTokenManager &TBTM, + const syntax::Node *N); } // namespace syntax } // namespace clang #endif diff --git a/clang/include/clang/Tooling/Syntax/Mutations.h b/clang/include/clang/Tooling/Syntax/Mutations.h index 8fd58ae34fff..6db9c88ca000 100644 --- a/clang/include/clang/Tooling/Syntax/Mutations.h +++ b/clang/include/clang/Tooling/Syntax/Mutations.h @@ -13,6 +13,7 @@ #include "clang/Tooling/Core/Replacement.h" #include "clang/Tooling/Syntax/Nodes.h" +#include "clang/Tooling/Syntax/TokenBufferTokenManager.h" #include "clang/Tooling/Syntax/Tree.h" namespace clang { @@ -20,7 +21,7 @@ namespace syntax { /// Computes textual replacements required to mimic the tree modifications made /// to the syntax tree. -tooling::Replacements computeReplacements(const Arena &A, +tooling::Replacements computeReplacements(const TokenBufferTokenManager &TBTM, const syntax::TranslationUnit &TU); /// Removes a statement or replaces it with an empty statement where one is @@ -29,7 +30,8 @@ tooling::Replacements computeReplacements(const Arena &A, /// One can remove `foo();` completely and to remove `bar();` we would need to /// replace it with an empty statement. 
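/// For instance, the lone `bar();` in `if (cond) bar();` cannot simply be
/// deleted; it has to become `;` so that the `if` keeps a syntactically valid body.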
/// EXPECTS: S->canModify() == true -void removeStatement(syntax::Arena &A, syntax::Statement *S); +void removeStatement(syntax::Arena &A, TokenBufferTokenManager &TBTM, + syntax::Statement *S); } // namespace syntax } // namespace clang diff --git a/clang/include/clang/Tooling/Syntax/Nodes.h b/clang/include/clang/Tooling/Syntax/Nodes.h index edb6d4d4381d..c4f31900d0ce 100644 --- a/clang/include/clang/Tooling/Syntax/Nodes.h +++ b/clang/include/clang/Tooling/Syntax/Nodes.h @@ -21,13 +21,8 @@ #ifndef LLVM_CLANG_TOOLING_SYNTAX_NODES_H #define LLVM_CLANG_TOOLING_SYNTAX_NODES_H -#include "clang/Basic/TokenKinds.h" -#include "clang/Lex/Token.h" -#include "clang/Tooling/Syntax/Tokens.h" +#include "clang/Basic/LLVM.h" #include "clang/Tooling/Syntax/Tree.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/raw_ostream.h" namespace clang { namespace syntax { diff --git a/clang/include/clang/Tooling/Syntax/TokenBufferTokenManager.h b/clang/include/clang/Tooling/Syntax/TokenBufferTokenManager.h new file mode 100644 index 000000000000..6522af584e9a --- /dev/null +++ b/clang/include/clang/Tooling/Syntax/TokenBufferTokenManager.h @@ -0,0 +1,70 @@ +//===- TokenBufferTokenManager.h -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLING_SYNTAX_TOKEN_BUFFER_TOKEN_MANAGER_H +#define LLVM_CLANG_TOOLING_SYNTAX_TOKEN_BUFFER_TOKEN_MANAGER_H + +#include "clang/Tooling/Syntax/TokenManager.h" +#include "clang/Tooling/Syntax/Tokens.h" + +namespace clang { +namespace syntax { + +/// A TokenBuffer-powered token manager. +/// It tracks the underlying token buffers, source manager, etc. +class TokenBufferTokenManager : public TokenManager { +public: + TokenBufferTokenManager(const TokenBuffer &Tokens, + const LangOptions &LangOpts, SourceManager &SourceMgr) + : Tokens(Tokens), LangOpts(LangOpts), SM(SourceMgr) {} + + static bool classof(const TokenManager *N) { return N->kind() == Kind; } + llvm::StringLiteral kind() const override { return Kind; } + + llvm::StringRef getText(Key I) const override { + const auto *Token = getToken(I); + assert(Token); + // Handle 'eof' separately, calling text() on it produces an empty string. + // FIXME: this special logic is for syntax::Leaf dump, move it when we + // have a direct way to retrive token kind in the syntax::Leaf. + if (Token->kind() == tok::eof) + return "<eof>"; + return Token->text(SM); + } + + const syntax::Token *getToken(Key I) const { + return reinterpret_cast<const syntax::Token *>(I); + } + SourceManager &sourceManager() { return SM; } + const SourceManager &sourceManager() const { return SM; } + const TokenBuffer &tokenBuffer() const { return Tokens; } + +private: + // This manager is powered by the TokenBuffer. + static constexpr llvm::StringLiteral Kind = "TokenBuffer"; + + /// Add \p Buffer to the underlying source manager, tokenize it and store the + /// resulting tokens. Used exclusively in `FactoryImpl` to materialize tokens + /// that were not written in user code. + std::pair<FileID, ArrayRef<Token>> + lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Buffer); + friend class FactoryImpl; + + const TokenBuffer &Tokens; + const LangOptions &LangOpts; + + /// The underlying source manager for the ExtraTokens. 
+ SourceManager &SM; + /// IDs and storage for additional tokenized files. + llvm::DenseMap<FileID, std::vector<Token>> ExtraTokens; +}; + +} // namespace syntax +} // namespace clang + +#endif // LLVM_CLANG_TOOLING_SYNTAX_TOKEN_BUFFER_TOKEN_MANAGER_H diff --git a/clang/include/clang/Tooling/Syntax/TokenManager.h b/clang/include/clang/Tooling/Syntax/TokenManager.h new file mode 100644 index 000000000000..6f0d11ce0d6b --- /dev/null +++ b/clang/include/clang/Tooling/Syntax/TokenManager.h @@ -0,0 +1,47 @@ +//===- TokenManager.h - Manage Tokens for syntax-tree ------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines Token interfaces for the clang syntax-tree. This is the level of +// abstraction that the syntax-tree uses to operate on Token. +// +// TokenManager decouples the syntax-tree from a particular token +// implementation. For example, a TokenBuffer captured from a clang parser may +// track macro expansions and associate tokens with clang's SourceManager, while +// a clang pseudoparser would use a flat array of raw-lexed tokens in memory. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLING_SYNTAX_TOKEN_MANAGER_H +#define LLVM_CLANG_TOOLING_SYNTAX_TOKEN_MANAGER_H + +#include "llvm/ADT/StringRef.h" +#include <cstdint> + +namespace clang { +namespace syntax { + +/// Defines interfaces for operating "Token" in the clang syntax-tree. +class TokenManager { +public: + virtual ~TokenManager() = default; + + /// Describes what the exact class kind of the TokenManager is. + virtual llvm::StringLiteral kind() const = 0; + + /// A key to identify a specific token. The token concept depends on the + /// underlying implementation -- it can be a spelled token from the original + /// source file or an expanded token. + /// The syntax-tree Leaf node holds a Key. + using Key = uintptr_t; + virtual llvm::StringRef getText(Key K) const = 0; +}; + +} // namespace syntax +} // namespace clang + +#endif // LLVM_CLANG_TOOLING_SYNTAX_TOKEN_MANAGER_H diff --git a/clang/include/clang/Tooling/Syntax/Tokens.h b/clang/include/clang/Tooling/Syntax/Tokens.h index e4bc1553c2d6..9adb2b12f1ad 100644 --- a/clang/include/clang/Tooling/Syntax/Tokens.h +++ b/clang/include/clang/Tooling/Syntax/Tokens.h @@ -27,7 +27,6 @@ #ifndef LLVM_CLANG_TOOLING_SYNTAX_TOKENS_H #define LLVM_CLANG_TOOLING_SYNTAX_TOKENS_H -#include "clang/Basic/FileManager.h" #include "clang/Basic/LangOptions.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" diff --git a/clang/include/clang/Tooling/Syntax/Tree.h b/clang/include/clang/Tooling/Syntax/Tree.h index 2063c6b7d82a..c9c957af0a28 100644 --- a/clang/include/clang/Tooling/Syntax/Tree.h +++ b/clang/include/clang/Tooling/Syntax/Tree.h @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // Defines the basic structure of the syntax tree. There are two kinds of nodes: -// - leaf nodes correspond to a token in the expanded token stream, +// - leaf nodes correspond to tokens, // - tree nodes correspond to language grammar constructs. // // The tree is initially built from an AST. 
Each node of a newly built tree @@ -21,48 +21,22 @@ #ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_H #define LLVM_CLANG_TOOLING_SYNTAX_TREE_H -#include "clang/Basic/LangOptions.h" -#include "clang/Basic/SourceLocation.h" -#include "clang/Basic/SourceManager.h" #include "clang/Basic/TokenKinds.h" -#include "clang/Tooling/Syntax/Tokens.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseMap.h" +#include "clang/Tooling/Syntax/TokenManager.h" #include "llvm/ADT/iterator.h" #include "llvm/Support/Allocator.h" #include <cstdint> -#include <iterator> +#include <vector> namespace clang { namespace syntax { -/// A memory arena for syntax trees. Also tracks the underlying token buffers, -/// source manager, etc. +/// A memory arena for syntax trees. +// FIXME: use BumpPtrAllocator directly. class Arena { public: - Arena(SourceManager &SourceMgr, const LangOptions &LangOpts, - const TokenBuffer &Tokens); - - const SourceManager &getSourceManager() const { return SourceMgr; } - const LangOptions &getLangOptions() const { return LangOpts; } - - const TokenBuffer &getTokenBuffer() const; llvm::BumpPtrAllocator &getAllocator() { return Allocator; } - -private: - /// Add \p Buffer to the underlying source manager, tokenize it and store the - /// resulting tokens. Used exclusively in `FactoryImpl` to materialize tokens - /// that were not written in user code. - std::pair<FileID, ArrayRef<Token>> - lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Buffer); - friend class FactoryImpl; - private: - SourceManager &SourceMgr; - const LangOptions &LangOpts; - const TokenBuffer &Tokens; - /// IDs and storage for additional tokenized files. - llvm::DenseMap<FileID, std::vector<Token>> ExtraTokens; /// Keeps all the allocated nodes and their intermediate data structures. llvm::BumpPtrAllocator Allocator; }; @@ -122,9 +96,9 @@ public: Node *getPreviousSibling() { return PreviousSibling; } /// Dumps the structure of a subtree. For debugging and testing purposes. - std::string dump(const SourceManager &SM) const; + std::string dump(const TokenManager &SM) const; /// Dumps the tokens forming this subtree. - std::string dumpTokens(const SourceManager &SM) const; + std::string dumpTokens(const TokenManager &SM) const; /// Asserts invariants on this node of the tree and its immediate children. /// Will not recurse into the subtree. No-op if NDEBUG is set. @@ -153,16 +127,17 @@ private: unsigned CanModify : 1; }; -/// A leaf node points to a single token inside the expanded token stream. +/// A leaf node points to a single token. +// FIXME: add TokenKind field (borrow some bits from the Node::kind). class Leaf final : public Node { public: - Leaf(const Token *T); + Leaf(TokenManager::Key K); static bool classof(const Node *N); - const Token *getToken() const { return Tok; } + TokenManager::Key getTokenKey() const { return K; } private: - const Token *Tok; + TokenManager::Key K; }; /// A node that has children and represents a syntactic language construct. 
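// The TokenManager interface introduced above decouples syntax::Leaf from any
// particular token representation: the tree only needs kind() and getText(Key).
// As a minimal sketch (not part of this change; the class, member, and kind
// names below are illustrative assumptions), a manager backed by a flat
// in-memory spelling table, as a pseudoparser might keep, could look like this,
// with each Leaf key simply being an index into that table:

#include "clang/Tooling/Syntax/TokenManager.h"
#include <string>
#include <utility>
#include <vector>

namespace clang {
namespace syntax {
class FlatTokenManager : public TokenManager {
public:
  static constexpr llvm::StringLiteral Kind = "FlatArray";
  static bool classof(const TokenManager *N) { return N->kind() == Kind; }

  explicit FlatTokenManager(std::vector<std::string> Spellings)
      : Spellings(std::move(Spellings)) {}

  llvm::StringLiteral kind() const override { return Kind; }

  // A Key is simply an index into the spelling table.
  llvm::StringRef getText(Key K) const override { return Spellings[K]; }

private:
  std::vector<std::string> Spellings;
};
} // namespace syntax
} // namespace clang

// Node::dump(const TokenManager &) and Leaf::getTokenKey() below then work
// unchanged against such an implementation, just as they do against the
// TokenBufferTokenManager added in this import.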
diff --git a/clang/include/clang/module.modulemap b/clang/include/clang/module.modulemap index aca4d5ab919a..01bce77718b3 100644 --- a/clang/include/clang/module.modulemap +++ b/clang/include/clang/module.modulemap @@ -71,10 +71,17 @@ module Clang_Basic { textual header "Basic/RISCVVTypes.def" textual header "Basic/Sanitizers.def" textual header "Basic/TargetCXXABI.def" - textual header "Basic/TokenKinds.def" module * { export * } } +module Clang_Basic_TokenKinds { + requires cplusplus + + header "Basic/TokenKinds.h" + textual header "Basic/TokenKinds.def" + + export * +} module Clang_CodeGen { requires cplusplus umbrella "CodeGen" module * { export * } } module Clang_Config { requires cplusplus umbrella "Config" module * { export * } } @@ -182,5 +189,8 @@ module Clang_ToolingCore { module Clang_ToolingInclusions { requires cplusplus - umbrella "Tooling/Inclusions" module * { export * } + umbrella "Tooling/Inclusions" + textual header "Tooling/Inclusions/CSymbolMap.inc" + textual header "Tooling/Inclusions/StdSymbolMap.inc" + module * { export * } } diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index 73c3f02e67a8..f7e7b73d1218 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -3109,6 +3109,11 @@ Error ASTNodeImporter::ImportTemplateInformation( case FunctionDecl::TK_FunctionTemplate: return Error::success(); + case FunctionDecl::TK_DependentNonTemplate: + if (Expected<FunctionDecl *> InstFDOrErr = + import(FromFD->getInstantiatedFromDecl())) + ToFD->setInstantiatedFromDecl(*InstFDOrErr); + return Error::success(); case FunctionDecl::TK_MemberSpecialization: { TemplateSpecializationKind TSK = FromFD->getTemplateSpecializationKind(); diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 5e5101203e6c..f88a2e3fa268 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -3732,8 +3732,13 @@ const IdentifierInfo *FunctionDecl::getLiteralIdentifier() const { FunctionDecl::TemplatedKind FunctionDecl::getTemplatedKind() const { if (TemplateOrSpecialization.isNull()) return TK_NonTemplate; - if (TemplateOrSpecialization.is<FunctionTemplateDecl *>()) + if (const auto *ND = TemplateOrSpecialization.dyn_cast<NamedDecl *>()) { + if (isa<FunctionDecl>(ND)) + return TK_DependentNonTemplate; + assert(isa<FunctionTemplateDecl>(ND) && + "No other valid types in NamedDecl"); return TK_FunctionTemplate; + } if (TemplateOrSpecialization.is<MemberSpecializationInfo *>()) return TK_MemberSpecialization; if (TemplateOrSpecialization.is<FunctionTemplateSpecializationInfo *>()) @@ -3774,15 +3779,28 @@ FunctionDecl::setInstantiationOfMemberFunction(ASTContext &C, } FunctionTemplateDecl *FunctionDecl::getDescribedFunctionTemplate() const { - return TemplateOrSpecialization.dyn_cast<FunctionTemplateDecl *>(); + return dyn_cast_or_null<FunctionTemplateDecl>( + TemplateOrSpecialization.dyn_cast<NamedDecl *>()); } -void FunctionDecl::setDescribedFunctionTemplate(FunctionTemplateDecl *Template) { +void FunctionDecl::setDescribedFunctionTemplate( + FunctionTemplateDecl *Template) { assert(TemplateOrSpecialization.isNull() && "Member function is already a specialization"); TemplateOrSpecialization = Template; } +void FunctionDecl::setInstantiatedFromDecl(FunctionDecl *FD) { + assert(TemplateOrSpecialization.isNull() && + "Function is already a specialization"); + TemplateOrSpecialization = FD; +} + +FunctionDecl *FunctionDecl::getInstantiatedFromDecl() const { + return dyn_cast_or_null<FunctionDecl>( + 
TemplateOrSpecialization.dyn_cast<NamedDecl *>()); +} + bool FunctionDecl::isImplicitlyInstantiable() const { // If the function is invalid, it can't be implicitly instantiated. if (isInvalidDecl()) diff --git a/clang/lib/AST/DeclBase.cpp b/clang/lib/AST/DeclBase.cpp index 13dd6da3f24f..d12330de1500 100644 --- a/clang/lib/AST/DeclBase.cpp +++ b/clang/lib/AST/DeclBase.cpp @@ -283,8 +283,9 @@ unsigned Decl::getTemplateDepth() const { return cast<Decl>(DC)->getTemplateDepth(); } -const DeclContext *Decl::getParentFunctionOrMethod() const { - for (const DeclContext *DC = getDeclContext(); +const DeclContext *Decl::getParentFunctionOrMethod(bool LexicalParent) const { + for (const DeclContext *DC = LexicalParent ? getLexicalDeclContext() + : getDeclContext(); DC && !DC->isTranslationUnit() && !DC->isNamespace(); DC = DC->getParent()) if (DC->isFunctionOrMethod()) diff --git a/clang/lib/AST/DeclCXX.cpp b/clang/lib/AST/DeclCXX.cpp index 6fc9a86bc3cf..c307cbe02ecf 100644 --- a/clang/lib/AST/DeclCXX.cpp +++ b/clang/lib/AST/DeclCXX.cpp @@ -2410,7 +2410,7 @@ bool CXXMethodDecl::isMoveAssignmentOperator() const { return false; QualType ParamType = getParamDecl(0)->getType(); - if (!isa<RValueReferenceType>(ParamType)) + if (!ParamType->isRValueReferenceType()) return false; ParamType = ParamType->getPointeeType(); diff --git a/clang/lib/AST/DeclPrinter.cpp b/clang/lib/AST/DeclPrinter.cpp index 3f04d9b4073e..b041e2a67e95 100644 --- a/clang/lib/AST/DeclPrinter.cpp +++ b/clang/lib/AST/DeclPrinter.cpp @@ -1007,10 +1007,10 @@ void DeclPrinter::VisitCXXRecordDecl(CXXRecordDecl *D) { } } - if (auto *Def = D->getDefinition()) { - if (D->hasAttr<FinalAttr>()) { - Out << " final"; - } + if (D->hasDefinition()) { + if (D->hasAttr<FinalAttr>()) { + Out << " final"; + } } if (D->isCompleteDefinition()) { diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index beeb775371c3..6ffb65d8e71d 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11592,9 +11592,15 @@ static bool isUserWritingOffTheEnd(const ASTContext &Ctx, const LValue &LVal) { // conservative with the last element in structs (if it's an array), so our // current behavior is more compatible than an explicit list approach would // be. 
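+  // E.g. for `struct S { int len; char tail[1]; };` a trailing `tail[1]` member
+  // still counts here at -fstrict-flex-arrays levels 0 and 1, but not at level
+  // 2, where only `char tail[]` or `char tail[0]` do (restating the condition
+  // added below).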
+ int StrictFlexArraysLevel = Ctx.getLangOpts().StrictFlexArrays; return LVal.InvalidBase && Designator.Entries.size() == Designator.MostDerivedPathLength && Designator.MostDerivedIsArrayElement && + (Designator.isMostDerivedAnUnsizedArray() || + Designator.getMostDerivedArraySize() == 0 || + (Designator.getMostDerivedArraySize() == 1 && + StrictFlexArraysLevel < 2) || + StrictFlexArraysLevel == 0) && isDesignatorAtObjectEnd(Ctx, LVal); } diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp index 79e9fa6ab86f..22643d4edbec 100644 --- a/clang/lib/AST/TextNodeDumper.cpp +++ b/clang/lib/AST/TextNodeDumper.cpp @@ -1720,6 +1720,9 @@ void TextNodeDumper::VisitFunctionDecl(const FunctionDecl *D) { } } + if (!D->isInlineSpecified() && D->isInlined()) { + OS << " implicit-inline"; + } // Since NumParams comes from the FunctionProtoType of the FunctionDecl and // the Params are set later, it is possible for a dump during debugging to // encounter a FunctionDecl that has been created but hasn't been assigned diff --git a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp index cd87e87a6aca..5105999741e6 100644 --- a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp +++ b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp @@ -14,7 +14,9 @@ #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" #include "clang/AST/ExprCXX.h" +#include "clang/Analysis/FlowSensitive/DebugSupport.h" #include "clang/Analysis/FlowSensitive/Value.h" +#include "llvm/Support/Debug.h" #include <cassert> #include <memory> #include <utility> @@ -293,6 +295,17 @@ BoolValue &DataflowAnalysisContext::buildAndSubstituteFlowConditionWithCache( return substituteBoolValue(*ConstraintsIT->second, SubstitutionsCache); } +void DataflowAnalysisContext::dumpFlowCondition(AtomicBoolValue &Token) { + llvm::DenseSet<BoolValue *> Constraints = {&Token}; + llvm::DenseSet<AtomicBoolValue *> VisitedTokens; + addTransitiveFlowConditionConstraints(Token, Constraints, VisitedTokens); + + llvm::DenseMap<const AtomicBoolValue *, std::string> AtomNames = { + {&getBoolLiteralValue(false), "False"}, + {&getBoolLiteralValue(true), "True"}}; + llvm::dbgs() << debugString(Constraints, AtomNames); +} + } // namespace dataflow } // namespace clang diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp index 3aea670f20aa..2b6cd0c4f857 100644 --- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp +++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp @@ -15,10 +15,8 @@ #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" -#include "clang/AST/ExprCXX.h" #include "clang/AST/Type.h" #include "clang/Analysis/FlowSensitive/DataflowLattice.h" -#include "clang/Analysis/FlowSensitive/StorageLocation.h" #include "clang/Analysis/FlowSensitive/Value.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" @@ -512,5 +510,9 @@ bool Environment::flowConditionImplies(BoolValue &Val) const { return DACtx->flowConditionImplies(*FlowConditionToken, Val); } +void Environment::dump() const { + DACtx->dumpFlowCondition(*FlowConditionToken); +} + } // namespace dataflow } // namespace clang diff --git a/clang/lib/Analysis/FlowSensitive/DebugSupport.cpp b/clang/lib/Analysis/FlowSensitive/DebugSupport.cpp index 305d9d346089..309ff0682f50 100644 --- 
a/clang/lib/Analysis/FlowSensitive/DebugSupport.cpp +++ b/clang/lib/Analysis/FlowSensitive/DebugSupport.cpp @@ -17,6 +17,7 @@ #include "clang/Analysis/FlowSensitive/Solver.h" #include "clang/Analysis/FlowSensitive/Value.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringSet.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormatAdapters.h" @@ -30,6 +31,28 @@ using llvm::AlignStyle; using llvm::fmt_pad; using llvm::formatv; +std::string debugString(Solver::Result::Assignment Assignment) { + switch (Assignment) { + case Solver::Result::Assignment::AssignedFalse: + return "False"; + case Solver::Result::Assignment::AssignedTrue: + return "True"; + } + llvm_unreachable("Booleans can only be assigned true/false"); +} + +std::string debugString(Solver::Result::Status Status) { + switch (Status) { + case Solver::Result::Status::Satisfiable: + return "Satisfiable"; + case Solver::Result::Status::Unsatisfiable: + return "Unsatisfiable"; + case Solver::Result::Status::TimedOut: + return "TimedOut"; + } + llvm_unreachable("Unhandled SAT check result status"); +} + namespace { class DebugStringGenerator { @@ -80,9 +103,25 @@ public: return formatv("{0}", fmt_pad(S, Indent, 0)); } + std::string debugString(const llvm::DenseSet<BoolValue *> &Constraints) { + std::vector<std::string> ConstraintsStrings; + ConstraintsStrings.reserve(Constraints.size()); + for (BoolValue *Constraint : Constraints) { + ConstraintsStrings.push_back(debugString(*Constraint)); + } + llvm::sort(ConstraintsStrings); + + std::string Result; + for (const std::string &S : ConstraintsStrings) { + Result += S; + Result += '\n'; + } + return Result; + } + /// Returns a string representation of a set of boolean `Constraints` and the /// `Result` of satisfiability checking on the `Constraints`. - std::string debugString(const std::vector<BoolValue *> &Constraints, + std::string debugString(ArrayRef<BoolValue *> &Constraints, const Solver::Result &Result) { auto Template = R"( Constraints @@ -101,10 +140,9 @@ Constraints ConstraintsStrings.push_back(debugString(*Constraint)); } - auto StatusString = debugString(Result.getStatus()); + auto StatusString = clang::dataflow::debugString(Result.getStatus()); auto Solution = Result.getSolution(); - auto SolutionString = - Solution.hasValue() ? "\n" + debugString(Solution.value()) : ""; + auto SolutionString = Solution ? "\n" + debugString(Solution.value()) : ""; return formatv( Template, @@ -127,38 +165,14 @@ private: auto Line = formatv("{0} = {1}", fmt_align(getAtomName(AtomAssignment.first), AlignStyle::Left, MaxNameLength), - debugString(AtomAssignment.second)); + clang::dataflow::debugString(AtomAssignment.second)); Lines.push_back(Line); } - llvm::sort(Lines.begin(), Lines.end()); + llvm::sort(Lines); return formatv("{0:$[\n]}", llvm::make_range(Lines.begin(), Lines.end())); } - /// Returns a string representation of a boolean assignment to true or false. - std::string debugString(Solver::Result::Assignment Assignment) { - switch (Assignment) { - case Solver::Result::Assignment::AssignedFalse: - return "False"; - case Solver::Result::Assignment::AssignedTrue: - return "True"; - } - llvm_unreachable("Booleans can only be assigned true/false"); - } - - /// Returns a string representation of the result status of a SAT check. 
- std::string debugString(Solver::Result::Status Status) { - switch (Status) { - case Solver::Result::Status::Satisfiable: - return "Satisfiable"; - case Solver::Result::Status::Unsatisfiable: - return "Unsatisfiable"; - case Solver::Result::Status::TimedOut: - return "TimedOut"; - } - llvm_unreachable("Unhandled SAT check result status"); - } - /// Returns the name assigned to `Atom`, either user-specified or created by /// default rules (B0, B1, ...). std::string getAtomName(const AtomicBoolValue *Atom) { @@ -186,8 +200,13 @@ debugString(const BoolValue &B, } std::string -debugString(const std::vector<BoolValue *> &Constraints, - const Solver::Result &Result, +debugString(const llvm::DenseSet<BoolValue *> &Constraints, + llvm::DenseMap<const AtomicBoolValue *, std::string> AtomNames) { + return DebugStringGenerator(std::move(AtomNames)).debugString(Constraints); +} + +std::string +debugString(ArrayRef<BoolValue *> Constraints, const Solver::Result &Result, llvm::DenseMap<const AtomicBoolValue *, std::string> AtomNames) { return DebugStringGenerator(std::move(AtomNames)) .debugString(Constraints, Result); diff --git a/clang/lib/Analysis/ThreadSafety.cpp b/clang/lib/Analysis/ThreadSafety.cpp index 03bbf078d7e8..32d950864ce7 100644 --- a/clang/lib/Analysis/ThreadSafety.cpp +++ b/clang/lib/Analysis/ThreadSafety.cpp @@ -1679,6 +1679,17 @@ void BuildLockset::checkAccess(const Expr *Exp, AccessKind AK, return; } + if (const auto *BO = dyn_cast<BinaryOperator>(Exp)) { + switch (BO->getOpcode()) { + case BO_PtrMemD: // .* + return checkAccess(BO->getLHS(), AK, POK); + case BO_PtrMemI: // ->* + return checkPtAccess(BO->getLHS(), AK, POK); + default: + return; + } + } + if (const auto *AE = dyn_cast<ArraySubscriptExpr>(Exp)) { checkPtAccess(AE->getLHS(), AK, POK); return; diff --git a/clang/lib/Basic/MakeSupport.cpp b/clang/lib/Basic/MakeSupport.cpp index 37838f7bbc7b..4ddfcc350410 100644 --- a/clang/lib/Basic/MakeSupport.cpp +++ b/clang/lib/Basic/MakeSupport.cpp @@ -32,4 +32,4 @@ void clang::quoteMakeTarget(StringRef Target, SmallVectorImpl<char> &Res) { Res.push_back(Target[i]); } -}
\ No newline at end of file +} diff --git a/clang/lib/Basic/NoSanitizeList.cpp b/clang/lib/Basic/NoSanitizeList.cpp index 3efd613b0d33..e7e63c1f419e 100644 --- a/clang/lib/Basic/NoSanitizeList.cpp +++ b/clang/lib/Basic/NoSanitizeList.cpp @@ -47,6 +47,11 @@ bool NoSanitizeList::containsFile(SanitizerMask Mask, StringRef FileName, return SSCL->inSection(Mask, "src", FileName, Category); } +bool NoSanitizeList::containsMainFile(SanitizerMask Mask, StringRef FileName, + StringRef Category) const { + return SSCL->inSection(Mask, "mainfile", FileName, Category); +} + bool NoSanitizeList::containsLocation(SanitizerMask Mask, SourceLocation Loc, StringRef Category) const { return Loc.isValid() && diff --git a/clang/lib/Basic/Sarif.cpp b/clang/lib/Basic/Sarif.cpp new file mode 100644 index 000000000000..faca9c508c08 --- /dev/null +++ b/clang/lib/Basic/Sarif.cpp @@ -0,0 +1,389 @@ +//===-- clang/Basic/Sarif.cpp - SarifDocumentWriter class definition ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the declaration of the SARIFDocumentWriter class, and +/// associated builders such as: +/// - \ref SarifArtifact +/// - \ref SarifArtifactLocation +/// - \ref SarifRule +/// - \ref SarifResult +//===----------------------------------------------------------------------===// +#include "clang/Basic/Sarif.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/ConvertUTF.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/Path.h" + +#include <string> +#include <utility> + +using namespace clang; +using namespace llvm; + +using clang::detail::SarifArtifact; +using clang::detail::SarifArtifactLocation; + +static StringRef getFileName(const FileEntry &FE) { + StringRef Filename = FE.tryGetRealPathName(); + if (Filename.empty()) + Filename = FE.getName(); + return Filename; +} +/// \name URI +/// @{ + +/// \internal +/// \brief +/// Return the RFC3986 encoding of the input character. +/// +/// \param C Character to encode to RFC3986. +/// +/// \return The RFC3986 representation of \c C. +static std::string percentEncodeURICharacter(char C) { + // RFC 3986 claims alpha, numeric, and this handful of + // characters are not reserved for the path component and + // should be written out directly. Otherwise, percent + // encode the character and write that out instead of the + // reserved character. + if (llvm::isAlnum(C) || + StringRef::npos != StringRef("-._~:@!$&'()*+,;=").find(C)) + return std::string(&C, 1); + return "%" + llvm::toHex(StringRef(&C, 1)); +} + +/// \internal +/// \brief Return a URI representing the given file name. +/// +/// \param Filename The filename to be represented as URI. +/// +/// \return RFC3986 URI representing the input file name. +static std::string fileNameToURI(StringRef Filename) { + SmallString<32> Ret = StringRef("file://"); + + // Get the root name to see if it has a URI authority. + StringRef Root = sys::path::root_name(Filename); + if (Root.startswith("//")) { + // There is an authority, so add it to the URI. 
+ Ret += Root.drop_front(2).str(); + } else if (!Root.empty()) { + // There is no authority, so end the component and add the root to the URI. + Ret += Twine("/" + Root).str(); + } + + auto Iter = sys::path::begin(Filename), End = sys::path::end(Filename); + assert(Iter != End && "Expected there to be a non-root path component."); + // Add the rest of the path components, encoding any reserved characters; + // we skip past the first path component, as it was handled above. + std::for_each(++Iter, End, [&Ret](StringRef Component) { + // For reasons unknown to me, we may get a backslash with Windows native + // paths for the initial backslash following the drive component, which + // we need to ignore as a URI path part. + if (Component == "\\") + return; + + // Add the separator between the previous path part and the one being + // currently processed. + Ret += "/"; + + // URI encode the part. + for (char C : Component) { + Ret += percentEncodeURICharacter(C); + } + }); + + return std::string(Ret); +} +/// @} + +/// \brief Calculate the column position expressed in the number of UTF-8 code +/// points from column start to the source location +/// +/// \param Loc The source location whose column needs to be calculated. +/// \param TokenLen Optional hint for when the token is multiple bytes long. +/// +/// \return The column number as a UTF-8 aware byte offset from column start to +/// the effective source location. +static unsigned int adjustColumnPos(FullSourceLoc Loc, + unsigned int TokenLen = 0) { + assert(!Loc.isInvalid() && "invalid Loc when adjusting column position"); + + std::pair<FileID, unsigned> LocInfo = Loc.getDecomposedLoc(); + Optional<MemoryBufferRef> Buf = + Loc.getManager().getBufferOrNone(LocInfo.first); + assert(Buf && "got an invalid buffer for the location's file"); + assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) && + "token extends past end of buffer?"); + + // Adjust the offset to be the start of the line, since we'll be counting + // Unicode characters from there until our column offset.
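+  // E.g. if the line starts with "αβ" (two 2-byte UTF-8 code points), a token
+  // at byte offset 4 from the line start is reported at column 3.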
+ unsigned int Off = LocInfo.second - (Loc.getExpansionColumnNumber() - 1); + unsigned int Ret = 1; + while (Off < (LocInfo.second + TokenLen)) { + Off += getNumBytesForUTF8(Buf->getBuffer()[Off]); + Ret++; + } + + return Ret; +} + +/// \name SARIF Utilities +/// @{ + +/// \internal +json::Object createMessage(StringRef Text) { + return json::Object{{"text", Text.str()}}; +} + +/// \internal +/// \pre CharSourceRange must be a token range +static json::Object createTextRegion(const SourceManager &SM, + const CharSourceRange &R) { + FullSourceLoc FirstTokenLoc{R.getBegin(), SM}; + FullSourceLoc LastTokenLoc{R.getEnd(), SM}; + json::Object Region{{"startLine", FirstTokenLoc.getExpansionLineNumber()}, + {"startColumn", adjustColumnPos(FirstTokenLoc)}, + {"endColumn", adjustColumnPos(LastTokenLoc)}}; + if (FirstTokenLoc != LastTokenLoc) { + Region["endLine"] = LastTokenLoc.getExpansionLineNumber(); + } + return Region; +} + +static json::Object createLocation(json::Object &&PhysicalLocation, + StringRef Message = "") { + json::Object Ret{{"physicalLocation", std::move(PhysicalLocation)}}; + if (!Message.empty()) + Ret.insert({"message", createMessage(Message)}); + return Ret; +} + +static StringRef importanceToStr(ThreadFlowImportance I) { + switch (I) { + case ThreadFlowImportance::Important: + return "important"; + case ThreadFlowImportance::Essential: + return "essential"; + case ThreadFlowImportance::Unimportant: + return "unimportant"; + } + llvm_unreachable("Fully covered switch is not so fully covered"); +} + +static json::Object +createThreadFlowLocation(json::Object &&Location, + const ThreadFlowImportance &Importance) { + return json::Object{{"location", std::move(Location)}, + {"importance", importanceToStr(Importance)}}; +} +/// @} + +json::Object +SarifDocumentWriter::createPhysicalLocation(const CharSourceRange &R) { + assert(R.isValid() && + "Cannot create a physicalLocation from invalid SourceRange!"); + assert(R.isCharRange() && + "Cannot create a physicalLocation from a token range!"); + FullSourceLoc Start{R.getBegin(), SourceMgr}; + const FileEntry *FE = Start.getExpansionLoc().getFileEntry(); + assert(FE != nullptr && "Diagnostic does not exist within a valid file!"); + + const std::string &FileURI = fileNameToURI(getFileName(*FE)); + auto I = CurrentArtifacts.find(FileURI); + + if (I == CurrentArtifacts.end()) { + uint32_t Idx = static_cast<uint32_t>(CurrentArtifacts.size()); + const SarifArtifactLocation &Location = + SarifArtifactLocation::create(FileURI).setIndex(Idx); + const SarifArtifact &Artifact = SarifArtifact::create(Location) + .setRoles({"resultFile"}) + .setLength(FE->getSize()) + .setMimeType("text/plain"); + auto StatusIter = CurrentArtifacts.insert({FileURI, Artifact}); + // If inserted, ensure the original iterator points to the newly inserted + // element, so it can be used downstream. + if (StatusIter.second) + I = StatusIter.first; + } + assert(I != CurrentArtifacts.end() && "Failed to insert new artifact"); + const SarifArtifactLocation &Location = I->second.Location; + uint32_t Idx = Location.Index.value(); + return json::Object{{{"artifactLocation", json::Object{{{"index", Idx}}}}, + {"region", createTextRegion(SourceMgr, R)}}}; +} + +json::Object &SarifDocumentWriter::getCurrentTool() { + assert(!Closed && "SARIF Document is closed. " + "Need to call createRun() before using getcurrentTool!"); + + // Since Closed = false here, expect there to be at least 1 Run, anything + // else is an invalid state. 
+ assert(!Runs.empty() && "There are no runs associated with the document!"); + + return *Runs.back().getAsObject()->get("tool")->getAsObject(); +} + +void SarifDocumentWriter::reset() { + CurrentRules.clear(); + CurrentArtifacts.clear(); +} + +void SarifDocumentWriter::endRun() { + // Exit early if trying to close a closed Document. + if (Closed) { + reset(); + return; + } + + // Since Closed = false here, expect there to be at least 1 Run, anything + // else is an invalid state. + assert(!Runs.empty() && "There are no runs associated with the document!"); + + // Flush all the rules. + json::Object &Tool = getCurrentTool(); + json::Array Rules; + for (const SarifRule &R : CurrentRules) { + json::Object Rule{ + {"name", R.Name}, + {"id", R.Id}, + {"fullDescription", json::Object{{"text", R.Description}}}}; + if (!R.HelpURI.empty()) + Rule["helpUri"] = R.HelpURI; + Rules.emplace_back(std::move(Rule)); + } + json::Object &Driver = *Tool.getObject("driver"); + Driver["rules"] = std::move(Rules); + + // Flush all the artifacts. + json::Object &Run = getCurrentRun(); + json::Array *Artifacts = Run.getArray("artifacts"); + for (const auto &Pair : CurrentArtifacts) { + const SarifArtifact &A = Pair.getValue(); + json::Object Loc{{"uri", A.Location.URI}}; + if (A.Location.Index.has_value()) { + Loc["index"] = static_cast<int64_t>(A.Location.Index.value()); + } + json::Object Artifact; + Artifact["location"] = std::move(Loc); + if (A.Length.has_value()) + Artifact["length"] = static_cast<int64_t>(A.Length.value()); + if (!A.Roles.empty()) + Artifact["roles"] = json::Array(A.Roles); + if (!A.MimeType.empty()) + Artifact["mimeType"] = A.MimeType; + if (A.Offset.has_value()) + Artifact["offset"] = A.Offset; + Artifacts->push_back(json::Value(std::move(Artifact))); + } + + // Clear, reset temporaries before next run. + reset(); + + // Mark the document as closed. + Closed = true; +} + +json::Array +SarifDocumentWriter::createThreadFlows(ArrayRef<ThreadFlow> ThreadFlows) { + json::Object Ret{{"locations", json::Array{}}}; + json::Array Locs; + for (const auto &ThreadFlow : ThreadFlows) { + json::Object PLoc = createPhysicalLocation(ThreadFlow.Range); + json::Object Loc = createLocation(std::move(PLoc), ThreadFlow.Message); + Locs.emplace_back( + createThreadFlowLocation(std::move(Loc), ThreadFlow.Importance)); + } + Ret["locations"] = std::move(Locs); + return json::Array{std::move(Ret)}; +} + +json::Object +SarifDocumentWriter::createCodeFlow(ArrayRef<ThreadFlow> ThreadFlows) { + return json::Object{{"threadFlows", createThreadFlows(ThreadFlows)}}; +} + +void SarifDocumentWriter::createRun(StringRef ShortToolName, + StringRef LongToolName, + StringRef ToolVersion) { + // Clear resources associated with a previous run. + endRun(); + + // Signify a new run has begun. + Closed = false; + + json::Object Tool{ + {"driver", + json::Object{{"name", ShortToolName}, + {"fullName", LongToolName}, + {"language", "en-US"}, + {"version", ToolVersion}, + {"informationUri", + "https://clang.llvm.org/docs/UsersManual.html"}}}}; + json::Object TheRun{{"tool", std::move(Tool)}, + {"results", {}}, + {"artifacts", {}}, + {"columnKind", "unicodeCodePoints"}}; + Runs.emplace_back(std::move(TheRun)); +} + +json::Object &SarifDocumentWriter::getCurrentRun() { + assert(!Closed && + "SARIF Document is closed. " + "Can only getCurrentRun() if document is opened via createRun(), " + "create a run first"); + + // Since Closed = false here, expect there to be at least 1 Run, anything + // else is an invalid state. 
+ assert(!Runs.empty() && "There are no runs associated with the document!"); + return *Runs.back().getAsObject(); +} + +size_t SarifDocumentWriter::createRule(const SarifRule &Rule) { + size_t Ret = CurrentRules.size(); + CurrentRules.emplace_back(Rule); + return Ret; +} + +void SarifDocumentWriter::appendResult(const SarifResult &Result) { + size_t RuleIdx = Result.RuleIdx; + assert(RuleIdx < CurrentRules.size() && + "Trying to reference a rule that doesn't exist"); + json::Object Ret{{"message", createMessage(Result.DiagnosticMessage)}, + {"ruleIndex", static_cast<int64_t>(RuleIdx)}, + {"ruleId", CurrentRules[RuleIdx].Id}}; + if (!Result.Locations.empty()) { + json::Array Locs; + for (auto &Range : Result.Locations) { + Locs.emplace_back(createLocation(createPhysicalLocation(Range))); + } + Ret["locations"] = std::move(Locs); + } + if (!Result.ThreadFlows.empty()) + Ret["codeFlows"] = json::Array{createCodeFlow(Result.ThreadFlows)}; + json::Object &Run = getCurrentRun(); + json::Array *Results = Run.getArray("results"); + Results->emplace_back(std::move(Ret)); +} + +json::Object SarifDocumentWriter::createDocument() { + // Flush all temporaries to their destinations if needed. + endRun(); + + json::Object Doc{ + {"$schema", SchemaURI}, + {"version", SchemaVersion}, + }; + if (!Runs.empty()) + Doc["runs"] = json::Array(Runs); + return Doc; +} diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 50256d8e210c..80f2601b0a24 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -250,6 +250,7 @@ bool AMDGPUTargetInfo::initFeatureMap( break; case GK_GFX940: Features["gfx940-insts"] = true; + Features["fp8-insts"] = true; LLVM_FALLTHROUGH; case GK_GFX90A: Features["gfx90a-insts"] = true; diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp index cb2cdb50e18e..7e6c0620385a 100644 --- a/clang/lib/Basic/Targets/RISCV.cpp +++ b/clang/lib/Basic/Targets/RISCV.cpp @@ -158,8 +158,10 @@ void RISCVTargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro(Twine("__riscv_", ExtName), Twine(Version)); } - if (ISAInfo->hasExtension("m")) { + if (ISAInfo->hasExtension("m") || ISAInfo->hasExtension("zmmul")) Builder.defineMacro("__riscv_mul"); + + if (ISAInfo->hasExtension("m")) { Builder.defineMacro("__riscv_div"); Builder.defineMacro("__riscv_muldiv"); } diff --git a/clang/lib/CodeGen/ABIInfo.h b/clang/lib/CodeGen/ABIInfo.h index 6214148adab9..fe6cc7a2b1c7 100644 --- a/clang/lib/CodeGen/ABIInfo.h +++ b/clang/lib/CodeGen/ABIInfo.h @@ -35,10 +35,6 @@ namespace CodeGen { class CodeGenTypes; class SwiftABIInfo; -namespace swiftcall { - class SwiftAggLowering; -} - // FIXME: All of this stuff should be part of the target interface // somehow. 
It is currently here because it is not clear how to factor // the targets to support this, since the Targets currently live in a diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 8c7ee6b078f2..113c629bf9ed 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -10,6 +10,7 @@ // //===----------------------------------------------------------------------===// +#include "ABIInfo.h" #include "CGCUDARuntime.h" #include "CGCXXABI.h" #include "CGObjCRuntime.h" diff --git a/clang/lib/CodeGen/CGCall.h b/clang/lib/CodeGen/CGCall.h index af63e1bddd2d..59c3f304f59b 100644 --- a/clang/lib/CodeGen/CGCall.h +++ b/clang/lib/CodeGen/CGCall.h @@ -22,9 +22,6 @@ #include "clang/AST/Type.h" #include "llvm/IR/Value.h" -// FIXME: Restructure so we don't have to expose so much stuff. -#include "ABIInfo.h" - namespace llvm { class Type; class Value; diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp index de5cb913220a..949112c63cc9 100644 --- a/clang/lib/CodeGen/CGDeclCXX.cpp +++ b/clang/lib/CodeGen/CGDeclCXX.cpp @@ -618,6 +618,130 @@ void CodeGenModule::EmitCXXThreadLocalInitFunc() { CXXThreadLocals.clear(); } +/* Build the initializer for a C++20 module: + This is arranged to be run only once regardless of how many times the module + might be included transitively. This is done by using a control variable. + + First we call any initializers for imported modules. + We then call initializers for the Global Module Fragment (if present). + We then call initializers for the current module. + We then call initializers for the Private Module Fragment (if present). +*/ + +void CodeGenModule::EmitCXXModuleInitFunc(Module *Primary) { + while (!CXXGlobalInits.empty() && !CXXGlobalInits.back()) + CXXGlobalInits.pop_back(); + + // We create the function, even if it is empty, since an importer of this + // module will refer to it unconditionally (for the current implementation + // there is no way for the importer to know that an importee does not need + // an initializer to be run). + + // Module initializers for imported modules are emitted first. + // Collect the modules that we import + SmallVector<Module *> AllImports; + // Ones that we export + for (auto I : Primary->Exports) + AllImports.push_back(I.getPointer()); + // Ones that we only import. + for (Module *M : Primary->Imports) + AllImports.push_back(M); + + SmallVector<llvm::Function *, 8> ModuleInits; + for (Module *M : AllImports) { + // No Itanium initializer in module map modules. + if (M->isModuleMapModule()) + continue; // TODO: warn of mixed use of module map modules and C++20? + llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false); + SmallString<256> FnName; + { + llvm::raw_svector_ostream Out(FnName); + cast<ItaniumMangleContext>(getCXXABI().getMangleContext()) + .mangleModuleInitializer(M, Out); + } + assert(!GetGlobalValue(FnName.str()) && + "We should only have one use of the initializer call"); + llvm::Function *Fn = llvm::Function::Create( + FTy, llvm::Function::ExternalLinkage, FnName.str(), &getModule()); + ModuleInits.push_back(Fn); + } + AllImports.clear(); + + // Add any initializers with specified priority; this uses the same approach + // as EmitCXXGlobalInitFunc().
+ if (!PrioritizedCXXGlobalInits.empty()) { + SmallVector<llvm::Function *, 8> LocalCXXGlobalInits; + llvm::array_pod_sort(PrioritizedCXXGlobalInits.begin(), + PrioritizedCXXGlobalInits.end()); + for (SmallVectorImpl<GlobalInitData>::iterator + I = PrioritizedCXXGlobalInits.begin(), + E = PrioritizedCXXGlobalInits.end(); + I != E;) { + SmallVectorImpl<GlobalInitData>::iterator PrioE = + std::upper_bound(I + 1, E, *I, GlobalInitPriorityCmp()); + + for (; I < PrioE; ++I) + ModuleInits.push_back(I->second); + } + PrioritizedCXXGlobalInits.clear(); + } + + // Now append the ones without specified priority. + for (auto F : CXXGlobalInits) + ModuleInits.push_back(F); + CXXGlobalInits.clear(); + + llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false); + const CGFunctionInfo &FI = getTypes().arrangeNullaryFunction(); + + // We now build the initializer for this module, which has a mangled name + // as per the Itanium ABI. The action of the initializer is guarded so that + // each init is run just once (even though a module might be imported + // multiple times via nested use). + llvm::Function *Fn; + llvm::GlobalVariable *Guard = nullptr; + { + SmallString<256> InitFnName; + llvm::raw_svector_ostream Out(InitFnName); + cast<ItaniumMangleContext>(getCXXABI().getMangleContext()) + .mangleModuleInitializer(Primary, Out); + Fn = CreateGlobalInitOrCleanUpFunction( + FTy, llvm::Twine(InitFnName), FI, SourceLocation(), false, + llvm::GlobalVariable::ExternalLinkage); + + Guard = new llvm::GlobalVariable(getModule(), Int8Ty, /*isConstant=*/false, + llvm::GlobalVariable::InternalLinkage, + llvm::ConstantInt::get(Int8Ty, 0), + InitFnName.str() + "__in_chrg"); + } + CharUnits GuardAlign = CharUnits::One(); + Guard->setAlignment(GuardAlign.getAsAlign()); + + CodeGenFunction(*this).GenerateCXXGlobalInitFunc( + Fn, ModuleInits, ConstantAddress(Guard, Int8Ty, GuardAlign)); + // We allow for the case that a module object is added to a linked binary + // without a specific call to the initializer. This also ensures that + // implementation partition initializers are called when the partition + // is not imported as an interface. + AddGlobalCtor(Fn); + + // See the comment in EmitCXXGlobalInitFunc about OpenCL global init + // functions. + if (getLangOpts().OpenCL) { + GenKernelArgMetadata(Fn); + Fn->setCallingConv(llvm::CallingConv::SPIR_KERNEL); + } + + assert(!getLangOpts().CUDA || !getLangOpts().CUDAIsDevice || + getLangOpts().GPUAllowDeviceInit); + if (getLangOpts().HIP && getLangOpts().CUDAIsDevice) { + Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); + Fn->addFnAttr("device-init"); + } + + ModuleInits.clear(); +} + static SmallString<128> getTransformedFileName(llvm::Module &M) { SmallString<128> FileName = llvm::sys::path::filename(M.getName()); @@ -650,7 +774,29 @@ CodeGenModule::EmitCXXGlobalInitFunc() { while (!CXXGlobalInits.empty() && !CXXGlobalInits.back()) CXXGlobalInits.pop_back(); - if (CXXGlobalInits.empty() && PrioritizedCXXGlobalInits.empty()) + // When we import C++20 modules, we must run their initializers first. + SmallVector<llvm::Function *, 8> ModuleInits; + if (CXX20ModuleInits) + for (Module *M : ImportedModules) { + // No Itanium initializer in module map modules.
+ if (M->isModuleMapModule()) + continue; + llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false); + SmallString<256> FnName; + { + llvm::raw_svector_ostream Out(FnName); + cast<ItaniumMangleContext>(getCXXABI().getMangleContext()) + .mangleModuleInitializer(M, Out); + } + assert(!GetGlobalValue(FnName.str()) && + "We should only have one use of the initializer call"); + llvm::Function *Fn = llvm::Function::Create( + FTy, llvm::Function::ExternalLinkage, FnName.str(), &getModule()); + ModuleInits.push_back(Fn); + } + + if (ModuleInits.empty() && CXXGlobalInits.empty() && + PrioritizedCXXGlobalInits.empty()) return; llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false); @@ -676,6 +822,13 @@ CodeGenModule::EmitCXXGlobalInitFunc() { llvm::Function *Fn = CreateGlobalInitOrCleanUpFunction( FTy, "_GLOBAL__I_" + getPrioritySuffix(Priority), FI); + // Prepend the module inits to the highest priority set. + if (!ModuleInits.empty()) { + for (auto F : ModuleInits) + LocalCXXGlobalInits.push_back(F); + ModuleInits.clear(); + } + for (; I < PrioE; ++I) LocalCXXGlobalInits.push_back(I->second); @@ -685,17 +838,33 @@ CodeGenModule::EmitCXXGlobalInitFunc() { PrioritizedCXXGlobalInits.clear(); } - if (getCXXABI().useSinitAndSterm() && CXXGlobalInits.empty()) + if (getCXXABI().useSinitAndSterm() && ModuleInits.empty() && + CXXGlobalInits.empty()) return; + for (auto F : CXXGlobalInits) + ModuleInits.push_back(F); + CXXGlobalInits.clear(); + // Include the filename in the symbol name. Including "sub_" matches gcc // and makes sure these symbols appear lexicographically behind the symbols // with priority emitted above. - llvm::Function *Fn = CreateGlobalInitOrCleanUpFunction( - FTy, llvm::Twine("_GLOBAL__sub_I_", getTransformedFileName(getModule())), - FI); - - CodeGenFunction(*this).GenerateCXXGlobalInitFunc(Fn, CXXGlobalInits); + llvm::Function *Fn; + if (CXX20ModuleInits && getContext().getModuleForCodeGen()) { + SmallString<256> InitFnName; + llvm::raw_svector_ostream Out(InitFnName); + cast<ItaniumMangleContext>(getCXXABI().getMangleContext()) + .mangleModuleInitializer(getContext().getModuleForCodeGen(), Out); + Fn = CreateGlobalInitOrCleanUpFunction( + FTy, llvm::Twine(InitFnName), FI, SourceLocation(), false, + llvm::GlobalVariable::ExternalLinkage); + } else + Fn = CreateGlobalInitOrCleanUpFunction( + FTy, + llvm::Twine("_GLOBAL__sub_I_", getTransformedFileName(getModule())), + FI); + + CodeGenFunction(*this).GenerateCXXGlobalInitFunc(Fn, ModuleInits); AddGlobalCtor(Fn); // In OpenCL global init functions must be converted to kernels in order to @@ -718,7 +887,7 @@ CodeGenModule::EmitCXXGlobalInitFunc() { Fn->addFnAttr("device-init"); } - CXXGlobalInits.clear(); + ModuleInits.clear(); } void CodeGenModule::EmitCXXGlobalCleanUpFunc() { diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index cbeb6c938bee..bf3dd812b9e8 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -877,7 +877,8 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, /// Determine whether this expression refers to a flexible array member in a /// struct. We disable array bounds checks for such members. -static bool isFlexibleArrayMemberExpr(const Expr *E) { +static bool isFlexibleArrayMemberExpr(const Expr *E, + unsigned StrictFlexArraysLevel) { // For compatibility with existing code, we treat arrays of length 0 or // 1 as flexible array members. // FIXME: This is inconsistent with the warning code in SemaChecking. 
Unify @@ -886,6 +887,11 @@ static bool isFlexibleArrayMemberExpr(const Expr *E) { if (const auto *CAT = dyn_cast<ConstantArrayType>(AT)) { // FIXME: Sema doesn't treat [1] as a flexible array member if the bound // was produced by macro expansion. + if (StrictFlexArraysLevel >= 2 && CAT->getSize().ugt(0)) + return false; + // FIXME: While the default -fstrict-flex-arrays=0 permits Size>1 trailing + // arrays to be treated as flexible-array-members, we still emit ubsan + // checks as if they are not. if (CAT->getSize().ugt(1)) return false; } else if (!isa<IncompleteArrayType>(AT)) @@ -900,8 +906,10 @@ static bool isFlexibleArrayMemberExpr(const Expr *E) { if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl())) { // FIXME: Sema doesn't treat a T[1] union member as a flexible array // member, only a T[0] or T[] member gets that treatment. + // Under StrictFlexArraysLevel, obey c99+ that disallows FAM in union, see + // C11 6.7.2.1 §18 if (FD->getParent()->isUnion()) - return true; + return StrictFlexArraysLevel < 2; RecordDecl::field_iterator FI( DeclContext::decl_iterator(const_cast<FieldDecl *>(FD))); return ++FI == FD->getParent()->field_end(); @@ -954,8 +962,10 @@ llvm::Value *CodeGenFunction::LoadPassedObjectSize(const Expr *E, /// If Base is known to point to the start of an array, return the length of /// that array. Return 0 if the length cannot be determined. -static llvm::Value *getArrayIndexingBound( - CodeGenFunction &CGF, const Expr *Base, QualType &IndexedType) { +static llvm::Value *getArrayIndexingBound(CodeGenFunction &CGF, + const Expr *Base, + QualType &IndexedType, + unsigned StrictFlexArraysLevel) { // For the vector indexing extension, the bound is the number of elements. if (const VectorType *VT = Base->getType()->getAs<VectorType>()) { IndexedType = Base->getType(); @@ -966,7 +976,7 @@ static llvm::Value *getArrayIndexingBound( if (const auto *CE = dyn_cast<CastExpr>(Base)) { if (CE->getCastKind() == CK_ArrayToPointerDecay && - !isFlexibleArrayMemberExpr(CE->getSubExpr())) { + !isFlexibleArrayMemberExpr(CE->getSubExpr(), StrictFlexArraysLevel)) { IndexedType = CE->getSubExpr()->getType(); const ArrayType *AT = IndexedType->castAsArrayTypeUnsafe(); if (const auto *CAT = dyn_cast<ConstantArrayType>(AT)) @@ -993,8 +1003,11 @@ void CodeGenFunction::EmitBoundsCheck(const Expr *E, const Expr *Base, "should not be called unless adding bounds checks"); SanitizerScope SanScope(this); + const unsigned StrictFlexArraysLevel = getLangOpts().StrictFlexArrays; + QualType IndexedType; - llvm::Value *Bound = getArrayIndexingBound(*this, Base, IndexedType); + llvm::Value *Bound = + getArrayIndexingBound(*this, Base, IndexedType, StrictFlexArraysLevel); if (!Bound) return; diff --git a/clang/lib/CodeGen/CGObjCRuntime.h b/clang/lib/CodeGen/CGObjCRuntime.h index bb27c38db204..3bd981256f47 100644 --- a/clang/lib/CodeGen/CGObjCRuntime.h +++ b/clang/lib/CodeGen/CGObjCRuntime.h @@ -34,7 +34,8 @@ namespace llvm { namespace clang { namespace CodeGen { - class CodeGenFunction; +class CGFunctionInfo; +class CodeGenFunction; } class FieldDecl; diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index df7e5608f8f0..05ab16668743 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -2603,14 +2603,9 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { for (const auto *E : GS->labels()) { JumpDest Dest = getJumpDestForLabel(E->getLabel()); Transfer.push_back(Dest.getBlock()); - llvm::BlockAddress *BA = - llvm::BlockAddress::get(CurFn, 
Dest.getBlock()); - Args.push_back(BA); - ArgTypes.push_back(BA->getType()); - ArgElemTypes.push_back(nullptr); if (!Constraints.empty()) Constraints += ','; - Constraints += 'i'; + Constraints += "!i"; } Fallthrough = createBasicBlock("asm.fallthrough"); } diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index db0b2ffd3a4f..aa55cdaca5dc 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -5203,8 +5203,30 @@ void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc(), Data); } +bool isSupportedByOpenMPIRBuilder(const OMPTaskgroupDirective &T) { + return T.clauses().empty(); +} + void CodeGenFunction::EmitOMPTaskgroupDirective( const OMPTaskgroupDirective &S) { + OMPLexicalScope Scope(*this, S, OMPD_unknown); + if (CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S)) { + llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); + using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + InsertPointTy AllocaIP(AllocaInsertPt->getParent(), + AllocaInsertPt->getIterator()); + + auto BodyGenCB = [&, this](InsertPointTy AllocaIP, + InsertPointTy CodeGenIP) { + Builder.restoreIP(CodeGenIP); + EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); + }; + CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; + if (!CapturedStmtInfo) + CapturedStmtInfo = &CapStmtInfo; + Builder.restoreIP(OMPBuilder.createTaskgroup(Builder, AllocaIP, BodyGenCB)); + return; + } auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); if (const Expr *E = S.getReductionRef()) { @@ -5230,7 +5252,6 @@ void CodeGenFunction::EmitOMPTaskgroupDirective( } CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); }; - OMPLexicalScope Scope(*this, S, OMPD_unknown); CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc()); } diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 17c1c91c7e8f..5012bd822bd3 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -852,7 +852,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, } if (CGM.getCodeGenOpts().getProfileInstr() != CodeGenOptions::ProfileNone) - if (CGM.isProfileInstrExcluded(Fn, Loc)) + if (CGM.isFunctionBlockedFromProfileInstr(Fn, Loc)) Fn->addFnAttr(llvm::Attribute::NoProfile); unsigned Count, Offset; diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index c372bab1eccb..101080b6fe13 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "CodeGenModule.h" +#include "ABIInfo.h" #include "CGBlocks.h" #include "CGCUDARuntime.h" #include "CGCXXABI.h" @@ -32,7 +33,6 @@ #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/Mangle.h" -#include "clang/AST/RecordLayout.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/StmtVisitor.h" #include "clang/Basic/Builtins.h" @@ -58,6 +58,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/InstrProfReader.h" +#include "llvm/Support/CRC.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ConvertUTF.h" @@ -136,6 +137,13 @@ CodeGenModule::CodeGenModule(ASTContext &C, const 
HeaderSearchOptions &HSO, GlobalsInt8PtrTy = Int8Ty->getPointerTo(DL.getDefaultGlobalsAddressSpace()); ASTAllocaAddressSpace = getTargetCodeGenInfo().getASTAllocaAddressSpace(); + // Build C++20 Module initializers. + // TODO: Add Microsoft here once we know the mangling required for the + // initializers. + CXX20ModuleInits = + LangOpts.CPlusPlusModules && getCXXABI().getMangleContext().getKind() == + ItaniumMangleContext::MK_Itanium; + RuntimeCC = getTargetCodeGenInfo().getABIInfo().getRuntimeCC(); if (LangOpts.ObjC) @@ -510,6 +518,9 @@ static void setVisibilityFromDLLStorageClass(const clang::LangOptions &LO, } void CodeGenModule::Release() { + Module *Primary = getContext().getModuleForCodeGen(); + if (CXX20ModuleInits && Primary && !Primary->isModuleMapModule()) + EmitModuleInitializers(Primary); EmitDeferred(); DeferredDecls.insert(EmittedDeferredDecls.begin(), EmittedDeferredDecls.end()); @@ -518,7 +529,10 @@ void CodeGenModule::Release() { applyGlobalValReplacements(); applyReplacements(); emitMultiVersionFunctions(); - EmitCXXGlobalInitFunc(); + if (CXX20ModuleInits && Primary && Primary->isInterfaceOrPartition()) + EmitCXXModuleInitFunc(Primary); + else + EmitCXXGlobalInitFunc(); EmitCXXGlobalCleanUpFunc(); registerGlobalDtorsWithAtExit(); EmitCXXThreadLocalInitFunc(); @@ -742,19 +756,22 @@ void CodeGenModule::Release() { if (CodeGenOpts.CFProtectionReturn && Target.checkCFProtectionReturnSupported(getDiags())) { // Indicate that we want to instrument return control flow protection. - getModule().addModuleFlag(llvm::Module::Override, "cf-protection-return", + getModule().addModuleFlag(llvm::Module::Min, "cf-protection-return", 1); } if (CodeGenOpts.CFProtectionBranch && Target.checkCFProtectionBranchSupported(getDiags())) { // Indicate that we want to instrument branch control flow protection. - getModule().addModuleFlag(llvm::Module::Override, "cf-protection-branch", + getModule().addModuleFlag(llvm::Module::Min, "cf-protection-branch", 1); } if (CodeGenOpts.IBTSeal) - getModule().addModuleFlag(llvm::Module::Override, "ibt-seal", 1); + getModule().addModuleFlag(llvm::Module::Min, "ibt-seal", 1); + + if (CodeGenOpts.FunctionReturnThunks) + getModule().addModuleFlag(llvm::Module::Override, "function_return_thunk_extern", 1); // Add module metadata for return address signing (ignoring // non-leaf/all) and stack tagging. These are actually turned on by function @@ -2498,6 +2515,31 @@ static void addLinkOptionsPostorder(CodeGenModule &CGM, Module *Mod, } } +void CodeGenModule::EmitModuleInitializers(clang::Module *Primary) { + // Emit the initializers in the order that sub-modules appear in the + // source, first Global Module Fragments, if present. + if (auto GMF = Primary->getGlobalModuleFragment()) { + for (Decl *D : getContext().getModuleInitializers(GMF)) { + assert(D->getKind() == Decl::Var && "GMF initializer decl is not a var?"); + EmitTopLevelDecl(D); + } + } + // Second, any associated with the module itself. + for (Decl *D : getContext().getModuleInitializers(Primary)) { + // Skip import decls; the inits for those are called explicitly. + if (D->getKind() == Decl::Import) + continue; + EmitTopLevelDecl(D); + } + // Third, any associated with the Private Module Fragment, if present.
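[Aside, not part of the imported patch: a minimal sketch of the kind of module interface unit these three groups of initializers come from; the module name "demo", the header "gmf_globals.h", and the variable names are hypothetical. EmitModuleInitializers emits the groups in this order, and the guarded initializer built in EmitCXXModuleInitFunc earlier in this diff is what runs them, after any imported modules' initializers, exactly once.]
// demo.cppm -- illustrative only
module;                              // global module fragment
#include <string>                    // variables with dynamic initializers that
#include "gmf_globals.h"             // arrive here via headers form the GMF group
export module demo;                  // the module being built ("Primary")
export std::string OwnInit = "own";  // module-owned dynamic initializer
module :private;                     // private module fragment
std::string PMFInit = OwnInit + "!"; // PMF dynamic initializer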
+ if (auto PMF = Primary->getPrivateModuleFragment()) { + for (Decl *D : getContext().getModuleInitializers(PMF)) { + assert(D->getKind() == Decl::Var && "PMF initializer decl is not a var?"); + EmitTopLevelDecl(D); + } + } +} + void CodeGenModule::EmitModuleLinkOptions() { // Collect the set of all of the modules we want to visit to emit link // options, which is essentially the imported modules and all of their @@ -2776,16 +2818,18 @@ bool CodeGenModule::isInNoSanitizeList(SanitizerMask Kind, llvm::Function *Fn, // NoSanitize by function name. if (NoSanitizeL.containsFunction(Kind, Fn->getName())) return true; - // NoSanitize by location. + // NoSanitize by location. Check "mainfile" prefix. + auto &SM = Context.getSourceManager(); + const FileEntry &MainFile = *SM.getFileEntryForID(SM.getMainFileID()); + if (NoSanitizeL.containsMainFile(Kind, MainFile.getName())) + return true; + + // Check "src" prefix. if (Loc.isValid()) return NoSanitizeL.containsLocation(Kind, Loc); // If location is unknown, this may be a compiler-generated function. Assume // it's located in the main file. - auto &SM = Context.getSourceManager(); - if (const auto *MainFile = SM.getFileEntryForID(SM.getMainFileID())) { - return NoSanitizeL.containsFile(Kind, MainFile->getName()); - } - return false; + return NoSanitizeL.containsFile(Kind, MainFile.getName()); } bool CodeGenModule::isInNoSanitizeList(SanitizerMask Kind, @@ -2795,8 +2839,13 @@ bool CodeGenModule::isInNoSanitizeList(SanitizerMask Kind, const auto &NoSanitizeL = getContext().getNoSanitizeList(); if (NoSanitizeL.containsGlobal(Kind, GV->getName(), Category)) return true; + auto &SM = Context.getSourceManager(); + if (NoSanitizeL.containsMainFile( + Kind, SM.getFileEntryForID(SM.getMainFileID())->getName(), Category)) + return true; if (NoSanitizeL.containsLocation(Kind, Loc, Category)) return true; + // Check global type. if (!Ty.isNull()) { // Drill down the array types: if global variable of a fixed type is @@ -2840,8 +2889,8 @@ bool CodeGenModule::imbueXRayAttrs(llvm::Function *Fn, SourceLocation Loc, return true; } -bool CodeGenModule::isProfileInstrExcluded(llvm::Function *Fn, - SourceLocation Loc) const { +bool CodeGenModule::isFunctionBlockedByProfileList(llvm::Function *Fn, + SourceLocation Loc) const { const auto &ProfileList = getContext().getProfileList(); // If the profile list is empty, then instrument everything. if (ProfileList.isEmpty()) @@ -2868,6 +2917,20 @@ bool CodeGenModule::isProfileInstrExcluded(llvm::Function *Fn, return ProfileList.getDefault(); } +bool CodeGenModule::isFunctionBlockedFromProfileInstr( + llvm::Function *Fn, SourceLocation Loc) const { + if (isFunctionBlockedByProfileList(Fn, Loc)) + return true; + + auto NumGroups = getCodeGenOpts().ProfileTotalFunctionGroups; + if (NumGroups > 1) { + auto Group = llvm::crc32(arrayRefFromStringRef(Fn->getName())) % NumGroups; + if (Group != getCodeGenOpts().ProfileSelectedFunctionGroup) + return true; + } + return false; +} + bool CodeGenModule::MustBeEmitted(const ValueDecl *Global) { // Never defer when EmitAllDecls is specified. if (LangOpts.EmitAllDecls) @@ -2903,12 +2966,20 @@ bool CodeGenModule::MayBeEmittedEagerly(const ValueDecl *Global) { // explicitly instantiated, so they should not be emitted eagerly. 
return false; } - if (const auto *VD = dyn_cast<VarDecl>(Global)) + if (const auto *VD = dyn_cast<VarDecl>(Global)) { if (Context.getInlineVariableDefinitionKind(VD) == ASTContext::InlineVariableDefinitionKind::WeakUnknown) // A definition of an inline constexpr static data member may change // linkage later if it's redeclared outside the class. return false; + if (CXX20ModuleInits && VD->getOwningModule() && + !VD->getOwningModule()->isModuleMapModule()) { + // For CXX20, module-owned initializers need to be deferred, since it is + // not known at this point if they will be run for the current module or + // as part of the initializer for an imported one. + return false; + } + } // If OpenMP is enabled and threadprivates must be generated like TLS, delay // codegen for global variables, because they may be marked as threadprivate. if (LangOpts.OpenMP && LangOpts.OpenMPUseTLS && @@ -6208,6 +6279,16 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { DI->EmitImportDecl(*Import); } + // For C++ standard modules we are done - we will call the module + // initializer for imported modules, and that will likewise call those for + // any imports it has. + if (CXX20ModuleInits && Import->getImportedOwningModule() && + !Import->getImportedOwningModule()->isModuleMapModule()) + break; + + // For clang C++ module map modules the initializers for sub-modules are + // emitted here. + // Find all of the submodules and emit the module initializers. llvm::SmallPtrSet<clang::Module *, 16> Visited; SmallVector<clang::Module *, 16> Stack; @@ -6892,3 +6973,31 @@ void CodeGenModule::printPostfixForExternalizedDecl(llvm::raw_ostream &OS, OS << getContext().getCUIDHash(); } } + +void CodeGenModule::moveLazyEmissionStates(CodeGenModule *NewBuilder) { + assert(DeferredDeclsToEmit.empty() && + "Should have emitted all decls deferred to emit."); + assert(NewBuilder->DeferredDecls.empty() && + "Newly created module should not have deferred decls"); + NewBuilder->DeferredDecls = std::move(DeferredDecls); + + assert(NewBuilder->DeferredVTables.empty() && + "Newly created module should not have deferred vtables"); + NewBuilder->DeferredVTables = std::move(DeferredVTables); + + assert(NewBuilder->MangledDeclNames.empty() && + "Newly created module should not have mangled decl names"); + assert(NewBuilder->Manglings.empty() && + "Newly created module should not have manglings"); + NewBuilder->Manglings = std::move(Manglings); + + assert(WeakRefReferences.empty() && "Not all WeakRefRefs have been applied"); + NewBuilder->WeakRefReferences = std::move(WeakRefReferences); + + NewBuilder->TBAA = std::move(TBAA); + + assert(NewBuilder->EmittedDeferredDecls.empty() && + "Still have (unmerged) EmittedDeferredDecls deferred decls"); + + NewBuilder->EmittedDeferredDecls = std::move(EmittedDeferredDecls); +} diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 10b49da27dab..c939e7a309f5 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -303,7 +303,7 @@ private: std::unique_ptr<CGCXXABI> ABI; llvm::LLVMContext &VMContext; std::string ModuleNameHash; - + bool CXX20ModuleInits = false; std::unique_ptr<CodeGenTBAA> TBAA; mutable std::unique_ptr<TargetCodeGenInfo> TheTargetCodeGenInfo; @@ -1340,9 +1340,15 @@ public: bool imbueXRayAttrs(llvm::Function *Fn, SourceLocation Loc, StringRef Category = StringRef()) const; - /// Returns true if function at the given location should be excluded from - /// profile instrumentation. 
- bool isProfileInstrExcluded(llvm::Function *Fn, SourceLocation Loc) const; + /// \returns true if \p Fn at \p Loc should be excluded from profile + /// instrumentation by the SCL passed by \p -fprofile-list. + bool isFunctionBlockedByProfileList(llvm::Function *Fn, + SourceLocation Loc) const; + + /// \returns true if \p Fn at \p Loc should be excluded from profile + /// instrumentation. + bool isFunctionBlockedFromProfileInstr(llvm::Function *Fn, + SourceLocation Loc) const; SanitizerMetadata *getSanitizerMetadata() { return SanitizerMD.get(); @@ -1508,34 +1514,7 @@ public: /// Move some lazily-emitted states to the NewBuilder. This is especially /// essential for the incremental parsing environment like Clang Interpreter, /// because we'll lose all important information after each repl. - void moveLazyEmissionStates(CodeGenModule *NewBuilder) { - assert(DeferredDeclsToEmit.empty() && - "Should have emitted all decls deferred to emit."); - assert(NewBuilder->DeferredDecls.empty() && - "Newly created module should not have deferred decls"); - NewBuilder->DeferredDecls = std::move(DeferredDecls); - - assert(NewBuilder->DeferredVTables.empty() && - "Newly created module should not have deferred vtables"); - NewBuilder->DeferredVTables = std::move(DeferredVTables); - - assert(NewBuilder->MangledDeclNames.empty() && - "Newly created module should not have mangled decl names"); - assert(NewBuilder->Manglings.empty() && - "Newly created module should not have manglings"); - NewBuilder->Manglings = std::move(Manglings); - - assert(WeakRefReferences.empty() && - "Not all WeakRefRefs have been applied"); - NewBuilder->WeakRefReferences = std::move(WeakRefReferences); - - NewBuilder->TBAA = std::move(TBAA); - - assert(NewBuilder->EmittedDeferredDecls.empty() && - "Still have (unmerged) EmittedDeferredDecls deferred decls"); - - NewBuilder->EmittedDeferredDecls = std::move(EmittedDeferredDecls); - } + void moveLazyEmissionStates(CodeGenModule *NewBuilder); private: llvm::Constant *GetOrCreateLLVMFunction( @@ -1593,6 +1572,9 @@ private: /// Emit the function that initializes C++ thread_local variables. void EmitCXXThreadLocalInitFunc(); + /// Emit the function that initializes global variables for a C++ Module. + void EmitCXXModuleInitFunc(clang::Module *Primary); + /// Emit the function that initializes C++ globals. void EmitCXXGlobalInitFunc(); @@ -1660,6 +1642,9 @@ private: /// Emit the llvm.used and llvm.compiler.used metadata. void emitLLVMUsed(); + /// For C++20 Itanium ABI, emit the initializers for the module. + void EmitModuleInitializers(clang::Module *Primary); + /// Emit the link options introduced by imported modules. 
void EmitModuleLinkOptions(); diff --git a/clang/lib/CodeGen/SwiftCallingConv.cpp b/clang/lib/CodeGen/SwiftCallingConv.cpp index 1d712f4fde3c..8fb24fcecf53 100644 --- a/clang/lib/CodeGen/SwiftCallingConv.cpp +++ b/clang/lib/CodeGen/SwiftCallingConv.cpp @@ -11,9 +11,10 @@ //===----------------------------------------------------------------------===// #include "clang/CodeGen/SwiftCallingConv.h" -#include "clang/Basic/TargetInfo.h" +#include "ABIInfo.h" #include "CodeGenModule.h" #include "TargetInfo.h" +#include "clang/Basic/TargetInfo.h" using namespace clang; using namespace CodeGen; diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index 8eaed1db8e7d..d1ee61eab9d6 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -35,7 +35,7 @@ #include "llvm/IR/Type.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include <algorithm> // std::sort +#include <algorithm> using namespace clang; using namespace CodeGen; @@ -443,6 +443,9 @@ static Address emitMergePHI(CodeGenFunction &CGF, return Address(PHI, Addr1.getElementType(), Align); } +TargetCodeGenInfo::TargetCodeGenInfo(std::unique_ptr<ABIInfo> Info) + : Info(std::move(Info)) {} + TargetCodeGenInfo::~TargetCodeGenInfo() = default; // If someone can figure out a general rule for this, that would be great. @@ -10446,6 +10449,15 @@ ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const { LTy = llvm::PointerType::getWithSamePointeeType(PtrTy, GlobalAS); return ABIArgInfo::getDirect(LTy, 0, nullptr, false); } + + // Force copying aggregate type in kernel arguments by value when + // compiling CUDA targeting SPIR-V. This is required for the object + // copied to be valid on the device. + // This behavior follows the CUDA spec + // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-function-argument-processing, + // and matches the NVPTX implementation. + if (isAggregateTypeForABI(Ty)) + return getNaturalAlignIndirect(Ty, /* byval */ true); } return classifyArgumentType(Ty); } diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h index bdd64977b475..30421612015b 100644 --- a/clang/lib/CodeGen/TargetInfo.h +++ b/clang/lib/CodeGen/TargetInfo.h @@ -43,10 +43,10 @@ class CGBlockInfo; /// codegeneration issues, like target-specific attributes, builtins and so /// on. class TargetCodeGenInfo { - std::unique_ptr<ABIInfo> Info = nullptr; + std::unique_ptr<ABIInfo> Info; public: - TargetCodeGenInfo(std::unique_ptr<ABIInfo> Info) : Info(std::move(Info)) {} + TargetCodeGenInfo(std::unique_ptr<ABIInfo> Info); virtual ~TargetCodeGenInfo(); /// getABIInfo() - Returns ABI info helper for the target. diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 3a8400a55741..3f29afd35971 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4432,6 +4432,11 @@ Action *Driver::BuildOffloadingActions(Compilation &C, types::ID InputType = Input.first; const Arg *InputArg = Input.second; + // The toolchain can be active for unsupported file types. + if ((Kind == Action::OFK_Cuda && !types::isCuda(InputType)) || + (Kind == Action::OFK_HIP && !types::isHIP(InputType))) + continue; + // Get the product of all bound architectures and toolchains. 
SmallVector<std::pair<const ToolChain *, StringRef>> TCAndArchs; for (const ToolChain *TC : ToolChains) @@ -4473,6 +4478,15 @@ Action *Driver::BuildOffloadingActions(Compilation &C, } } + // Compiling HIP in non-RDC mode requires linking each action individually. + for (Action *&A : DeviceActions) { + if (A->getType() != types::TY_Object || Kind != Action::OFK_HIP || + Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false)) + continue; + ActionList LinkerInput = {A}; + A = C.MakeAction<LinkJobAction>(LinkerInput, types::TY_Image); + } + auto TCAndArch = TCAndArchs.begin(); for (Action *A : DeviceActions) { DDeps.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind); @@ -4486,15 +4500,27 @@ Action *Driver::BuildOffloadingActions(Compilation &C, if (offloadDeviceOnly()) return C.MakeAction<OffloadAction>(DDeps, types::TY_Nothing); + if (OffloadActions.empty()) + return HostAction; + OffloadAction::DeviceDependences DDep; if (C.isOffloadingHostKind(Action::OFK_Cuda) && !Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false)) { - // If we are not in RDC-mode we just emit the final CUDA fatbinary for each - // translation unit without requiring any linking. + // If we are not in RDC-mode we just emit the final CUDA fatbinary for + // each translation unit without requiring any linking. Action *FatbinAction = C.MakeAction<LinkJobAction>(OffloadActions, types::TY_CUDA_FATBIN); DDep.add(*FatbinAction, *C.getSingleOffloadToolChain<Action::OFK_Cuda>(), nullptr, Action::OFK_Cuda); + } else if (C.isOffloadingHostKind(Action::OFK_HIP) && + !Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, + false)) { + // If we are not in RDC-mode we just emit the final HIP fatbinary for each + // translation unit, linking each input individually. + Action *FatbinAction = + C.MakeAction<LinkJobAction>(OffloadActions, types::TY_HIP_FATBIN); + DDep.add(*FatbinAction, *C.getSingleOffloadToolChain<Action::OFK_HIP>(), + nullptr, Action::OFK_HIP); } else { // Package all the offloading actions into a single output that can be // embedded in the host and linked. @@ -4503,6 +4529,7 @@ Action *Driver::BuildOffloadingActions(Compilation &C, DDep.add(*PackagerAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(), nullptr, Action::OFK_None); } + OffloadAction::HostDependence HDep( *HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(), /*BoundArch=*/nullptr, isa<CompileJobAction>(HostAction) ? DDep : DDeps); @@ -6254,6 +6281,7 @@ Driver::getIncludeExcludeOptionFlagMasks(bool IsClCompatMode) const { if (IsClCompatMode) { // Include CL and Core options. IncludedFlagsBitmask |= options::CLOption; + IncludedFlagsBitmask |= options::CLDXCOption; IncludedFlagsBitmask |= options::CoreOption; } else { ExcludedFlagsBitmask |= options::CLOption; @@ -6261,10 +6289,14 @@ Driver::getIncludeExcludeOptionFlagMasks(bool IsClCompatMode) const { if (IsDXCMode()) { // Include DXC and Core options. 
IncludedFlagsBitmask |= options::DXCOption; + IncludedFlagsBitmask |= options::CLDXCOption; IncludedFlagsBitmask |= options::CoreOption; } else { ExcludedFlagsBitmask |= options::DXCOption; } + if (!IsClCompatMode && !IsDXCMode()) + ExcludedFlagsBitmask |= options::CLDXCOption; + return std::make_pair(IncludedFlagsBitmask, ExcludedFlagsBitmask); } diff --git a/clang/lib/Driver/Multilib.cpp b/clang/lib/Driver/Multilib.cpp index ab44ba50b5d5..ec619874ad60 100644 --- a/clang/lib/Driver/Multilib.cpp +++ b/clang/lib/Driver/Multilib.cpp @@ -267,10 +267,9 @@ bool MultilibSet::select(const Multilib::flags_list &Flags, Multilib &M) const { } // Sort multilibs by priority and select the one with the highest priority. - llvm::sort(Filtered.begin(), Filtered.end(), - [](const Multilib &a, const Multilib &b) -> bool { - return a.priority() > b.priority(); - }); + llvm::sort(Filtered, [](const Multilib &a, const Multilib &b) -> bool { + return a.priority() > b.priority(); + }); if (Filtered[0].priority() > Filtered[1].priority()) { M = Filtered[0]; diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 5130eb9b72c1..7a4319ea680f 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -1013,6 +1013,8 @@ void ToolChain::AddCXXStdlibLibArgs(const ArgList &Args, switch (Type) { case ToolChain::CST_Libcxx: CmdArgs.push_back("-lc++"); + if (Args.hasArg(options::OPT_fexperimental_library)) + CmdArgs.push_back("-lc++experimental"); break; case ToolChain::CST_Libstdcxx: diff --git a/clang/lib/Driver/ToolChains/AIX.cpp b/clang/lib/Driver/ToolChains/AIX.cpp index 878b84a77702..64be5fe23558 100644 --- a/clang/lib/Driver/ToolChains/AIX.cpp +++ b/clang/lib/Driver/ToolChains/AIX.cpp @@ -222,11 +222,13 @@ void AIX::AddClangSystemIncludeArgs(const ArgList &DriverArgs, llvm::StringRef Sysroot = GetHeaderSysroot(DriverArgs); const Driver &D = getDriver(); - // Add the Clang builtin headers (<resource>/include). if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) { SmallString<128> P(D.ResourceDir); - path::append(P, "/include"); - addSystemInclude(DriverArgs, CC1Args, P.str()); + // Add the PowerPC intrinsic headers (<resource>/include/ppc_wrappers) + path::append(P, "include", "ppc_wrappers"); + addSystemInclude(DriverArgs, CC1Args, P); + // Add the Clang builtin headers (<resource>/include) + addSystemInclude(DriverArgs, CC1Args, path::parent_path(P.str())); } // Return if -nostdlibinc is specified as a driver option. 
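[Aside, not part of the imported patch: a standalone sketch of the path handling in the AIX hunk above, using a made-up resource directory; it shows why parent_path() of the ppc_wrappers directory is exactly the Clang builtin-headers directory.]
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
int main() {
  llvm::SmallString<128> P("/opt/llvm/lib/clang/15.0.0");       // stands in for D.ResourceDir
  llvm::sys::path::append(P, "include", "ppc_wrappers");
  llvm::outs() << P.str() << "\n";                               // .../include/ppc_wrappers
  llvm::outs() << llvm::sys::path::parent_path(P.str()) << "\n"; // .../include
  return 0;
}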
@@ -275,6 +277,8 @@ void AIX::AddCXXStdlibLibArgs(const llvm::opt::ArgList &Args, llvm::report_fatal_error("linking libstdc++ unimplemented on AIX"); case ToolChain::CST_Libcxx: CmdArgs.push_back("-lc++"); + if (Args.hasArg(options::OPT_fexperimental_library)) + CmdArgs.push_back("-lc++experimental"); CmdArgs.push_back("-lc++abi"); return; } diff --git a/clang/lib/Driver/ToolChains/Ananas.cpp b/clang/lib/Driver/ToolChains/Ananas.cpp index 40f9e56b38e9..a9c13464a0d6 100644 --- a/clang/lib/Driver/ToolChains/Ananas.cpp +++ b/clang/lib/Driver/ToolChains/Ananas.cpp @@ -71,7 +71,7 @@ void ananas::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-export-dynamic"); if (Args.hasArg(options::OPT_shared)) { CmdArgs.push_back("-Bshareable"); - } else { + } else if (!Args.hasArg(options::OPT_r)) { Args.AddAllArgs(CmdArgs, options::OPT_pie); CmdArgs.push_back("-dynamic-linker"); CmdArgs.push_back("/lib/ld-ananas.so"); diff --git a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp index 8a8ed20986c5..de6e045a9447 100644 --- a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp +++ b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "RISCV.h" +#include "../Clang.h" #include "ToolChains/CommonArgs.h" #include "clang/Basic/CharInfo.h" #include "clang/Driver/Driver.h" @@ -137,10 +138,17 @@ void riscv::getRISCVTargetFeatures(const Driver &D, const llvm::Triple &Triple, Features.push_back("+reserve-x31"); // -mrelax is default, unless -mno-relax is specified. - if (Args.hasFlag(options::OPT_mrelax, options::OPT_mno_relax, true)) + if (Args.hasFlag(options::OPT_mrelax, options::OPT_mno_relax, true)) { Features.push_back("+relax"); - else + // -gsplit-dwarf -mrelax requires DW_AT_high_pc/DW_AT_ranges/... indexing + // into .debug_addr, which is currently not implemented. + Arg *A; + if (getDebugFissionKind(D, Args, A) != DwarfFissionKind::None) + D.Diag(clang::diag::err_drv_riscv_unsupported_with_linker_relaxation) + << A->getAsString(Args); + } else { Features.push_back("-relax"); + } // GCC Compatibility: -mno-save-restore is default, unless -msave-restore is // specified. 
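[Aside, not part of the imported patch: the Basic/Targets/RISCV.cpp hunk earlier in this diff predefines __riscv_mul when only Zmmul is enabled, while __riscv_div and __riscv_muldiv still require the full M extension, so code can now detect a multiply-only configuration; HAVE_HW_MUL_ONLY below is a made-up macro.]
/* Illustrative only: e.g. -march=rv32i_zmmul yields __riscv_mul without
   __riscv_div/__riscv_muldiv, whereas a target with M defines all three. */
#if defined(__riscv_mul) && !defined(__riscv_muldiv)
#define HAVE_HW_MUL_ONLY 1
#endif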
diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp b/clang/lib/Driver/ToolChains/BareMetal.cpp index cd07692be358..5f1638a159d5 100644 --- a/clang/lib/Driver/ToolChains/BareMetal.cpp +++ b/clang/lib/Driver/ToolChains/BareMetal.cpp @@ -276,6 +276,8 @@ void BareMetal::AddCXXStdlibLibArgs(const ArgList &Args, switch (GetCXXStdlibType(Args)) { case ToolChain::CST_Libcxx: CmdArgs.push_back("-lc++"); + if (Args.hasArg(options::OPT_fexperimental_library)) + CmdArgs.push_back("-lc++experimental"); CmdArgs.push_back("-lc++abi"); break; case ToolChain::CST_Libstdcxx: diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 97435f1a73de..3044c2d92d21 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -956,6 +956,27 @@ static void addPGOAndCoverageFlags(const ToolChain &TC, Compilation &C, CmdArgs.push_back("-fprofile-update=atomic"); } + int FunctionGroups = 1; + int SelectedFunctionGroup = 0; + if (const auto *A = Args.getLastArg(options::OPT_fprofile_function_groups)) { + StringRef Val = A->getValue(); + if (Val.getAsInteger(0, FunctionGroups) || FunctionGroups < 1) + D.Diag(diag::err_drv_invalid_int_value) << A->getAsString(Args) << Val; + } + if (const auto *A = + Args.getLastArg(options::OPT_fprofile_selected_function_group)) { + StringRef Val = A->getValue(); + if (Val.getAsInteger(0, SelectedFunctionGroup) || + SelectedFunctionGroup < 0 || SelectedFunctionGroup >= FunctionGroups) + D.Diag(diag::err_drv_invalid_int_value) << A->getAsString(Args) << Val; + } + if (FunctionGroups != 1) + CmdArgs.push_back(Args.MakeArgString("-fprofile-function-groups=" + + Twine(FunctionGroups))); + if (SelectedFunctionGroup != 0) + CmdArgs.push_back(Args.MakeArgString("-fprofile-selected-function-group=" + + Twine(SelectedFunctionGroup))); + // Leave -fprofile-dir= an unused argument unless .gcda emission is // enabled. To be polite, with '-fprofile-arcs -fno-profile-arcs' consider // the flag used. 
There is no -fno-profile-dir, so the user has no @@ -1902,18 +1923,11 @@ void Clang::AddAArch64TargetArgs(const ArgList &Args, AddAAPCSVolatileBitfieldArgs(Args, CmdArgs); if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_mtune_EQ)) { - StringRef Name = A->getValue(); - - std::string TuneCPU; - if (Name == "native") - TuneCPU = std::string(llvm::sys::getHostCPUName()); + CmdArgs.push_back("-tune-cpu"); + if (strcmp(A->getValue(), "native") == 0) + CmdArgs.push_back(Args.MakeArgString(llvm::sys::getHostCPUName())); else - TuneCPU = std::string(Name); - - if (!TuneCPU.empty()) { - CmdArgs.push_back("-tune-cpu"); - CmdArgs.push_back(Args.MakeArgString(TuneCPU)); - } + CmdArgs.push_back(A->getValue()); } AddUnalignedAccessWarning(CmdArgs); @@ -2167,18 +2181,11 @@ void Clang::AddRISCVTargetArgs(const ArgList &Args, SetRISCVSmallDataLimit(getToolChain(), Args, CmdArgs); - std::string TuneCPU; - - if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_mtune_EQ)) { - StringRef Name = A->getValue(); - - Name = llvm::RISCV::resolveTuneCPUAlias(Name, Triple.isArch64Bit()); - TuneCPU = std::string(Name); - } - - if (!TuneCPU.empty()) { + if (const Arg *A = Args.getLastArg(options::OPT_mtune_EQ)) { + StringRef Name = + llvm::RISCV::resolveTuneCPUAlias(A->getValue(), Triple.isArch64Bit()); CmdArgs.push_back("-tune-cpu"); - CmdArgs.push_back(Args.MakeArgString(TuneCPU)); + CmdArgs.push_back(Name.data()); } } @@ -2202,19 +2209,12 @@ void Clang::AddSparcTargetArgs(const ArgList &Args, void Clang::AddSystemZTargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { - if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_mtune_EQ)) { - StringRef Name = A->getValue(); - - std::string TuneCPU; - if (Name == "native") - TuneCPU = std::string(llvm::sys::getHostCPUName()); + if (const Arg *A = Args.getLastArg(options::OPT_mtune_EQ)) { + CmdArgs.push_back("-tune-cpu"); + if (strcmp(A->getValue(), "native") == 0) + CmdArgs.push_back(Args.MakeArgString(llvm::sys::getHostCPUName())); else - TuneCPU = std::string(Name); - - if (!TuneCPU.empty()) { - CmdArgs.push_back("-tune-cpu"); - CmdArgs.push_back(Args.MakeArgString(TuneCPU)); - } + CmdArgs.push_back(A->getValue()); } bool HasBackchain = @@ -3490,6 +3490,7 @@ static void RenderHLSLOptions(const ArgList &Args, ArgStringList &CmdArgs, types::ID InputType) { const unsigned ForwardedArguments[] = {options::OPT_dxil_validator_version, options::OPT_D, + options::OPT_I, options::OPT_S, options::OPT_emit_llvm, options::OPT_disable_llvm_passes, @@ -3985,6 +3986,9 @@ static void RenderDiagnosticsOptions(const Driver &D, const ArgList &Args, if (const Arg *A = Args.getLastArg(options::OPT_fdiagnostics_format_EQ)) { CmdArgs.push_back("-fdiagnostics-format"); CmdArgs.push_back(A->getValue()); + if (StringRef(A->getValue()) == "sarif" || + StringRef(A->getValue()) == "SARIF") + D.Diag(diag::warn_drv_sarif_format_unstable); } if (const Arg *A = Args.getLastArg( @@ -4030,9 +4034,7 @@ static void RenderDiagnosticsOptions(const Driver &D, const ArgList &Args, options::OPT_fno_spell_checking); } -enum class DwarfFissionKind { None, Split, Single }; - -static DwarfFissionKind getDebugFissionKind(const Driver &D, +DwarfFissionKind tools::getDebugFissionKind(const Driver &D, const ArgList &Args, Arg *&Arg) { Arg = Args.getLastArg(options::OPT_gsplit_dwarf, options::OPT_gsplit_dwarf_EQ, options::OPT_gno_split_dwarf); @@ -5388,9 +5390,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, TC.addClangTargetOptions(Args, CmdArgs, 
JA.getOffloadingDeviceKind()); - // FIXME: Handle -mtune=. - (void)Args.hasArg(options::OPT_mtune_EQ); - if (Arg *A = Args.getLastArg(options::OPT_mcmodel_EQ)) { StringRef CM = A->getValue(); if (CM == "small" || CM == "kernel" || CM == "medium" || CM == "large" || @@ -5837,12 +5836,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(A->getValue()); } - if (Args.hasArg(options::OPT_funstable)) { - CmdArgs.push_back("-funstable"); - if (!Args.hasArg(options::OPT_fno_coroutines_ts)) - CmdArgs.push_back("-fcoroutines-ts"); - CmdArgs.push_back("-fmodules-ts"); - } + Args.AddLastArg(CmdArgs, options::OPT_fexperimental_library); if (Args.hasArg(options::OPT_fexperimental_new_constant_interpreter)) CmdArgs.push_back("-fexperimental-new-constant-interpreter"); @@ -6209,6 +6203,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Args.AddLastArg(CmdArgs, options::OPT_ftime_report_EQ); Args.AddLastArg(CmdArgs, options::OPT_ftime_trace); Args.AddLastArg(CmdArgs, options::OPT_ftime_trace_granularity_EQ); + Args.AddLastArg(CmdArgs, options::OPT_ftime_trace_EQ); Args.AddLastArg(CmdArgs, options::OPT_ftrapv); Args.AddLastArg(CmdArgs, options::OPT_malign_double); Args.AddLastArg(CmdArgs, options::OPT_fno_temp_file); @@ -6243,6 +6238,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Args.AddLastArg(CmdArgs, options::OPT_funroll_loops, options::OPT_fno_unroll_loops); + Args.AddLastArg(CmdArgs, options::OPT_fstrict_flex_arrays_EQ); + Args.AddLastArg(CmdArgs, options::OPT_pthread); if (Args.hasFlag(options::OPT_mspeculative_load_hardening, @@ -6986,7 +6983,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-fcuda-include-gpubinary"); CmdArgs.push_back(CudaDeviceInput->getFilename()); } else if (!HostOffloadingInputs.empty()) { - if (IsCuda && !IsRDCMode) { + if ((IsCuda || IsHIP) && !IsRDCMode) { assert(HostOffloadingInputs.size() == 1 && "Only one input expected"); CmdArgs.push_back("-fcuda-include-gpubinary"); CmdArgs.push_back(HostOffloadingInputs.front().getFilename()); @@ -8448,14 +8445,14 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, // Forward remarks passes to the LLVM backend in the wrapper. 
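[Aside, not part of the imported patch: -fstrict-flex-arrays=<n>, which the Clang.cpp hunk above now forwards to cc1, feeds the StrictFlexArraysLevel consulted by the CGExpr.cpp bounds-check changes earlier in this diff; a small illustration of its effect on the array-bounds sanitizer.]
/* Illustrative only. */
struct S { int n; int tail[1]; }; /* level < 2: tail is still treated as a flexible array
                                     member, so no array-bounds check is emitted;
                                     level >= 2: tail is a real one-element array. */
union U { int n; int tail[0]; };  /* level >= 2: union members never get the
                                     flexible-array-member treatment. */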
if (const Arg *A = Args.getLastArg(options::OPT_Rpass_EQ)) - CmdArgs.push_back( - Args.MakeArgString(Twine("--pass-remarks=") + A->getValue())); + CmdArgs.push_back(Args.MakeArgString(Twine("--offload-opt=-pass-remarks=") + + A->getValue())); if (const Arg *A = Args.getLastArg(options::OPT_Rpass_missed_EQ)) - CmdArgs.push_back( - Args.MakeArgString(Twine("--pass-remarks-missed=") + A->getValue())); + CmdArgs.push_back(Args.MakeArgString( + Twine("--offload-opt=-pass-remarks-missed=") + A->getValue())); if (const Arg *A = Args.getLastArg(options::OPT_Rpass_analysis_EQ)) - CmdArgs.push_back( - Args.MakeArgString(Twine("--pass-remarks-analysis=") + A->getValue())); + CmdArgs.push_back(Args.MakeArgString( + Twine("--offload-opt=-pass-remarks-analysis=") + A->getValue())); if (Args.getLastArg(options::OPT_save_temps_EQ)) CmdArgs.push_back("--save-temps"); diff --git a/clang/lib/Driver/ToolChains/Clang.h b/clang/lib/Driver/ToolChains/Clang.h index 37263efd57a5..5209c6687599 100644 --- a/clang/lib/Driver/ToolChains/Clang.h +++ b/clang/lib/Driver/ToolChains/Clang.h @@ -198,6 +198,12 @@ public: const char *LinkingOutput) const override; }; +enum class DwarfFissionKind { None, Split, Single }; + +DwarfFissionKind getDebugFissionKind(const Driver &D, + const llvm::opt::ArgList &Args, + llvm::opt::Arg *&Arg); + } // end namespace tools } // end namespace driver diff --git a/clang/lib/Driver/ToolChains/CloudABI.cpp b/clang/lib/Driver/ToolChains/CloudABI.cpp index 501e3a382ec1..9fd0529a3297 100644 --- a/clang/lib/Driver/ToolChains/CloudABI.cpp +++ b/clang/lib/Driver/ToolChains/CloudABI.cpp @@ -117,6 +117,8 @@ void CloudABI::addLibCxxIncludePaths(const llvm::opt::ArgList &DriverArgs, void CloudABI::AddCXXStdlibLibArgs(const ArgList &Args, ArgStringList &CmdArgs) const { CmdArgs.push_back("-lc++"); + if (Args.hasArg(options::OPT_fexperimental_library)) + CmdArgs.push_back("-lc++experimental"); CmdArgs.push_back("-lc++abi"); CmdArgs.push_back("-lunwind"); } diff --git a/clang/lib/Driver/ToolChains/CrossWindows.cpp b/clang/lib/Driver/ToolChains/CrossWindows.cpp index 2b043fbeecda..681a6824dad1 100644 --- a/clang/lib/Driver/ToolChains/CrossWindows.cpp +++ b/clang/lib/Driver/ToolChains/CrossWindows.cpp @@ -273,8 +273,11 @@ AddClangCXXStdlibIncludeArgs(const llvm::opt::ArgList &DriverArgs, void CrossWindowsToolChain:: AddCXXStdlibLibArgs(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const { - if (GetCXXStdlibType(Args) == ToolChain::CST_Libcxx) + if (GetCXXStdlibType(Args) == ToolChain::CST_Libcxx) { CmdArgs.push_back("-lc++"); + if (Args.hasArg(options::OPT_fexperimental_library)) + CmdArgs.push_back("-lc++experimental"); + } } clang::SanitizerMask CrossWindowsToolChain::getSupportedSanitizers() const { diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index c9e773701ac3..bada811daadf 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -1141,25 +1141,38 @@ void DarwinClang::AddLinkARCArgs(const ArgList &Args, SmallString<128> P(getDriver().ClangExecutable); llvm::sys::path::remove_filename(P); // 'clang' llvm::sys::path::remove_filename(P); // 'bin' + llvm::sys::path::append(P, "lib", "arc"); // 'libarclite' usually lives in the same toolchain as 'clang'. However, the // Swift open source toolchains for macOS distribute Clang without libarclite. // In that case, to allow the linker to find 'libarclite', we point to the // 'libarclite' in the XcodeDefault toolchain instead. 
- if (getXcodeDeveloperPath(P).empty()) { - if (const Arg *A = Args.getLastArg(options::OPT_isysroot)) { + if (!getVFS().exists(P)) { + auto updatePath = [&](const Arg *A) { // Try to infer the path to 'libarclite' in the toolchain from the // specified SDK path. StringRef XcodePathForSDK = getXcodeDeveloperPath(A->getValue()); - if (!XcodePathForSDK.empty()) { - P = XcodePathForSDK; - llvm::sys::path::append(P, "Toolchains/XcodeDefault.xctoolchain/usr"); - } + if (XcodePathForSDK.empty()) + return false; + + P = XcodePathForSDK; + llvm::sys::path::append(P, "Toolchains/XcodeDefault.xctoolchain/usr", + "lib", "arc"); + return getVFS().exists(P); + }; + + bool updated = false; + if (const Arg *A = Args.getLastArg(options::OPT_isysroot)) + updated = updatePath(A); + + if (!updated) { + if (const Arg *A = Args.getLastArg(options::OPT__sysroot_EQ)) + updatePath(A); } } CmdArgs.push_back("-force_load"); - llvm::sys::path::append(P, "lib", "arc", "libarclite_"); + llvm::sys::path::append(P, "libarclite_"); // Mash in the platform. if (isTargetWatchOSSimulator()) P += "watchsimulator"; @@ -2448,6 +2461,7 @@ void DarwinClang::AddClangCXXStdlibIncludeArgs( break; } } + void DarwinClang::AddCXXStdlibLibArgs(const ArgList &Args, ArgStringList &CmdArgs) const { CXXStdlibType Type = GetCXXStdlibType(Args); @@ -2455,6 +2469,8 @@ void DarwinClang::AddCXXStdlibLibArgs(const ArgList &Args, switch (Type) { case ToolChain::CST_Libcxx: CmdArgs.push_back("-lc++"); + if (Args.hasArg(options::OPT_fexperimental_library)) + CmdArgs.push_back("-lc++experimental"); break; case ToolChain::CST_Libstdcxx: diff --git a/clang/lib/Driver/ToolChains/DragonFly.cpp b/clang/lib/Driver/ToolChains/DragonFly.cpp index 8cfec6a6c4e0..ba901407715f 100644 --- a/clang/lib/Driver/ToolChains/DragonFly.cpp +++ b/clang/lib/Driver/ToolChains/DragonFly.cpp @@ -69,7 +69,7 @@ void dragonfly::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-export-dynamic"); if (Args.hasArg(options::OPT_shared)) CmdArgs.push_back("-Bshareable"); - else { + else if (!Args.hasArg(options::OPT_r)) { CmdArgs.push_back("-dynamic-linker"); CmdArgs.push_back("/usr/libexec/ld-elf.so.2"); } diff --git a/clang/lib/Driver/ToolChains/FreeBSD.cpp b/clang/lib/Driver/ToolChains/FreeBSD.cpp index 79e3c5cbca5f..e5451c20a00c 100644 --- a/clang/lib/Driver/ToolChains/FreeBSD.cpp +++ b/clang/lib/Driver/ToolChains/FreeBSD.cpp @@ -170,7 +170,7 @@ void freebsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-export-dynamic"); if (Args.hasArg(options::OPT_shared)) { CmdArgs.push_back("-Bshareable"); - } else { + } else if (!Args.hasArg(options::OPT_r)) { CmdArgs.push_back("-dynamic-linker"); CmdArgs.push_back("/libexec/ld-elf.so.1"); } @@ -389,10 +389,10 @@ FreeBSD::FreeBSD(const Driver &D, const llvm::Triple &Triple, // back to '/usr/lib' if it doesn't exist. 
if ((Triple.getArch() == llvm::Triple::x86 || Triple.isMIPS32() || Triple.isPPC32()) && - D.getVFS().exists(getDriver().SysRoot + "/usr/lib32/crt1.o")) - getFilePaths().push_back(getDriver().SysRoot + "/usr/lib32"); + D.getVFS().exists(concat(getDriver().SysRoot, "/usr/lib32/crt1.o"))) + getFilePaths().push_back(concat(getDriver().SysRoot, "/usr/lib32")); else - getFilePaths().push_back(getDriver().SysRoot + "/usr/lib"); + getFilePaths().push_back(concat(getDriver().SysRoot, "/usr/lib")); } ToolChain::CXXStdlibType FreeBSD::GetDefaultCXXStdlibType() const { @@ -411,14 +411,14 @@ unsigned FreeBSD::GetDefaultDwarfVersion() const { void FreeBSD::addLibCxxIncludePaths(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const { addSystemInclude(DriverArgs, CC1Args, - getDriver().SysRoot + "/usr/include/c++/v1"); + concat(getDriver().SysRoot, "/usr/include/c++/v1")); } void FreeBSD::addLibStdCxxIncludePaths( const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const { - addLibStdCXXIncludePaths(getDriver().SysRoot + "/usr/include/c++/4.2", "", "", - DriverArgs, CC1Args); + addLibStdCXXIncludePaths(concat(getDriver().SysRoot, "/usr/include/c++/4.2"), + "", "", DriverArgs, CC1Args); } void FreeBSD::AddCXXStdlibLibArgs(const ArgList &Args, @@ -430,6 +430,8 @@ void FreeBSD::AddCXXStdlibLibArgs(const ArgList &Args, switch (Type) { case ToolChain::CST_Libcxx: CmdArgs.push_back(Profiling ? "-lc++_p" : "-lc++"); + if (Args.hasArg(options::OPT_fexperimental_library)) + CmdArgs.push_back("-lc++experimental"); break; case ToolChain::CST_Libstdcxx: diff --git a/clang/lib/Driver/ToolChains/Fuchsia.cpp b/clang/lib/Driver/ToolChains/Fuchsia.cpp index 03ff9fe894c8..d63c69c63b1f 100644 --- a/clang/lib/Driver/ToolChains/Fuchsia.cpp +++ b/clang/lib/Driver/ToolChains/Fuchsia.cpp @@ -101,7 +101,7 @@ void fuchsia::Linker::ConstructJob(Compilation &C, const JobAction &JA, const SanitizerArgs &SanArgs = ToolChain.getSanitizerArgs(Args); - if (!Args.hasArg(options::OPT_shared)) { + if (!Args.hasArg(options::OPT_shared) && !Args.hasArg(options::OPT_r)) { std::string Dyld = D.DyldPrefix; if (SanArgs.needsAsanRt() && SanArgs.needsSharedRt()) Dyld += "asan/"; @@ -417,6 +417,8 @@ void Fuchsia::AddCXXStdlibLibArgs(const ArgList &Args, switch (GetCXXStdlibType(Args)) { case ToolChain::CST_Libcxx: CmdArgs.push_back("-lc++"); + if (Args.hasArg(options::OPT_fexperimental_library)) + CmdArgs.push_back("-lc++experimental"); break; case ToolChain::CST_Libstdcxx: diff --git a/clang/lib/Driver/ToolChains/Hexagon.cpp b/clang/lib/Driver/ToolChains/Hexagon.cpp index ed07e710fc49..93b987c07f29 100644 --- a/clang/lib/Driver/ToolChains/Hexagon.cpp +++ b/clang/lib/Driver/ToolChains/Hexagon.cpp @@ -614,6 +614,8 @@ void HexagonToolChain::AddCXXStdlibLibArgs(const ArgList &Args, switch (Type) { case ToolChain::CST_Libcxx: CmdArgs.push_back("-lc++"); + if (Args.hasArg(options::OPT_fexperimental_library)) + CmdArgs.push_back("-lc++experimental"); CmdArgs.push_back("-lc++abi"); CmdArgs.push_back("-lunwind"); break; diff --git a/clang/lib/Driver/ToolChains/MipsLinux.cpp b/clang/lib/Driver/ToolChains/MipsLinux.cpp index 41b7b839f3b3..9c58583bca77 100644 --- a/clang/lib/Driver/ToolChains/MipsLinux.cpp +++ b/clang/lib/Driver/ToolChains/MipsLinux.cpp @@ -112,6 +112,8 @@ void MipsLLVMToolChain::AddCXXStdlibLibArgs(const ArgList &Args, "Only -lc++ (aka libxx) is supported in this toolchain."); CmdArgs.push_back("-lc++"); + if (Args.hasArg(options::OPT_fexperimental_library)) + 
CmdArgs.push_back("-lc++experimental"); CmdArgs.push_back("-lc++abi"); CmdArgs.push_back("-lunwind"); } diff --git a/clang/lib/Driver/ToolChains/NaCl.cpp b/clang/lib/Driver/ToolChains/NaCl.cpp index 753459cb230b..38151735ee51 100644 --- a/clang/lib/Driver/ToolChains/NaCl.cpp +++ b/clang/lib/Driver/ToolChains/NaCl.cpp @@ -308,6 +308,8 @@ void NaClToolChain::AddCXXStdlibLibArgs(const ArgList &Args, // if the value is libc++, and emits an error for other values. GetCXXStdlibType(Args); CmdArgs.push_back("-lc++"); + if (Args.hasArg(options::OPT_fexperimental_library)) + CmdArgs.push_back("-lc++experimental"); } void NaClToolChain::addLibCxxIncludePaths( diff --git a/clang/lib/Driver/ToolChains/NetBSD.cpp b/clang/lib/Driver/ToolChains/NetBSD.cpp index d1eda14a51f0..ac90ed49b8a5 100644 --- a/clang/lib/Driver/ToolChains/NetBSD.cpp +++ b/clang/lib/Driver/ToolChains/NetBSD.cpp @@ -139,7 +139,7 @@ void netbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-export-dynamic"); if (Args.hasArg(options::OPT_shared)) { CmdArgs.push_back("-Bshareable"); - } else { + } else if (!Args.hasArg(options::OPT_r)) { Args.AddAllArgs(CmdArgs, options::OPT_pie); CmdArgs.push_back("-dynamic-linker"); CmdArgs.push_back("/libexec/ld.elf_so"); diff --git a/clang/lib/Driver/ToolChains/OpenBSD.cpp b/clang/lib/Driver/ToolChains/OpenBSD.cpp index 54cf3cc89caf..8b3a40606ff3 100644 --- a/clang/lib/Driver/ToolChains/OpenBSD.cpp +++ b/clang/lib/Driver/ToolChains/OpenBSD.cpp @@ -147,7 +147,7 @@ void openbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-Bdynamic"); if (Args.hasArg(options::OPT_shared)) { CmdArgs.push_back("-shared"); - } else { + } else if (!Args.hasArg(options::OPT_r)) { CmdArgs.push_back("-dynamic-linker"); CmdArgs.push_back("/usr/libexec/ld.so"); } @@ -284,7 +284,7 @@ SanitizerMask OpenBSD::getSupportedSanitizers() const { OpenBSD::OpenBSD(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) : Generic_ELF(D, Triple, Args) { - getFilePaths().push_back(getDriver().SysRoot + "/usr/lib"); + getFilePaths().push_back(concat(getDriver().SysRoot, "/usr/lib")); } void OpenBSD::AddClangSystemIncludeArgs( @@ -317,13 +317,14 @@ void OpenBSD::AddClangSystemIncludeArgs( return; } - addExternCSystemInclude(DriverArgs, CC1Args, D.SysRoot + "/usr/include"); + addExternCSystemInclude(DriverArgs, CC1Args, + concat(D.SysRoot, "/usr/include")); } void OpenBSD::addLibCxxIncludePaths(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const { addSystemInclude(DriverArgs, CC1Args, - getDriver().SysRoot + "/usr/include/c++/v1"); + concat(getDriver().SysRoot, "/usr/include/c++/v1")); } void OpenBSD::AddCXXStdlibLibArgs(const ArgList &Args, @@ -331,6 +332,8 @@ void OpenBSD::AddCXXStdlibLibArgs(const ArgList &Args, bool Profiling = Args.hasArg(options::OPT_pg); CmdArgs.push_back(Profiling ? "-lc++_p" : "-lc++"); + if (Args.hasArg(options::OPT_fexperimental_library)) + CmdArgs.push_back("-lc++experimental"); CmdArgs.push_back(Profiling ? "-lc++abi_p" : "-lc++abi"); CmdArgs.push_back(Profiling ? 
"-lpthread_p" : "-lpthread"); } diff --git a/clang/lib/Driver/ToolChains/VEToolchain.cpp b/clang/lib/Driver/ToolChains/VEToolchain.cpp index 1e43796be1ff..9be239262db8 100644 --- a/clang/lib/Driver/ToolChains/VEToolchain.cpp +++ b/clang/lib/Driver/ToolChains/VEToolchain.cpp @@ -141,6 +141,8 @@ void VEToolChain::AddCXXStdlibLibArgs(const ArgList &Args, tools::addArchSpecificRPath(*this, Args, CmdArgs); CmdArgs.push_back("-lc++"); + if (Args.hasArg(options::OPT_fexperimental_library)) + CmdArgs.push_back("-lc++experimental"); CmdArgs.push_back("-lc++abi"); CmdArgs.push_back("-lunwind"); // libc++ requires -lpthread under glibc environment diff --git a/clang/lib/Driver/ToolChains/WebAssembly.cpp b/clang/lib/Driver/ToolChains/WebAssembly.cpp index c5e4d569793c..b051bff87512 100644 --- a/clang/lib/Driver/ToolChains/WebAssembly.cpp +++ b/clang/lib/Driver/ToolChains/WebAssembly.cpp @@ -444,6 +444,8 @@ void WebAssembly::AddCXXStdlibLibArgs(const llvm::opt::ArgList &Args, switch (GetCXXStdlibType(Args)) { case ToolChain::CST_Libcxx: CmdArgs.push_back("-lc++"); + if (Args.hasArg(options::OPT_fexperimental_library)) + CmdArgs.push_back("-lc++experimental"); CmdArgs.push_back("-lc++abi"); break; case ToolChain::CST_Libstdcxx: diff --git a/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp b/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp index bffa66c2d944..1a785182e363 100644 --- a/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp +++ b/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp @@ -303,6 +303,7 @@ public: // Skip templated functions. switch (Decl->getTemplatedKind()) { case FunctionDecl::TK_NonTemplate: + case FunctionDecl::TK_DependentNonTemplate: break; case FunctionDecl::TK_MemberSpecialization: case FunctionDecl::TK_FunctionTemplateSpecialization: diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index 1cd28ab073da..651ec80d6196 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -37,7 +37,7 @@ static bool shouldIndentWrappedSelectorName(const FormatStyle &Style, // Returns the length of everything up to the first possible line break after // the ), ], } or > matching \c Tok. static unsigned getLengthToMatchingParen(const FormatToken &Tok, - const SmallVector<ParenState> &Stack) { + ArrayRef<ParenState> Stack) { // Normally whether or not a break before T is possible is calculated and // stored in T.CanBreakBefore. Braces, array initializers and text proto // messages like `key: < ... 
>` are an exception: a break is possible @@ -656,6 +656,7 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, int PPColumnCorrection = 0; if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash && Previous.is(tok::hash) && State.FirstIndent > 0 && + &Previous == State.Line->First && (State.Line->Type == LT_PreprocessorDirective || State.Line->Type == LT_ImportStatement)) { Spaces += State.FirstIndent; @@ -1190,6 +1191,10 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { break; } } + if (NextNonComment->isOneOf(TT_CtorInitializerColon, TT_InheritanceColon, + TT_InheritanceComma)) { + return State.FirstIndent + Style.ConstructorInitializerIndentWidth; + } if ((PreviousNonComment && (PreviousNonComment->ClosesTemplateDeclaration || PreviousNonComment->ClosesRequiresClause || @@ -1264,10 +1269,6 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { Style.BreakInheritanceList == FormatStyle::BILS_AfterColon) { return CurrentState.Indent; } - if (NextNonComment->isOneOf(TT_CtorInitializerColon, TT_InheritanceColon, - TT_InheritanceComma)) { - return State.FirstIndent + Style.ConstructorInitializerIndentWidth; - } if (Previous.is(tok::r_paren) && !Current.isBinaryOperator() && !Current.isOneOf(tok::colon, tok::comment)) { return ContinuationIndent; diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index d13907faca43..2659fa2af1a7 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -1895,26 +1895,31 @@ private: void removeBraces(SmallVectorImpl<AnnotatedLine *> &Lines, tooling::Replacements &Result) { const auto &SourceMgr = Env.getSourceManager(); + bool EndsWithComment = false; for (AnnotatedLine *Line : Lines) { removeBraces(Line->Children, Result); - if (!Line->Affected) - continue; - for (FormatToken *Token = Line->First; Token && !Token->Finalized; - Token = Token->Next) { - if (!Token->Optional) - continue; - assert(Token->isOneOf(tok::l_brace, tok::r_brace)); - assert(Token->Next || Token == Line->Last); - const auto Start = - Token == Line->Last || - (Token->Next->isOneOf(tok::kw_else, tok::comment) && - Token->Next->NewlinesBefore > 0) - ? Token->WhitespaceRange.getBegin() - : Token->Tok.getLocation(); - const auto Range = - CharSourceRange::getCharRange(Start, Token->Tok.getEndLoc()); - cantFail(Result.add(tooling::Replacement(SourceMgr, Range, ""))); + if (Line->Affected) { + for (FormatToken *Token = Line->First; Token && !Token->Finalized; + Token = Token->Next) { + if (!Token->Optional) + continue; + assert(Token->isOneOf(tok::l_brace, tok::r_brace)); + assert(Token->Previous || Token == Line->First); + const FormatToken *Next = Token->Next; + assert(Next || Token == Line->Last); + const auto Start = + (!Token->Previous && EndsWithComment) || + (Next && !(Next->isOneOf(tok::kw_else, tok::comment) && + Next->NewlinesBefore > 0)) + ? 
Token->Tok.getLocation() + : Token->WhitespaceRange.getBegin(); + const auto Range = + CharSourceRange::getCharRange(Start, Token->Tok.getEndLoc()); + cantFail(Result.add(tooling::Replacement(SourceMgr, Range, ""))); + } } + assert(Line->Last); + EndsWithComment = Line->Last->is(tok::comment); } } }; diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 98c012994f45..5991cf23d5dc 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -999,7 +999,8 @@ private: FormatToken *Prev = Tok->getPreviousNonComment(); if (!Prev) break; - if (Prev->isOneOf(tok::r_paren, tok::kw_noexcept)) { + if (Prev->isOneOf(tok::r_paren, tok::kw_noexcept) || + Prev->ClosesRequiresClause) { Tok->setType(TT_CtorInitializerColon); } else if (Prev->is(tok::kw_try)) { // Member initializer list within function try block. @@ -2317,7 +2318,15 @@ private: // After right braces, star tokens are likely to be pointers to struct, // union, or class. // struct {} *ptr; - if (PrevToken->is(tok::r_brace) && Tok.is(tok::star)) + // This by itself is not sufficient to distinguish from multiplication + // following a brace-initialized expression, as in: + // int i = int{42} * 2; + // In the struct case, the part of the struct declaration until the `{` and + // the `}` are put on separate unwrapped lines; in the brace-initialized + // case, the matching `{` is on the same unwrapped line, so check for the + // presence of the matching brace to distinguish between those. + if (PrevToken->is(tok::r_brace) && Tok.is(tok::star) && + !PrevToken->MatchingParen) return TT_PointerOrReference; // For "} &&" diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index 97c3d86282a0..83b4f1e7991f 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -539,7 +539,7 @@ bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace, break; case tok::r_brace: if (OpeningBrace) { - if (!Style.RemoveBracesLLVM || + if (!Style.RemoveBracesLLVM || Line->InPPDirective || !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) { return false; } @@ -2119,6 +2119,7 @@ bool UnwrappedLineParser::tryToParseLambda() { case tok::amp: case tok::star: case tok::kw_const: + case tok::kw_constexpr: case tok::comma: case tok::less: case tok::greater: diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp index ed3e314cc73b..7b07ab948f64 100644 --- a/clang/lib/Frontend/FrontendAction.cpp +++ b/clang/lib/Frontend/FrontendAction.cpp @@ -414,7 +414,7 @@ static std::error_code collectModuleHeaderIncludes( // Sort header paths and make the header inclusion order deterministic // across different OSs and filesystems. - llvm::sort(Headers.begin(), Headers.end(), llvm::less_first()); + llvm::sort(Headers, llvm::less_first()); for (auto &H : Headers) { // Include this header as part of the umbrella directory. 
Module->addTopHeader(H.second); @@ -1205,4 +1205,3 @@ bool WrapperFrontendAction::hasCodeCompletionSupport() const { WrapperFrontendAction::WrapperFrontendAction( std::unique_ptr<FrontendAction> WrappedAction) : WrappedAction(std::move(WrappedAction)) {} - diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index d0360696ff9c..20bfbf144a30 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -298,12 +298,12 @@ static void DefineFastIntType(unsigned TypeWidth, bool IsSigned, /// Get the value the ATOMIC_*_LOCK_FREE macro should have for a type with /// the specified properties. -static const char *getLockFreeValue(unsigned TypeWidth, unsigned TypeAlign, - unsigned InlineWidth) { +static const char *getLockFreeValue(unsigned TypeWidth, unsigned InlineWidth) { // Fully-aligned, power-of-2 sizes no larger than the inline // width will be inlined as lock-free operations. - if (TypeWidth == TypeAlign && (TypeWidth & (TypeWidth - 1)) == 0 && - TypeWidth <= InlineWidth) + // Note: we do not need to check alignment since _Atomic(T) is always + // appropriately-aligned in clang. + if ((TypeWidth & (TypeWidth - 1)) == 0 && TypeWidth <= InlineWidth) return "2"; // "always lock free" // We cannot be certain what operations the lib calls might be // able to implement as lock-free on future processors. @@ -829,15 +829,8 @@ static void InitializePredefinedMacros(const TargetInfo &TI, if (LangOpts.ObjCRuntime.getKind() == ObjCRuntime::ObjFW) { VersionTuple tuple = LangOpts.ObjCRuntime.getVersion(); - - unsigned minor = 0; - if (tuple.getMinor()) - minor = tuple.getMinor().value(); - - unsigned subminor = 0; - if (tuple.getSubminor()) - subminor = tuple.getSubminor().value(); - + unsigned minor = tuple.getMinor().value_or(0); + unsigned subminor = tuple.getSubminor().value_or(0); Builder.defineMacro("__OBJFW_RUNTIME_ABI__", Twine(tuple.getMajor() * 10000 + minor * 100 + subminor)); @@ -1149,7 +1142,6 @@ static void InitializePredefinedMacros(const TargetInfo &TI, #define DEFINE_LOCK_FREE_MACRO(TYPE, Type) \ Builder.defineMacro(Prefix + #TYPE "_LOCK_FREE", \ getLockFreeValue(TI.get##Type##Width(), \ - TI.get##Type##Align(), \ InlineWidthBits)); DEFINE_LOCK_FREE_MACRO(BOOL, Bool); DEFINE_LOCK_FREE_MACRO(CHAR, Char); @@ -1164,7 +1156,6 @@ static void InitializePredefinedMacros(const TargetInfo &TI, DEFINE_LOCK_FREE_MACRO(LLONG, LongLong); Builder.defineMacro(Prefix + "POINTER_LOCK_FREE", getLockFreeValue(TI.getPointerWidth(0), - TI.getPointerAlign(0), InlineWidthBits)); #undef DEFINE_LOCK_FREE_MACRO }; diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp index 6c0ea0cde358..ab0dbcef6534 100644 --- a/clang/lib/Frontend/TextDiagnostic.cpp +++ b/clang/lib/Frontend/TextDiagnostic.cpp @@ -815,6 +815,7 @@ void TextDiagnostic::emitDiagnosticLoc(FullSourceLoc Loc, PresumedLoc PLoc, emitFilename(PLoc.getFilename(), Loc.getManager()); switch (DiagOpts->getFormat()) { + case DiagnosticOptions::SARIF: case DiagnosticOptions::Clang: if (DiagOpts->ShowLine) OS << ':' << LineNo; @@ -837,6 +838,7 @@ void TextDiagnostic::emitDiagnosticLoc(FullSourceLoc Loc, PresumedLoc PLoc, OS << ColNo; } switch (DiagOpts->getFormat()) { + case DiagnosticOptions::SARIF: case DiagnosticOptions::Clang: case DiagnosticOptions::Vi: OS << ':'; break; case DiagnosticOptions::MSVC: diff --git a/clang/lib/Headers/__clang_cuda_intrinsics.h b/clang/lib/Headers/__clang_cuda_intrinsics.h index cfd5eb869e34..b87413e12a27 100644 
--- a/clang/lib/Headers/__clang_cuda_intrinsics.h +++ b/clang/lib/Headers/__clang_cuda_intrinsics.h @@ -71,8 +71,8 @@ } \ inline __device__ unsigned long long __FnName( \ unsigned long long __val, __Type __offset, int __width = warpSize) { \ - return static_cast<unsigned long long>(::__FnName( \ - static_cast<unsigned long long>(__val), __offset, __width)); \ + return static_cast<unsigned long long>( \ + ::__FnName(static_cast<long long>(__val), __offset, __width)); \ } \ inline __device__ double __FnName(double __val, __Type __offset, \ int __width = warpSize) { \ @@ -139,8 +139,8 @@ __MAKE_SHUFFLES(__shfl_xor, __nvvm_shfl_bfly_i32, __nvvm_shfl_bfly_f32, 0x1f, inline __device__ unsigned long long __FnName( \ unsigned int __mask, unsigned long long __val, __Type __offset, \ int __width = warpSize) { \ - return static_cast<unsigned long long>(::__FnName( \ - __mask, static_cast<unsigned long long>(__val), __offset, __width)); \ + return static_cast<unsigned long long>( \ + ::__FnName(__mask, static_cast<long long>(__val), __offset, __width)); \ } \ inline __device__ long __FnName(unsigned int __mask, long __val, \ __Type __offset, int __width = warpSize) { \ diff --git a/clang/lib/Headers/ppc_wrappers/emmintrin.h b/clang/lib/Headers/ppc_wrappers/emmintrin.h index 8c6aa23c1a0d..a4c458a41bcf 100644 --- a/clang/lib/Headers/ppc_wrappers/emmintrin.h +++ b/clang/lib/Headers/ppc_wrappers/emmintrin.h @@ -36,7 +36,8 @@ #ifndef EMMINTRIN_H_ #define EMMINTRIN_H_ -#if defined(__ppc64__) && (defined(__linux__) || defined(__FreeBSD__)) +#if defined(__ppc64__) && \ + (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) #include <altivec.h> @@ -2261,7 +2262,7 @@ extern __inline __m128d #else #include_next <emmintrin.h> -#endif /* defined(__ppc64__) && (defined(__linux__) || defined(__FreeBSD__)) \ - */ +#endif /* defined(__ppc64__) && + * (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */ #endif /* EMMINTRIN_H_ */ diff --git a/clang/lib/Headers/ppc_wrappers/mm_malloc.h b/clang/lib/Headers/ppc_wrappers/mm_malloc.h index 29c1de4a83e1..65920917f3bd 100644 --- a/clang/lib/Headers/ppc_wrappers/mm_malloc.h +++ b/clang/lib/Headers/ppc_wrappers/mm_malloc.h @@ -10,7 +10,8 @@ #ifndef _MM_MALLOC_H_INCLUDED #define _MM_MALLOC_H_INCLUDED -#if defined(__ppc64__) && (defined(__linux__) || defined(__FreeBSD__)) +#if defined(__ppc64__) && \ + (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) #include <stdlib.h> diff --git a/clang/lib/Headers/ppc_wrappers/mmintrin.h b/clang/lib/Headers/ppc_wrappers/mmintrin.h index 6f9c137b6a09..70e8b81e11ee 100644 --- a/clang/lib/Headers/ppc_wrappers/mmintrin.h +++ b/clang/lib/Headers/ppc_wrappers/mmintrin.h @@ -35,7 +35,8 @@ #ifndef _MMINTRIN_H_INCLUDED #define _MMINTRIN_H_INCLUDED -#if defined(__ppc64__) && (defined(__linux__) || defined(__FreeBSD__)) +#if defined(__ppc64__) && \ + (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) #include <altivec.h> /* The Intel API is flexible enough that we must allow aliasing with other @@ -1446,7 +1447,7 @@ extern __inline __m64 #else #include_next <mmintrin.h> -#endif /* defined(__ppc64__) && (defined(__linux__) || defined(__FreeBSD__)) \ - */ +#endif /* defined(__ppc64__) && + * (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */ #endif /* _MMINTRIN_H_INCLUDED */ diff --git a/clang/lib/Headers/ppc_wrappers/pmmintrin.h b/clang/lib/Headers/ppc_wrappers/pmmintrin.h index 889f57ae89d8..fda39edbaa22 100644 --- a/clang/lib/Headers/ppc_wrappers/pmmintrin.h +++ 
b/clang/lib/Headers/ppc_wrappers/pmmintrin.h @@ -39,7 +39,8 @@ #ifndef PMMINTRIN_H_ #define PMMINTRIN_H_ -#if defined(__ppc64__) && (defined(__linux__) || defined(__FreeBSD__)) +#if defined(__ppc64__) && \ + (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) /* We need definitions from the SSE2 and SSE header files*/ #include <emmintrin.h> @@ -138,7 +139,7 @@ extern __inline __m128i #else #include_next <pmmintrin.h> -#endif /* defined(__ppc64__) && (defined(__linux__) || defined(__FreeBSD__)) \ - */ +#endif /* defined(__ppc64__) && + * (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */ #endif /* PMMINTRIN_H_ */ diff --git a/clang/lib/Headers/ppc_wrappers/smmintrin.h b/clang/lib/Headers/ppc_wrappers/smmintrin.h index 694d5aa06940..6fe6c8a93d9b 100644 --- a/clang/lib/Headers/ppc_wrappers/smmintrin.h +++ b/clang/lib/Headers/ppc_wrappers/smmintrin.h @@ -29,7 +29,8 @@ #ifndef SMMINTRIN_H_ #define SMMINTRIN_H_ -#if defined(__ppc64__) && (defined(__linux__) || defined(__FreeBSD__)) +#if defined(__ppc64__) && \ + (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) #include <altivec.h> #include <tmmintrin.h> @@ -656,7 +657,7 @@ extern __inline __m128i #else #include_next <smmintrin.h> -#endif /* defined(__ppc64__) && (defined(__linux__) || defined(__FreeBSD__)) \ - */ +#endif /* defined(__ppc64__) && + * (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */ #endif /* SMMINTRIN_H_ */ diff --git a/clang/lib/Headers/ppc_wrappers/tmmintrin.h b/clang/lib/Headers/ppc_wrappers/tmmintrin.h index 1725eb9b8f64..6185ca1e7e71 100644 --- a/clang/lib/Headers/ppc_wrappers/tmmintrin.h +++ b/clang/lib/Headers/ppc_wrappers/tmmintrin.h @@ -25,7 +25,8 @@ #ifndef TMMINTRIN_H_ #define TMMINTRIN_H_ -#if defined(__ppc64__) && (defined(__linux__) || defined(__FreeBSD__)) +#if defined(__ppc64__) && \ + (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) #include <altivec.h> @@ -446,7 +447,7 @@ extern __inline __m64 #else #include_next <tmmintrin.h> -#endif /* defined(__ppc64__) && (defined(__linux__) || defined(__FreeBSD__)) \ - */ +#endif /* defined(__ppc64__) && + * (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */ #endif /* TMMINTRIN_H_ */ diff --git a/clang/lib/Headers/ppc_wrappers/xmmintrin.h b/clang/lib/Headers/ppc_wrappers/xmmintrin.h index 8bf29777b79c..ee0032ca159c 100644 --- a/clang/lib/Headers/ppc_wrappers/xmmintrin.h +++ b/clang/lib/Headers/ppc_wrappers/xmmintrin.h @@ -35,7 +35,8 @@ #ifndef XMMINTRIN_H_ #define XMMINTRIN_H_ -#if defined(__ppc64__) && (defined(__linux__) || defined(__FreeBSD__)) +#if defined(__ppc64__) && \ + (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) /* Define four value permute mask */ #define _MM_SHUFFLE(w, x, y, z) (((w) << 6) | ((x) << 4) | ((y) << 2) | (z)) @@ -1820,7 +1821,7 @@ extern __inline void #else #include_next <xmmintrin.h> -#endif /* defined(__ppc64__) && (defined(__linux__) || defined(__FreeBSD__)) \ - */ +#endif /* defined(__ppc64__) && + * (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */ #endif /* XMMINTRIN_H_ */ diff --git a/clang/lib/Headers/x86gprintrin.h b/clang/lib/Headers/x86gprintrin.h index 01e741f1eb61..2c2fbb97c9ac 100644 --- a/clang/lib/Headers/x86gprintrin.h +++ b/clang/lib/Headers/x86gprintrin.h @@ -25,11 +25,29 @@ #include <crc32intrin.h> #endif -#define __SSC_MARK(Tag) \ - __asm__ __volatile__("mov {%%ebx, %%eax|eax, ebx}; " \ - "mov {%0, %%ebx|ebx, %0}; " \ +#if defined(__i386__) +#define __FULLBX "ebx" +#define __TMPGPR "eax" +#else +// When in 64-bit 
target, the 32-bit operands generate a 32-bit result, +// zero-extended to a 64-bit result in the destination general-purpose, +// It means "mov x %ebx" will clobber the higher 32 bits of rbx, so we +// should preserve the 64-bit register rbx. +#define __FULLBX "rbx" +#define __TMPGPR "rax" +#endif + +#define __MOVEGPR(__r1, __r2) "mov {%%"__r1 ", %%"__r2 "|"__r2 ", "__r1"};" + +#define __SAVE_GPRBX __MOVEGPR(__FULLBX, __TMPGPR) +#define __RESTORE_GPRBX __MOVEGPR(__TMPGPR, __FULLBX) + +#define __SSC_MARK(__Tag) \ + __asm__ __volatile__( __SAVE_GPRBX \ + "mov {%0, %%ebx|ebx, %0}; " \ ".byte 0x64, 0x67, 0x90; " \ - "mov {%%eax, %%ebx|ebx, eax};" ::"i"(Tag) \ - : "%eax"); + __RESTORE_GPRBX \ + ::"i"(__Tag) \ + : __TMPGPR ); #endif /* __X86GPRINTRIN_H */ diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index b3aac9df6546..a4cff403e739 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -1462,11 +1462,11 @@ static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts) { return false; } else if (LangOpts.DollarIdents && '$' == C) { return true; - } else if (LangOpts.CPlusPlus) { + } else if (LangOpts.CPlusPlus || LangOpts.C2x) { // A non-leading codepoint must have the XID_Continue property. // XIDContinueRanges doesn't contains characters also in XIDStartRanges, // so we need to check both tables. - // '_' doesn't have the XID_Continue property but is allowed in C++. + // '_' doesn't have the XID_Continue property but is allowed in C and C++. static const llvm::sys::UnicodeCharSet XIDStartChars(XIDStartRanges); static const llvm::sys::UnicodeCharSet XIDContinueChars(XIDContinueRanges); return C == '_' || XIDStartChars.contains(C) || @@ -1486,7 +1486,7 @@ static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts) { if (LangOpts.AsmPreprocessor) { return false; } - if (LangOpts.CPlusPlus) { + if (LangOpts.CPlusPlus || LangOpts.C2x) { static const llvm::sys::UnicodeCharSet XIDStartChars(XIDStartRanges); // '_' doesn't have the XID_Start property but is allowed in C++. return C == '_' || XIDStartChars.contains(C); diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp index 57e344622f25..47d6f5893e97 100644 --- a/clang/lib/Lex/ModuleMap.cpp +++ b/clang/lib/Lex/ModuleMap.cpp @@ -456,10 +456,8 @@ static bool violatesPrivateInclude(Module *RequestingModule, &Header.getModule()->Headers[Module::HK_Private], &Header.getModule()->Headers[Module::HK_PrivateTextual]}; for (auto *Hs : HeaderList) - IsPrivate |= - std::find_if(Hs->begin(), Hs->end(), [&](const Module::Header &H) { - return H.Entry == IncFileEnt; - }) != Hs->end(); + IsPrivate |= llvm::any_of( + *Hs, [&](const Module::Header &H) { return H.Entry == IncFileEnt; }); assert(IsPrivate && "inconsistent headers and roles"); } #endif diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index 352e1f217819..9a8fd4391b41 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -1261,7 +1261,16 @@ void Preprocessor::HandleDirective(Token &Result) { return HandleIncludeNextDirective(SavedHash.getLocation(), Result); case tok::pp_warning: - Diag(Result, diag::ext_pp_warning_directive); + if (LangOpts.CPlusPlus) + Diag(Result, LangOpts.CPlusPlus2b + ? diag::warn_cxx2b_compat_warning_directive + : diag::ext_pp_warning_directive) + << /*C++2b*/ 1; + else + Diag(Result, LangOpts.C2x ? 
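The ModuleMap.cpp hunk above (like the SemaExpr.cpp and SemaTemplate.cpp hunks later in this diff) replaces an iterator-pair std::find_if/std::any_of call with the range form llvm::any_of from llvm/ADT/STLExtras.h. Outside of LLVM the same shape is available from C++20 ranges; a minimal sketch with hypothetical names:

#include <algorithm>
#include <vector>

static bool anyNegative(const std::vector<int> &Values) {
  // Predicate over the whole range, no begin()/end() bookkeeping.
  return std::ranges::any_of(Values, [](int V) { return V < 0; });
}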
diag::warn_c2x_compat_warning_directive + : diag::ext_pp_warning_directive) + << /*C2x*/ 0; + return HandleUserDiagnosticDirective(Result, true); case tok::pp_ident: return HandleIdentSCCSDirective(Result); @@ -1806,22 +1815,14 @@ static void diagnoseAutoModuleImport( Preprocessor &PP, SourceLocation HashLoc, Token &IncludeTok, ArrayRef<std::pair<IdentifierInfo *, SourceLocation>> Path, SourceLocation PathEnd) { - StringRef ImportKeyword; - if (PP.getLangOpts().ObjC) - ImportKeyword = "@import"; - else if (PP.getLangOpts().ModulesTS || PP.getLangOpts().CPlusPlusModules) - ImportKeyword = "import"; - else - return; // no import syntax available - SmallString<128> PathString; for (size_t I = 0, N = Path.size(); I != N; ++I) { if (I) PathString += '.'; PathString += Path[I].first->getName(); } - int IncludeKind = 0; + int IncludeKind = 0; switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) { case tok::pp_include: IncludeKind = 0; @@ -1843,12 +1844,8 @@ static void diagnoseAutoModuleImport( llvm_unreachable("unknown include directive kind"); } - CharSourceRange ReplaceRange(SourceRange(HashLoc, PathEnd), - /*IsTokenRange=*/false); - PP.Diag(HashLoc, diag::warn_auto_module_import) - << IncludeKind << PathString - << FixItHint::CreateReplacement( - ReplaceRange, (ImportKeyword + " " + PathString + ";").str()); + PP.Diag(HashLoc, diag::remark_pp_include_directive_modular_translation) + << IncludeKind << PathString; } // Given a vector of path components and a string containing the real diff --git a/clang/lib/Parse/ParseAST.cpp b/clang/lib/Parse/ParseAST.cpp index 04b3f0460bf3..5fca029a4266 100644 --- a/clang/lib/Parse/ParseAST.cpp +++ b/clang/lib/Parse/ParseAST.cpp @@ -172,6 +172,29 @@ void clang::ParseAST(Sema &S, bool PrintStats, bool SkipFunctionBodies) { for (Decl *D : S.WeakTopLevelDecls()) Consumer->HandleTopLevelDecl(DeclGroupRef(D)); + // For C++20 modules, the codegen for module initializers needs to be altered + // and to be able to use a name based on the module name. + + // At this point, we should know if we are building a non-header C++20 module. + if (S.getLangOpts().CPlusPlusModules && !S.getLangOpts().IsHeaderFile && + !S.getLangOpts().CurrentModule.empty()) { + // If we are building the module from source, then the top level module + // will be here. + Module *CodegenModule = S.getCurrentModule(); + bool Interface = true; + if (CodegenModule) + // We only use module initializers for interfaces (including partition + // implementation units). + Interface = S.currentModuleIsInterface(); + else + // If we are building the module from a PCM file, then the module can be + // found here. + CodegenModule = S.getPreprocessor().getCurrentModule(); + // If neither. then .... + assert(CodegenModule && "codegen for a module, but don't know which?"); + if (Interface) + S.getASTContext().setModuleForCodeGen(CodegenModule); + } Consumer->HandleTranslationUnit(S.getASTContext()); // Finalize the template instantiation observer chain. diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index 164fea6a449b..9780a0aba749 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -1575,8 +1575,7 @@ public: // Sort the uses by their SourceLocations. While not strictly // guaranteed to produce them in line/column order, this will provide // a stable ordering. 
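The ParseAST() addition above records which C++20 module is being built so that code generation can later emit a module initializer named after it. The input it concerns is an ordinary non-header named-module interface unit, for example (file name illustrative):

// m.cppm -- building it with something like
//   clang++ -std=c++20 --precompile m.cppm
// goes through the ParseAST() path shown above.
export module m;

export int answer() { return 42; }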
- llvm::sort(vec->begin(), vec->end(), - [](const UninitUse &a, const UninitUse &b) { + llvm::sort(*vec, [](const UninitUse &a, const UninitUse &b) { // Prefer a more confident report over a less confident one. if (a.getKind() != b.getKind()) return a.getKind() > b.getKind(); diff --git a/clang/lib/Sema/CodeCompleteConsumer.cpp b/clang/lib/Sema/CodeCompleteConsumer.cpp index 8e8a1be38c0f..e93be3e04dfe 100644 --- a/clang/lib/Sema/CodeCompleteConsumer.cpp +++ b/clang/lib/Sema/CodeCompleteConsumer.cpp @@ -515,7 +515,8 @@ CodeCompleteConsumer::OverloadCandidate::getFunctionType() const { case CK_FunctionType: return Type; - + case CK_FunctionProtoTypeLoc: + return ProtoTypeLoc.getTypePtr(); case CK_Template: case CK_Aggregate: return nullptr; @@ -524,6 +525,13 @@ CodeCompleteConsumer::OverloadCandidate::getFunctionType() const { llvm_unreachable("Invalid CandidateKind!"); } +const FunctionProtoTypeLoc +CodeCompleteConsumer::OverloadCandidate::getFunctionProtoTypeLoc() const { + if (Kind == CK_FunctionProtoTypeLoc) + return ProtoTypeLoc; + return FunctionProtoTypeLoc(); +} + unsigned CodeCompleteConsumer::OverloadCandidate::getNumParams() const { if (Kind == CK_Template) return Template->getTemplateParameters()->size(); @@ -597,7 +605,12 @@ CodeCompleteConsumer::OverloadCandidate::getParamDecl(unsigned N) const { if (const auto *FD = getFunction()) { if (N < FD->param_size()) return FD->getParamDecl(N); + } else if (Kind == CK_FunctionProtoTypeLoc) { + if (N < ProtoTypeLoc.getNumParams()) { + return ProtoTypeLoc.getParam(N); + } } + return nullptr; } diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp index 185ccebe2717..0a49d72ba963 100644 --- a/clang/lib/Sema/SemaCUDA.cpp +++ b/clang/lib/Sema/SemaCUDA.cpp @@ -714,7 +714,7 @@ void Sema::MaybeAddCUDAConstantAttr(VarDecl *VD) { // Do not promote dependent variables since the cotr/dtor/initializer are // not determined. Do it after instantiation. if (getLangOpts().CUDAIsDevice && !VD->hasAttr<CUDAConstantAttr>() && - !VD->hasAttr<CUDAConstantAttr>() && !VD->hasAttr<CUDASharedAttr>() && + !VD->hasAttr<CUDASharedAttr>() && (VD->isFileVarDecl() || VD->isStaticDataMember()) && !IsDependentVar(VD) && ((VD->isConstexpr() || VD->getType().isConstQualified()) && diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index aed1d9befe2b..dae51d0690e6 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -11883,6 +11883,9 @@ Sema::CheckReturnValExpr(Expr *RetValExp, QualType lhsType, /// warning if the comparison is not likely to do what the programmer intended. void Sema::CheckFloatComparison(SourceLocation Loc, Expr *LHS, Expr *RHS, BinaryOperatorKind Opcode) { + if (!BinaryOperator::isEqualityOp(Opcode)) + return; + // Match and capture subexpressions such as "(float) X == 0.1". FloatingLiteral *FPLiteral; CastExpr *FPCast; @@ -11918,8 +11921,8 @@ void Sema::CheckFloatComparison(SourceLocation Loc, Expr *LHS, Expr *RHS, // Special case: check for x == x (which is OK). // Do not emit warnings for such cases. 
- if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen)) - if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen)) + if (auto *DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen)) + if (auto *DRR = dyn_cast<DeclRefExpr>(RightExprSansParen)) if (DRL->getDecl() == DRR->getDecl()) return; @@ -15827,11 +15830,26 @@ void Sema::CheckCastAlign(Expr *Op, QualType T, SourceRange TRange) { /// We avoid emitting out-of-bounds access warnings for such arrays as they are /// commonly used to emulate flexible arrays in C89 code. static bool IsTailPaddedMemberArray(Sema &S, const llvm::APInt &Size, - const NamedDecl *ND) { - if (Size != 1 || !ND) return false; + const NamedDecl *ND, + unsigned StrictFlexArraysLevel) { + if (!ND) + return false; + + if (StrictFlexArraysLevel >= 2 && Size != 0) + return false; + + if (StrictFlexArraysLevel == 1 && Size.ule(1)) + return false; + + // FIXME: While the default -fstrict-flex-arrays=0 permits Size>1 trailing + // arrays to be treated as flexible-array-members, we still emit diagnostics + // as if they are not. Pending further discussion... + if (StrictFlexArraysLevel == 0 && Size != 1) + return false; const FieldDecl *FD = dyn_cast<FieldDecl>(ND); - if (!FD) return false; + if (!FD) + return false; // Don't consider sizes resulting from macro expansions or template argument // substitution to form C89 tail-padded arrays. @@ -15854,10 +15872,13 @@ static bool IsTailPaddedMemberArray(Sema &S, const llvm::APInt &Size, } const RecordDecl *RD = dyn_cast<RecordDecl>(FD->getDeclContext()); - if (!RD) return false; - if (RD->isUnion()) return false; + if (!RD) + return false; + if (RD->isUnion()) + return false; if (const CXXRecordDecl *CRD = dyn_cast<CXXRecordDecl>(RD)) { - if (!CRD->isStandardLayout()) return false; + if (!CRD->isStandardLayout()) + return false; } // See if this is the last field decl in the record. @@ -15985,9 +16006,14 @@ void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr, // example). In this case we have no information about whether the array // access exceeds the array bounds. However we can still diagnose an array // access which precedes the array bounds. + // + // FIXME: this check should be redundant with the IsUnboundedArray check + // above. if (BaseType->isIncompleteType()) return; + // FIXME: this check should belong to the IsTailPaddedMemberArray call + // below. llvm::APInt size = ArrayTy->getSize(); if (!size.isStrictlyPositive()) return; @@ -16020,10 +16046,9 @@ void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr, if (AllowOnePastEnd ? index.ule(size) : index.ult(size)) return; - // Also don't warn for arrays of size 1 which are members of some - // structure. These are often used to approximate flexible arrays in C89 - // code. - if (IsTailPaddedMemberArray(*this, size, ND)) + // Also don't warn for Flexible Array Member emulation. 
+ const unsigned StrictFlexArraysLevel = getLangOpts().StrictFlexArrays; + if (IsTailPaddedMemberArray(*this, size, ND, StrictFlexArraysLevel)) return; // Suppress the warning if the subscript expression (as identified by the diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp index 86bad736227d..8ede7c015315 100644 --- a/clang/lib/Sema/SemaCodeComplete.cpp +++ b/clang/lib/Sema/SemaCodeComplete.cpp @@ -53,6 +53,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" + #include <list> #include <map> #include <string> @@ -3722,13 +3723,11 @@ static void AddOverloadAggregateChunks(const RecordDecl *RD, /// Add function overload parameter chunks to the given code completion /// string. -static void AddOverloadParameterChunks(ASTContext &Context, - const PrintingPolicy &Policy, - const FunctionDecl *Function, - const FunctionProtoType *Prototype, - CodeCompletionBuilder &Result, - unsigned CurrentArg, unsigned Start = 0, - bool InOptional = false) { +static void AddOverloadParameterChunks( + ASTContext &Context, const PrintingPolicy &Policy, + const FunctionDecl *Function, const FunctionProtoType *Prototype, + FunctionProtoTypeLoc PrototypeLoc, CodeCompletionBuilder &Result, + unsigned CurrentArg, unsigned Start = 0, bool InOptional = false) { if (!Function && !Prototype) { Result.AddChunk(CodeCompletionString::CK_CurrentParameter, "..."); return; @@ -3747,8 +3746,9 @@ static void AddOverloadParameterChunks(ASTContext &Context, if (!FirstParameter) Opt.AddChunk(CodeCompletionString::CK_Comma); // Optional sections are nested. - AddOverloadParameterChunks(Context, Policy, Function, Prototype, Opt, - CurrentArg, P, /*InOptional=*/true); + AddOverloadParameterChunks(Context, Policy, Function, Prototype, + PrototypeLoc, Opt, CurrentArg, P, + /*InOptional=*/true); Result.AddOptionalChunk(Opt.TakeString()); return; } @@ -3762,8 +3762,10 @@ static void AddOverloadParameterChunks(ASTContext &Context, // Format the placeholder string. std::string Placeholder; - if (Function) { - const ParmVarDecl *Param = Function->getParamDecl(P); + assert(P < Prototype->getNumParams()); + if (Function || PrototypeLoc) { + const ParmVarDecl *Param = + Function ? Function->getParamDecl(P) : PrototypeLoc.getParam(P); Placeholder = FormatFunctionParameter(Policy, Param); if (Param->hasDefaultArg()) Placeholder += GetDefaultValueString(Param, Context.getSourceManager(), @@ -3916,8 +3918,8 @@ CodeCompleteConsumer::OverloadCandidate::CreateSignatureString( if (getKind() == CK_Aggregate) AddOverloadAggregateChunks(getAggregate(), Policy, Result, CurrentArg); else - AddOverloadParameterChunks(S.getASTContext(), Policy, FDecl, Proto, Result, - CurrentArg); + AddOverloadParameterChunks(S.getASTContext(), Policy, FDecl, Proto, + getFunctionProtoTypeLoc(), Result, CurrentArg); Result.AddChunk(Braced ? CodeCompletionString::CK_RightBrace : CodeCompletionString::CK_RightParen); @@ -5998,6 +6000,39 @@ ProduceSignatureHelp(Sema &SemaRef, MutableArrayRef<ResultCandidate> Candidates, return getParamType(SemaRef, Candidates, CurrentArg); } +// Given a callee expression `Fn`, if the call is through a function pointer, +// try to find the declaration of the corresponding function pointer type, +// so that we can recover argument names from it. 
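The code-completion changes around this hunk (CodeCompleteConsumer and the GetPrototypeLoc() helper introduced just below) let signature help recover parameter names for calls made through a function pointer by walking back to the pointer's declared prototype. The user-visible effect, in illustrative client code:

typedef void (*Callback)(int request_id, const char *payload);

extern Callback OnReady; // defined elsewhere

void client() {
  // Signature help at this call can now show (int request_id,
  // const char *payload), taken from the typedef's prototype.
  OnReady(1, "ok");
}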
+static FunctionProtoTypeLoc GetPrototypeLoc(Expr *Fn) { + TypeLoc Target; + if (const auto *T = Fn->getType().getTypePtr()->getAs<TypedefType>()) { + Target = T->getDecl()->getTypeSourceInfo()->getTypeLoc(); + + } else if (const auto *DR = dyn_cast<DeclRefExpr>(Fn)) { + const auto *D = DR->getDecl(); + if (const auto *const VD = dyn_cast<VarDecl>(D)) { + Target = VD->getTypeSourceInfo()->getTypeLoc(); + } + } + + if (!Target) + return {}; + + if (auto P = Target.getAs<PointerTypeLoc>()) { + Target = P.getPointeeLoc(); + } + + if (auto P = Target.getAs<ParenTypeLoc>()) { + Target = P.getInnerLoc(); + } + + if (auto F = Target.getAs<FunctionProtoTypeLoc>()) { + return F; + } + + return {}; +} + QualType Sema::ProduceCallSignatureHelp(Expr *Fn, ArrayRef<Expr *> Args, SourceLocation OpenParLoc) { Fn = unwrapParenList(Fn); @@ -6079,6 +6114,8 @@ QualType Sema::ProduceCallSignatureHelp(Expr *Fn, ArrayRef<Expr *> Args, } else { // Lastly we check whether expression's type is function pointer or // function. + + FunctionProtoTypeLoc P = GetPrototypeLoc(NakedFn); QualType T = NakedFn->getType(); if (!T->getPointeeType().isNull()) T = T->getPointeeType(); @@ -6087,8 +6124,13 @@ QualType Sema::ProduceCallSignatureHelp(Expr *Fn, ArrayRef<Expr *> Args, if (!TooManyArguments(FP->getNumParams(), ArgsWithoutDependentTypes.size(), /*PartialOverloading=*/true) || - FP->isVariadic()) - Results.push_back(ResultCandidate(FP)); + FP->isVariadic()) { + if (P) { + Results.push_back(ResultCandidate(P)); + } else { + Results.push_back(ResultCandidate(FP)); + } + } } else if (auto FT = T->getAs<FunctionType>()) // No prototype and declaration, it may be a K & R style function. Results.push_back(ResultCandidate(FT)); diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 5a546503cced..8d2fc5331a0d 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -9411,15 +9411,27 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, NewFD->setLocalExternDecl(); if (getLangOpts().CPlusPlus) { + // The rules for implicit inlines changed in C++20 for methods and friends + // with an in-class definition (when such a definition is not attached to + // the global module). User-specified 'inline' overrides this (set when + // the function decl is created above). + // FIXME: We need a better way to separate C++ standard and clang modules. + bool ImplicitInlineCXX20 = !getLangOpts().CPlusPlusModules || + !NewFD->getOwningModule() || + NewFD->getOwningModule()->isGlobalModule() || + NewFD->getOwningModule()->isModuleMapModule(); bool isInline = D.getDeclSpec().isInlineSpecified(); bool isVirtual = D.getDeclSpec().isVirtualSpecified(); bool hasExplicit = D.getDeclSpec().hasExplicitSpecifier(); isFriend = D.getDeclSpec().isFriendSpecified(); if (isFriend && !isInline && D.isFunctionDefinition()) { - // C++ [class.friend]p5 + // Pre-C++20 [class.friend]p5 // A function can be defined in a friend declaration of a // class . . . . Such a function is implicitly inline. - NewFD->setImplicitlyInline(); + // Post C++20 [class.friend]p7 + // Such a function is implicitly an inline function if it is attached + // to the global module. 
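The setImplicitlyInline() changes above implement the C++20 wording quoted in the hunk: an in-class or friend definition is implicitly inline only when it is attached to the global module, not when it is attached to a named module. An illustrative interface unit (file and names hypothetical):

// widgets.cppm
export module widgets;

export struct Widget {
  // Defined in the class body, but attached to module 'widgets', so per
  // C++20 [class.mfct]p1 it is not implicitly inline.
  int size() const { return 4; }
};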
+ NewFD->setImplicitlyInline(ImplicitInlineCXX20); } // If this is a method defined in an __interface, and is not a constructor @@ -9702,11 +9714,14 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, } if (isa<CXXMethodDecl>(NewFD) && DC == CurContext && - D.isFunctionDefinition()) { - // C++ [class.mfct]p2: + D.isFunctionDefinition() && !isInline) { + // Pre C++20 [class.mfct]p2: // A member function may be defined (8.4) in its class definition, in // which case it is an inline member function (7.1.2) - NewFD->setImplicitlyInline(); + // Post C++20 [class.mfct]p1: + // If a member function is attached to the global module and is defined + // in its class definition, it is inline. + NewFD->setImplicitlyInline(ImplicitInlineCXX20); } if (SC == SC_Static && isa<CXXMethodDecl>(NewFD) && @@ -16194,7 +16209,10 @@ Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK, ED->setIntegerTypeSourceInfo(TI); else ED->setIntegerType(QualType(EnumUnderlying.get<const Type *>(), 0)); - ED->setPromotionType(ED->getIntegerType()); + QualType EnumTy = ED->getIntegerType(); + ED->setPromotionType(EnumTy->isPromotableIntegerType() + ? Context.getPromotedIntegerType(EnumTy) + : EnumTy); } } else { // struct/union New = RecordDecl::Create(Context, Kind, SearchDC, KWLoc, Loc, Name, @@ -16816,8 +16834,11 @@ CreateNewDecl: if (TypeSourceInfo *TI = EnumUnderlying.dyn_cast<TypeSourceInfo*>()) ED->setIntegerTypeSourceInfo(TI); else - ED->setIntegerType(QualType(EnumUnderlying.get<const Type*>(), 0)); - ED->setPromotionType(ED->getIntegerType()); + ED->setIntegerType(QualType(EnumUnderlying.get<const Type *>(), 0)); + QualType EnumTy = ED->getIntegerType(); + ED->setPromotionType(EnumTy->isPromotableIntegerType() + ? Context.getPromotedIntegerType(EnumTy) + : EnumTy); assert(ED->isComplete() && "enum with type should be complete"); } } else { diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 742c4828b8dc..cd5cdbde7f3f 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -12323,7 +12323,7 @@ static QualType checkArithmeticOrEnumeralCompare(Sema &S, ExprResult &LHS, return S.InvalidOperands(Loc, LHS, RHS); // Check for comparisons of floating point operands using != and ==. - if (Type->hasFloatingRepresentation() && BinaryOperator::isEqualityOp(Opc)) + if (Type->hasFloatingRepresentation()) S.CheckFloatComparison(Loc, LHS.get(), RHS.get(), Opc); // The result of comparisons is 'bool' in C++, 'int' in C. @@ -12933,8 +12933,7 @@ QualType Sema::CheckVectorCompareOperands(ExprResult &LHS, ExprResult &RHS, diagnoseTautologicalComparison(*this, Loc, LHS.get(), RHS.get(), Opc); // Check for comparisons of floating point operands using != and ==. - if (BinaryOperator::isEqualityOp(Opc) && - LHSType->hasFloatingRepresentation()) { + if (LHSType->hasFloatingRepresentation()) { assert(RHS.get()->getType()->hasFloatingRepresentation()); CheckFloatComparison(Loc, LHS.get(), RHS.get(), Opc); } @@ -12968,8 +12967,7 @@ QualType Sema::CheckSizelessVectorCompareOperands(ExprResult &LHS, diagnoseTautologicalComparison(*this, Loc, LHS.get(), RHS.get(), Opc); // Check for comparisons of floating point operands using != and ==. 
- if (BinaryOperator::isEqualityOp(Opc) && - LHSType->hasFloatingRepresentation()) { + if (LHSType->hasFloatingRepresentation()) { assert(RHS.get()->getType()->hasFloatingRepresentation()); CheckFloatComparison(Loc, LHS.get(), RHS.get(), Opc); } @@ -15402,7 +15400,7 @@ ExprResult Sema::BuildBinOp(Scope *S, SourceLocation OpLoc, pty->getKind() == BuiltinType::Overload)) { auto *OE = dyn_cast<OverloadExpr>(LHSExpr); if (OE && !OE->hasTemplateKeyword() && !OE->hasExplicitTemplateArgs() && - std::any_of(OE->decls_begin(), OE->decls_end(), [](NamedDecl *ND) { + llvm::any_of(OE->decls(), [](NamedDecl *ND) { return isa<FunctionTemplateDecl>(ND); })) { Diag(OE->getQualifier() ? OE->getQualifierLoc().getBeginLoc() @@ -19723,7 +19721,6 @@ public: void VisitConstantExpr(ConstantExpr *E) { // Don't mark declarations within a ConstantExpression, as this expression // will be evaluated and folded to a value. - return; } void VisitDeclRefExpr(DeclRefExpr *E) { diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index 11f33c7c6363..5331193de863 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -9006,14 +9006,14 @@ Sema::BuildExprRequirement( cast<TemplateTypeParmDecl>(TPL->getParam(0))->getTypeConstraint() ->getImmediatelyDeclaredConstraint(); ExprResult Constraint = SubstExpr(IDC, MLTAL); - assert(!Constraint.isInvalid() && - "Substitution cannot fail as it is simply putting a type template " - "argument into a concept specialization expression's parameter."); - - SubstitutedConstraintExpr = - cast<ConceptSpecializationExpr>(Constraint.get()); - if (!SubstitutedConstraintExpr->isSatisfied()) - Status = concepts::ExprRequirement::SS_ConstraintsNotSatisfied; + if (Constraint.isInvalid()) { + Status = concepts::ExprRequirement::SS_ExprSubstitutionFailure; + } else { + SubstitutedConstraintExpr = + cast<ConceptSpecializationExpr>(Constraint.get()); + if (!SubstitutedConstraintExpr->isSatisfied()) + Status = concepts::ExprRequirement::SS_ConstraintsNotSatisfied; + } } return new (Context) concepts::ExprRequirement(E, IsSimple, NoexceptLoc, ReturnTypeRequirement, Status, diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp index 47c7a61f8072..242e1f81d75c 100644 --- a/clang/lib/Sema/SemaLookup.cpp +++ b/clang/lib/Sema/SemaLookup.cpp @@ -1615,7 +1615,10 @@ hasAcceptableDefaultArgument(Sema &S, const ParmDecl *D, if (!D->hasDefaultArgument()) return false; - while (D) { + llvm::SmallDenseSet<const ParmDecl *, 4> Visited; + while (D && !Visited.count(D)) { + Visited.insert(D); + auto &DefaultArg = D->getDefaultArgStorage(); if (!DefaultArg.isInherited() && S.isAcceptable(D, Kind)) return true; @@ -1625,7 +1628,8 @@ hasAcceptableDefaultArgument(Sema &S, const ParmDecl *D, Modules->push_back(S.getOwningModule(NonConstD)); } - // If there was a previous default argument, maybe its parameter is visible. + // If there was a previous default argument, maybe its parameter is + // acceptable. D = DefaultArg.getInheritedFrom(); } return false; @@ -2087,6 +2091,13 @@ bool LookupResult::isAvailableForLookup(Sema &SemaRef, NamedDecl *ND) { if (isVisible(SemaRef, ND)) return true; + // Deduction guide lives in namespace scope generally, but it is just a + // hint to the compilers. What we actually lookup for is the generated member + // of the corresponding template. So it is sufficient to check the + // reachability of the template decl. 
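The hasAcceptableDefaultArgument() hunk above guards the walk along the "inherited from" chain with a visited set so a malformed cycle cannot loop forever. A generic restatement of the pattern with a hypothetical Node type:

#include <unordered_set>

struct Node {
  const Node *InheritedFrom = nullptr;
  bool Acceptable = false;
};

static bool anyAcceptable(const Node *N) {
  std::unordered_set<const Node *> Visited;
  while (N && Visited.insert(N).second) { // stop on null or on a repeat
    if (N->Acceptable)
      return true;
    N = N->InheritedFrom;
  }
  return false;
}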
+ if (auto *DeductionGuide = ND->getDeclName().getCXXDeductionGuideTemplate()) + return SemaRef.hasReachableDefinition(DeductionGuide); + auto *DC = ND->getDeclContext(); // If ND is not visible and it is at namespace scope, it shouldn't be found // by name lookup. diff --git a/clang/lib/Sema/SemaModule.cpp b/clang/lib/Sema/SemaModule.cpp index e9a1ac17ce86..f5c24bd10daa 100644 --- a/clang/lib/Sema/SemaModule.cpp +++ b/clang/lib/Sema/SemaModule.cpp @@ -344,6 +344,16 @@ Sema::ActOnModuleDecl(SourceLocation StartLoc, SourceLocation ModuleLoc, // statements, so imports are allowed. ImportState = ModuleImportState::ImportAllowed; + // For an implementation, We already made an implicit import (its interface). + // Make and return the import decl to be added to the current TU. + if (MDK == ModuleDeclKind::Implementation) { + // Make the import decl for the interface. + ImportDecl *Import = + ImportDecl::Create(Context, CurContext, ModuleLoc, Mod, Path[0].second); + // and return it to be added. + return ConvertDeclToDeclGroup(Import); + } + // FIXME: Create a ModuleDecl. return nullptr; } diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index 67cf8f0371c5..95c83ebfaeab 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -2520,7 +2520,7 @@ void Sema::DeclareImplicitDeductionGuides(TemplateDecl *Template, continue; // Cannot make a deduction guide when unparsed arguments are present. - if (std::any_of(CD->param_begin(), CD->param_end(), [](ParmVarDecl *P) { + if (llvm::any_of(CD->parameters(), [](ParmVarDecl *P) { return !P || P->hasUnparsedDefaultArg(); })) continue; @@ -4573,7 +4573,7 @@ Sema::CheckVarTemplateId(VarTemplateDecl *Template, SourceLocation TemplateLoc, void *InsertPos = nullptr; if (VarTemplateSpecializationDecl *Spec = Template->findSpecialization( Converted, InsertPos)) { - checkSpecializationVisibility(TemplateNameLoc, Spec); + checkSpecializationReachability(TemplateNameLoc, Spec); // If we already have a variable template specialization, return it. return Spec; } @@ -4694,7 +4694,7 @@ Sema::CheckVarTemplateId(VarTemplateDecl *Template, SourceLocation TemplateLoc, dyn_cast<VarTemplatePartialSpecializationDecl>(InstantiationPattern)) Decl->setInstantiationOf(D, InstantiationArgs); - checkSpecializationVisibility(TemplateNameLoc, Decl); + checkSpecializationReachability(TemplateNameLoc, Decl); assert(Decl && "No variable template specialization?"); return Decl; diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index bd166ff6f594..9bf6ca1f8084 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -2182,6 +2182,11 @@ Decl *TemplateDeclInstantiator::VisitFunctionDecl( // definition. We don't want non-template functions to be marked as being // template instantiations. Function->setInstantiationOfMemberFunction(D, TSK_ImplicitInstantiation); + } else if (!isFriend) { + // If this is not a function template, and this is not a friend (that is, + // this is a locally declared function), save the instantiation relationship + // for the purposes of constraint instantiation. 
+ Function->setInstantiatedFromDecl(D); } if (isFriend) { diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 3edce941c381..3ab5d26a9a75 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -8669,12 +8669,13 @@ bool Sema::hasAcceptableDefinition(NamedDecl *D, NamedDecl **Suggested, // of it will do. *Suggested = nullptr; for (auto *Redecl : ED->redecls()) { - if (isVisible(Redecl)) + if (isAcceptable(Redecl, Kind)) return true; if (Redecl->isThisDeclarationADefinition() || (Redecl->isCanonicalDecl() && !*Suggested)) *Suggested = Redecl; } + return false; } D = ED->getDefinition(); diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 04ade0a3b9d0..76281d26b2ae 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -9445,6 +9445,31 @@ void ASTReader::finishPendingActions() { PendingMergedDefinitionsToDeduplicate.clear(); } +static unsigned computeODRHash(QualType Ty) { + ODRHash Hasher; + Hasher.AddQualType(Ty); + return Hasher.CalculateHash(); +} + +static unsigned computeODRHash(const Stmt *S) { + ODRHash Hasher; + Hasher.AddStmt(S); + return Hasher.CalculateHash(); +} + +static unsigned computeODRHash(const Decl *D) { + assert(D); + ODRHash Hasher; + Hasher.AddSubDecl(D); + return Hasher.CalculateHash(); +} + +static unsigned computeODRHash(const TemplateArgument &TA) { + ODRHash Hasher; + Hasher.AddTemplateArgument(TA); + return Hasher.CalculateHash(); +} + void ASTReader::diagnoseOdrViolations() { if (PendingOdrMergeFailures.empty() && PendingOdrMergeChecks.empty() && PendingFunctionOdrMergeFailures.empty() && @@ -9584,42 +9609,6 @@ void ASTReader::diagnoseOdrViolations() { // we're producing our diagnostics. Deserializing RecursionGuard(this); - // Common code for hashing helpers. 
- ODRHash Hash; - auto ComputeQualTypeODRHash = [&Hash](QualType Ty) { - Hash.clear(); - Hash.AddQualType(Ty); - return Hash.CalculateHash(); - }; - - auto ComputeODRHash = [&Hash](const Stmt *S) { - assert(S); - Hash.clear(); - Hash.AddStmt(S); - return Hash.CalculateHash(); - }; - - auto ComputeSubDeclODRHash = [&Hash](const Decl *D) { - assert(D); - Hash.clear(); - Hash.AddSubDecl(D); - return Hash.CalculateHash(); - }; - - auto ComputeTemplateArgumentODRHash = [&Hash](const TemplateArgument &TA) { - Hash.clear(); - Hash.AddTemplateArgument(TA); - return Hash.CalculateHash(); - }; - - auto ComputeTemplateParameterListODRHash = - [&Hash](const TemplateParameterList *TPL) { - assert(TPL); - Hash.clear(); - Hash.AddTemplateParameterList(TPL); - return Hash.CalculateHash(); - }; - // Used with err_module_odr_violation_mismatch_decl and // note_module_odr_violation_mismatch_decl // This list should be the same Decl's as in ODRHash::isDeclToBeProcessed @@ -9639,49 +9628,13 @@ void ASTReader::diagnoseOdrViolations() { Other }; - // Used with err_module_odr_violation_record and - // note_module_odr_violation_record - enum ODRCXXRecordDifference { - StaticAssertCondition, - StaticAssertMessage, - StaticAssertOnlyMessage, - MethodName, - MethodDeleted, - MethodDefaulted, - MethodVirtual, - MethodStatic, - MethodVolatile, - MethodConst, - MethodInline, - MethodNumberParameters, - MethodParameterType, - MethodParameterName, - MethodParameterSingleDefaultArgument, - MethodParameterDifferentDefaultArgument, - MethodNoTemplateArguments, - MethodDifferentNumberTemplateArguments, - MethodDifferentTemplateArgument, - MethodSingleBody, - MethodDifferentBody, - FriendTypeFunction, - FriendType, - FriendFunction, - FunctionTemplateDifferentNumberParameters, - FunctionTemplateParameterDifferentKind, - FunctionTemplateParameterName, - FunctionTemplateParameterSingleDefaultArgument, - FunctionTemplateParameterDifferentDefaultArgument, - FunctionTemplateParameterDifferentType, - FunctionTemplatePackParameter, - }; - // These lambdas have the common portions of the ODR diagnostics. 
This // has the same return as Diag(), so addition parameters can be passed // in with operator<< - auto ODRDiagField = [this, &ComputeQualTypeODRHash, &ComputeODRHash]( - NamedDecl *FirstRecord, StringRef FirstModule, - StringRef SecondModule, FieldDecl *FirstField, - FieldDecl *SecondField) { + auto ODRDiagField = [this](NamedDecl *FirstRecord, StringRef FirstModule, + StringRef SecondModule, + const FieldDecl *FirstField, + const FieldDecl *SecondField) { enum ODRFieldDifference { FieldName, FieldTypeName, @@ -9719,8 +9672,7 @@ void ASTReader::diagnoseOdrViolations() { QualType FirstType = FirstField->getType(); QualType SecondType = SecondField->getType(); - if (ComputeQualTypeODRHash(FirstType) != - ComputeQualTypeODRHash(SecondType)) { + if (computeODRHash(FirstType) != computeODRHash(SecondType)) { DiagError(FieldTypeName) << FirstII << FirstType; DiagNote(FieldTypeName) << SecondII << SecondType; return true; @@ -9735,10 +9687,8 @@ void ASTReader::diagnoseOdrViolations() { } if (IsFirstBitField && IsSecondBitField) { - unsigned FirstBitWidthHash = - ComputeODRHash(FirstField->getBitWidth()); - unsigned SecondBitWidthHash = - ComputeODRHash(SecondField->getBitWidth()); + unsigned FirstBitWidthHash = computeODRHash(FirstField->getBitWidth()); + unsigned SecondBitWidthHash = computeODRHash(SecondField->getBitWidth()); if (FirstBitWidthHash != SecondBitWidthHash) { DiagError(FieldDifferentWidthBitField) << FirstII << FirstField->getBitWidth()->getSourceRange(); @@ -9771,8 +9721,8 @@ void ASTReader::diagnoseOdrViolations() { } if (FirstInitializer && SecondInitializer) { - unsigned FirstInitHash = ComputeODRHash(FirstInitializer); - unsigned SecondInitHash = ComputeODRHash(SecondInitializer); + unsigned FirstInitHash = computeODRHash(FirstInitializer); + unsigned SecondInitHash = computeODRHash(SecondInitializer); if (FirstInitHash != SecondInitHash) { DiagError(FieldDifferentInitializers) << FirstII << FirstInitializer->getSourceRange(); @@ -9786,10 +9736,9 @@ void ASTReader::diagnoseOdrViolations() { }; auto ODRDiagTypeDefOrAlias = - [this, &ComputeQualTypeODRHash]( - NamedDecl *FirstRecord, StringRef FirstModule, StringRef SecondModule, - TypedefNameDecl *FirstTD, TypedefNameDecl *SecondTD, - bool IsTypeAlias) { + [this](NamedDecl *FirstRecord, StringRef FirstModule, + StringRef SecondModule, const TypedefNameDecl *FirstTD, + const TypedefNameDecl *SecondTD, bool IsTypeAlias) { enum ODRTypedefDifference { TypedefName, TypedefType, @@ -9809,8 +9758,8 @@ void ASTReader::diagnoseOdrViolations() { << SecondModule << SecondTD->getSourceRange() << DiffType; }; - auto FirstName = FirstTD->getDeclName(); - auto SecondName = SecondTD->getDeclName(); + DeclarationName FirstName = FirstTD->getDeclName(); + DeclarationName SecondName = SecondTD->getDeclName(); if (FirstName != SecondName) { DiagError(TypedefName) << IsTypeAlias << FirstName; DiagNote(TypedefName) << IsTypeAlias << SecondName; @@ -9819,8 +9768,7 @@ void ASTReader::diagnoseOdrViolations() { QualType FirstType = FirstTD->getUnderlyingType(); QualType SecondType = SecondTD->getUnderlyingType(); - if (ComputeQualTypeODRHash(FirstType) != - ComputeQualTypeODRHash(SecondType)) { + if (computeODRHash(FirstType) != computeODRHash(SecondType)) { DiagError(TypedefType) << IsTypeAlias << FirstName << FirstType; DiagNote(TypedefType) << IsTypeAlias << SecondName << SecondType; return true; @@ -9829,10 +9777,9 @@ void ASTReader::diagnoseOdrViolations() { return false; }; - auto ODRDiagVar = [&ComputeQualTypeODRHash, &ComputeODRHash, - 
this](NamedDecl *FirstRecord, StringRef FirstModule, - StringRef SecondModule, VarDecl *FirstVD, - VarDecl *SecondVD) { + auto ODRDiagVar = [this](NamedDecl *FirstRecord, StringRef FirstModule, + StringRef SecondModule, const VarDecl *FirstVD, + const VarDecl *SecondVD) { enum ODRVarDifference { VarName, VarType, @@ -9854,8 +9801,8 @@ void ASTReader::diagnoseOdrViolations() { << SecondModule << SecondVD->getSourceRange() << DiffType; }; - auto FirstName = FirstVD->getDeclName(); - auto SecondName = SecondVD->getDeclName(); + DeclarationName FirstName = FirstVD->getDeclName(); + DeclarationName SecondName = SecondVD->getDeclName(); if (FirstName != SecondName) { DiagError(VarName) << FirstName; DiagNote(VarName) << SecondName; @@ -9864,8 +9811,7 @@ void ASTReader::diagnoseOdrViolations() { QualType FirstType = FirstVD->getType(); QualType SecondType = SecondVD->getType(); - if (ComputeQualTypeODRHash(FirstType) != - ComputeQualTypeODRHash(SecondType)) { + if (computeODRHash(FirstType) != computeODRHash(SecondType)) { DiagError(VarType) << FirstName << FirstType; DiagNote(VarType) << SecondName << SecondType; return true; @@ -9887,7 +9833,7 @@ void ASTReader::diagnoseOdrViolations() { } if (FirstInit && SecondInit && - ComputeODRHash(FirstInit) != ComputeODRHash(SecondInit)) { + computeODRHash(FirstInit) != computeODRHash(SecondInit)) { DiagError(VarDifferentInitializer) << FirstName << FirstInit->getSourceRange(); DiagNote(VarDifferentInitializer) @@ -9905,52 +9851,13 @@ void ASTReader::diagnoseOdrViolations() { return false; }; - auto DifferenceSelector = [](Decl *D) { - assert(D && "valid Decl required"); - switch (D->getKind()) { - default: - return Other; - case Decl::AccessSpec: - switch (D->getAccess()) { - case AS_public: - return PublicSpecifer; - case AS_private: - return PrivateSpecifer; - case AS_protected: - return ProtectedSpecifer; - case AS_none: - break; - } - llvm_unreachable("Invalid access specifier"); - case Decl::StaticAssert: - return StaticAssert; - case Decl::Field: - return Field; - case Decl::CXXMethod: - case Decl::CXXConstructor: - case Decl::CXXDestructor: - return CXXMethod; - case Decl::TypeAlias: - return TypeAlias; - case Decl::Typedef: - return TypeDef; - case Decl::Var: - return Var; - case Decl::Friend: - return Friend; - case Decl::FunctionTemplate: - return FunctionTemplate; - } - }; - using DeclHashes = llvm::SmallVector<std::pair<Decl *, unsigned>, 4>; - auto PopulateHashes = [&ComputeSubDeclODRHash](DeclHashes &Hashes, - RecordDecl *Record, - const DeclContext *DC) { + auto PopulateHashes = [](DeclHashes &Hashes, RecordDecl *Record, + const DeclContext *DC) { for (auto *D : Record->decls()) { if (!ODRHash::isDeclToBeProcessed(D, DC)) continue; - Hashes.emplace_back(D, ComputeSubDeclODRHash(D)); + Hashes.emplace_back(D, computeODRHash(D)); } }; @@ -9962,8 +9869,45 @@ void ASTReader::diagnoseOdrViolations() { // If there is a diagnoseable difference, FirstDiffType and // SecondDiffType will not be Other and FirstDecl and SecondDecl will be // filled in if not EndOfClass. 
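The hunks above drop the file-local lambdas that shared one mutable ODRHash (ComputeQualTypeODRHash, ComputeODRHash, ComputeSubDeclODRHash, ComputeTemplateArgumentODRHash) in favour of calls to a stateless computeODRHash helper. A minimal sketch of what such an overload set can look like, assuming each overload hashes with a fresh ODRHash per call; the real helpers are introduced elsewhere in the patch and may differ in detail:

    static unsigned computeODRHash(QualType Ty) {
      ODRHash Hasher;
      Hasher.AddQualType(Ty);
      return Hasher.CalculateHash();
    }
    static unsigned computeODRHash(const Stmt *S) {
      assert(S);
      ODRHash Hasher;
      Hasher.AddStmt(S);
      return Hasher.CalculateHash();
    }
    static unsigned computeODRHash(const Decl *D) {
      assert(D);
      ODRHash Hasher;
      Hasher.AddSubDecl(D);
      return Hasher.CalculateHash();
    }
    static unsigned computeODRHash(const TemplateArgument &TA) {
      ODRHash Hasher;
      Hasher.AddTemplateArgument(TA);
      return Hasher.CalculateHash();
    }

Building a fresh hasher per call removes the need for every diagnostic lambda to capture and reset the shared Hash object, which is what allows the capture lists below to shrink to just [this].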
- auto FindTypeDiffs = [&DifferenceSelector](DeclHashes &FirstHashes, - DeclHashes &SecondHashes) { + auto FindTypeDiffs = [](DeclHashes &FirstHashes, DeclHashes &SecondHashes) { + auto DifferenceSelector = [](Decl *D) { + assert(D && "valid Decl required"); + switch (D->getKind()) { + default: + return Other; + case Decl::AccessSpec: + switch (D->getAccess()) { + case AS_public: + return PublicSpecifer; + case AS_private: + return PrivateSpecifer; + case AS_protected: + return ProtectedSpecifer; + case AS_none: + break; + } + llvm_unreachable("Invalid access specifier"); + case Decl::StaticAssert: + return StaticAssert; + case Decl::Field: + return Field; + case Decl::CXXMethod: + case Decl::CXXConstructor: + case Decl::CXXDestructor: + return CXXMethod; + case Decl::TypeAlias: + return TypeAlias; + case Decl::Typedef: + return TypeDef; + case Decl::Var: + return Var; + case Decl::Friend: + return Friend; + case Decl::FunctionTemplate: + return FunctionTemplate; + } + }; + DiffResult DR; auto FirstIt = FirstHashes.begin(); auto SecondIt = SecondHashes.begin(); @@ -10062,19 +10006,6 @@ void ASTReader::diagnoseOdrViolations() { continue; std::string SecondModule = getOwningModuleNameForDiagnostic(SecondRecord); - auto ODRDiagDeclError = [FirstRecord, &FirstModule, - this](SourceLocation Loc, SourceRange Range, - ODRCXXRecordDifference DiffType) { - return Diag(Loc, diag::err_module_odr_violation_record) - << FirstRecord << FirstModule.empty() << FirstModule << Range - << DiffType; - }; - auto ODRDiagDeclNote = [&SecondModule, - this](SourceLocation Loc, SourceRange Range, - ODRCXXRecordDifference DiffType) { - return Diag(Loc, diag::note_module_odr_violation_record) - << SecondModule << Range << DiffType; - }; auto *FirstDD = FirstRecord->DefinitionData; auto *SecondDD = RecordPair.second; @@ -10103,20 +10034,18 @@ void ASTReader::diagnoseOdrViolations() { return Diag(Loc, diag::note_module_odr_violation_definition_data) << SecondModule << Range << DiffType; }; - - unsigned FirstNumBases = FirstDD->NumBases; - unsigned FirstNumVBases = FirstDD->NumVBases; - unsigned SecondNumBases = SecondDD->NumBases; - unsigned SecondNumVBases = SecondDD->NumVBases; - auto GetSourceRange = [](struct CXXRecordDecl::DefinitionData *DD) { unsigned NumBases = DD->NumBases; if (NumBases == 0) return SourceRange(); - auto bases = DD->bases(); + ArrayRef<CXXBaseSpecifier> bases = DD->bases(); return SourceRange(bases[0].getBeginLoc(), bases[NumBases - 1].getEndLoc()); }; + unsigned FirstNumBases = FirstDD->NumBases; + unsigned FirstNumVBases = FirstDD->NumVBases; + unsigned SecondNumBases = SecondDD->NumBases; + unsigned SecondNumVBases = SecondDD->NumVBases; if (FirstNumBases != SecondNumBases) { ODRDiagBaseError(FirstRecord->getLocation(), GetSourceRange(FirstDD), NumBases) @@ -10139,30 +10068,30 @@ void ASTReader::diagnoseOdrViolations() { break; } - auto FirstBases = FirstDD->bases(); - auto SecondBases = SecondDD->bases(); - unsigned i = 0; - for (i = 0; i < FirstNumBases; ++i) { - auto FirstBase = FirstBases[i]; - auto SecondBase = SecondBases[i]; - if (ComputeQualTypeODRHash(FirstBase.getType()) != - ComputeQualTypeODRHash(SecondBase.getType())) { + ArrayRef<CXXBaseSpecifier> FirstBases = FirstDD->bases(); + ArrayRef<CXXBaseSpecifier> SecondBases = SecondDD->bases(); + unsigned I = 0; + for (I = 0; I < FirstNumBases; ++I) { + const CXXBaseSpecifier FirstBase = FirstBases[I]; + const CXXBaseSpecifier SecondBase = SecondBases[I]; + if (computeODRHash(FirstBase.getType()) != + 
computeODRHash(SecondBase.getType())) { ODRDiagBaseError(FirstRecord->getLocation(), FirstBase.getSourceRange(), BaseType) - << (i + 1) << FirstBase.getType(); + << (I + 1) << FirstBase.getType(); ODRDiagBaseNote(SecondRecord->getLocation(), SecondBase.getSourceRange(), BaseType) - << (i + 1) << SecondBase.getType(); + << (I + 1) << SecondBase.getType(); break; } if (FirstBase.isVirtual() != SecondBase.isVirtual()) { ODRDiagBaseError(FirstRecord->getLocation(), FirstBase.getSourceRange(), BaseVirtual) - << (i + 1) << FirstBase.isVirtual() << FirstBase.getType(); + << (I + 1) << FirstBase.isVirtual() << FirstBase.getType(); ODRDiagBaseNote(SecondRecord->getLocation(), SecondBase.getSourceRange(), BaseVirtual) - << (i + 1) << SecondBase.isVirtual() << SecondBase.getType(); + << (I + 1) << SecondBase.isVirtual() << SecondBase.getType(); break; } @@ -10170,17 +10099,17 @@ void ASTReader::diagnoseOdrViolations() { SecondBase.getAccessSpecifierAsWritten()) { ODRDiagBaseError(FirstRecord->getLocation(), FirstBase.getSourceRange(), BaseAccess) - << (i + 1) << FirstBase.getType() + << (I + 1) << FirstBase.getType() << (int)FirstBase.getAccessSpecifierAsWritten(); ODRDiagBaseNote(SecondRecord->getLocation(), SecondBase.getSourceRange(), BaseAccess) - << (i + 1) << SecondBase.getType() + << (I + 1) << SecondBase.getType() << (int)SecondBase.getAccessSpecifierAsWritten(); break; } } - if (i != FirstNumBases) { + if (I != FirstNumBases) { Diagnosed = true; break; } @@ -10198,13 +10127,12 @@ void ASTReader::diagnoseOdrViolations() { DeclHashes FirstTemplateHashes; DeclHashes SecondTemplateHashes; - auto PopulateTemplateParameterHashs = - [&ComputeSubDeclODRHash](DeclHashes &Hashes, - const ClassTemplateDecl *TD) { - for (auto *D : TD->getTemplateParameters()->asArray()) { - Hashes.emplace_back(D, ComputeSubDeclODRHash(D)); - } - }; + auto PopulateTemplateParameterHashs = [](DeclHashes &Hashes, + const ClassTemplateDecl *TD) { + for (auto *D : TD->getTemplateParameters()->asArray()) { + Hashes.emplace_back(D, computeODRHash(D)); + } + }; PopulateTemplateParameterHashs(FirstTemplateHashes, FirstTemplate); PopulateTemplateParameterHashs(SecondTemplateHashes, SecondTemplate); @@ -10288,11 +10216,11 @@ void ASTReader::diagnoseOdrViolations() { PopulateHashes(FirstHashes, FirstRecord, DC); PopulateHashes(SecondHashes, SecondRecord, DC); - auto DR = FindTypeDiffs(FirstHashes, SecondHashes); + DiffResult DR = FindTypeDiffs(FirstHashes, SecondHashes); ODRMismatchDecl FirstDiffType = DR.FirstDiffType; ODRMismatchDecl SecondDiffType = DR.SecondDiffType; - Decl *FirstDecl = DR.FirstDecl; - Decl *SecondDecl = DR.SecondDecl; + const Decl *FirstDecl = DR.FirstDecl; + const Decl *SecondDecl = DR.SecondDecl; if (FirstDiffType == Other || SecondDiffType == Other) { DiagnoseODRUnexpected(DR, FirstRecord, FirstModule, SecondRecord, @@ -10308,8 +10236,56 @@ void ASTReader::diagnoseOdrViolations() { break; } - assert(FirstDiffType == SecondDiffType); + // Used with err_module_odr_violation_record and + // note_module_odr_violation_record + enum ODRCXXRecordDifference { + StaticAssertCondition, + StaticAssertMessage, + StaticAssertOnlyMessage, + MethodName, + MethodDeleted, + MethodDefaulted, + MethodVirtual, + MethodStatic, + MethodVolatile, + MethodConst, + MethodInline, + MethodNumberParameters, + MethodParameterType, + MethodParameterName, + MethodParameterSingleDefaultArgument, + MethodParameterDifferentDefaultArgument, + MethodNoTemplateArguments, + MethodDifferentNumberTemplateArguments, + 
MethodDifferentTemplateArgument, + MethodSingleBody, + MethodDifferentBody, + FriendTypeFunction, + FriendType, + FriendFunction, + FunctionTemplateDifferentNumberParameters, + FunctionTemplateParameterDifferentKind, + FunctionTemplateParameterName, + FunctionTemplateParameterSingleDefaultArgument, + FunctionTemplateParameterDifferentDefaultArgument, + FunctionTemplateParameterDifferentType, + FunctionTemplatePackParameter, + }; + auto ODRDiagDeclError = [FirstRecord, &FirstModule, + this](SourceLocation Loc, SourceRange Range, + ODRCXXRecordDifference DiffType) { + return Diag(Loc, diag::err_module_odr_violation_record) + << FirstRecord << FirstModule.empty() << FirstModule << Range + << DiffType; + }; + auto ODRDiagDeclNote = [&SecondModule, + this](SourceLocation Loc, SourceRange Range, + ODRCXXRecordDifference DiffType) { + return Diag(Loc, diag::note_module_odr_violation_record) + << SecondModule << Range << DiffType; + }; + assert(FirstDiffType == SecondDiffType); switch (FirstDiffType) { case Other: case EndOfClass: @@ -10319,13 +10295,13 @@ void ASTReader::diagnoseOdrViolations() { llvm_unreachable("Invalid diff type"); case StaticAssert: { - StaticAssertDecl *FirstSA = cast<StaticAssertDecl>(FirstDecl); - StaticAssertDecl *SecondSA = cast<StaticAssertDecl>(SecondDecl); + const StaticAssertDecl *FirstSA = cast<StaticAssertDecl>(FirstDecl); + const StaticAssertDecl *SecondSA = cast<StaticAssertDecl>(SecondDecl); - Expr *FirstExpr = FirstSA->getAssertExpr(); - Expr *SecondExpr = SecondSA->getAssertExpr(); - unsigned FirstODRHash = ComputeODRHash(FirstExpr); - unsigned SecondODRHash = ComputeODRHash(SecondExpr); + const Expr *FirstExpr = FirstSA->getAssertExpr(); + const Expr *SecondExpr = SecondSA->getAssertExpr(); + unsigned FirstODRHash = computeODRHash(FirstExpr); + unsigned SecondODRHash = computeODRHash(SecondExpr); if (FirstODRHash != SecondODRHash) { ODRDiagDeclError(FirstExpr->getBeginLoc(), FirstExpr->getSourceRange(), StaticAssertCondition); @@ -10335,8 +10311,8 @@ void ASTReader::diagnoseOdrViolations() { break; } - StringLiteral *FirstStr = FirstSA->getMessage(); - StringLiteral *SecondStr = SecondSA->getMessage(); + const StringLiteral *FirstStr = FirstSA->getMessage(); + const StringLiteral *SecondStr = SecondSA->getMessage(); assert((FirstStr || SecondStr) && "Both messages cannot be empty"); if ((FirstStr && !SecondStr) || (!FirstStr && SecondStr)) { SourceLocation FirstLoc, SecondLoc; @@ -10451,8 +10427,8 @@ void ASTReader::diagnoseOdrViolations() { // CXXMethodDecl::isStatic uses the canonical Decl. With Decl merging, // FirstDecl is the canonical Decl of SecondDecl, so the storage // class needs to be checked instead. 
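The comment above is why the written storage class is compared rather than CXXMethodDecl::isStatic(): after the reader merges the two method declarations, isStatic() consults the shared canonical declaration and would answer the same for both sides. A hypothetical pair of definitions of the kind the MethodStatic check is meant to flag (names are illustrative):

    // Definition reachable from module 1:
    struct S { static void f(); };

    // Definition reachable from module 2:
    struct S { void f(); }; // ODR violation: 'f' is static in only one definition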
- const auto FirstStorage = FirstMethod->getStorageClass(); - const auto SecondStorage = SecondMethod->getStorageClass(); + StorageClass FirstStorage = FirstMethod->getStorageClass(); + StorageClass SecondStorage = SecondMethod->getStorageClass(); const bool FirstStatic = FirstStorage == SC_Static; const bool SecondStatic = SecondStorage == SC_Static; if (FirstStatic != SecondStatic) { @@ -10507,8 +10483,8 @@ void ASTReader::diagnoseOdrViolations() { QualType FirstParamType = FirstParam->getType(); QualType SecondParamType = SecondParam->getType(); if (FirstParamType != SecondParamType && - ComputeQualTypeODRHash(FirstParamType) != - ComputeQualTypeODRHash(SecondParamType)) { + computeODRHash(FirstParamType) != + computeODRHash(SecondParamType)) { if (const DecayedType *ParamDecayedType = FirstParamType->getAs<DecayedType>()) { DiagMethodError(MethodParameterType) @@ -10555,14 +10531,13 @@ void ASTReader::diagnoseOdrViolations() { } if (FirstInit && SecondInit && - ComputeODRHash(FirstInit) != ComputeODRHash(SecondInit)) { + computeODRHash(FirstInit) != computeODRHash(SecondInit)) { DiagMethodError(MethodParameterDifferentDefaultArgument) << (I + 1) << FirstInit->getSourceRange(); DiagMethodNote(MethodParameterDifferentDefaultArgument) << (I + 1) << SecondInit->getSourceRange(); ParameterMismatch = true; break; - } } @@ -10571,9 +10546,9 @@ void ASTReader::diagnoseOdrViolations() { break; } - const auto *FirstTemplateArgs = + const TemplateArgumentList *FirstTemplateArgs = FirstMethod->getTemplateSpecializationArgs(); - const auto *SecondTemplateArgs = + const TemplateArgumentList *SecondTemplateArgs = SecondMethod->getTemplateSpecializationArgs(); if ((FirstTemplateArgs && !SecondTemplateArgs) || @@ -10619,8 +10594,7 @@ void ASTReader::diagnoseOdrViolations() { for (unsigned i = 0, e = FirstExpandedList.size(); i != e; ++i) { const TemplateArgument &FirstTA = *FirstExpandedList[i], &SecondTA = *SecondExpandedList[i]; - if (ComputeTemplateArgumentODRHash(FirstTA) == - ComputeTemplateArgumentODRHash(SecondTA)) { + if (computeODRHash(FirstTA) == computeODRHash(SecondTA)) { continue; } @@ -10639,10 +10613,10 @@ void ASTReader::diagnoseOdrViolations() { } // Compute the hash of the method as if it has no body. - auto ComputeCXXMethodODRHash = [&Hash](const CXXMethodDecl *D) { - Hash.clear(); - Hash.AddFunctionDecl(D, true /*SkipBody*/); - return Hash.CalculateHash(); + auto ComputeCXXMethodODRHash = [](const CXXMethodDecl *D) { + ODRHash Hasher; + Hasher.AddFunctionDecl(D, true /*SkipBody*/); + return Hasher.CalculateHash(); }; // Compare the hash generated to the hash stored. 
A difference means @@ -10684,11 +10658,11 @@ void ASTReader::diagnoseOdrViolations() { break; } case Friend: { - FriendDecl *FirstFriend = cast<FriendDecl>(FirstDecl); - FriendDecl *SecondFriend = cast<FriendDecl>(SecondDecl); + const FriendDecl *FirstFriend = cast<FriendDecl>(FirstDecl); + const FriendDecl *SecondFriend = cast<FriendDecl>(SecondDecl); - NamedDecl *FirstND = FirstFriend->getFriendDecl(); - NamedDecl *SecondND = SecondFriend->getFriendDecl(); + const NamedDecl *FirstND = FirstFriend->getFriendDecl(); + const NamedDecl *SecondND = SecondFriend->getFriendDecl(); TypeSourceInfo *FirstTSI = FirstFriend->getFriendType(); TypeSourceInfo *SecondTSI = SecondFriend->getFriendType(); @@ -10707,8 +10681,8 @@ void ASTReader::diagnoseOdrViolations() { if (FirstTSI && SecondTSI) { QualType FirstFriendType = FirstTSI->getType(); QualType SecondFriendType = SecondTSI->getType(); - assert(ComputeQualTypeODRHash(FirstFriendType) != - ComputeQualTypeODRHash(SecondFriendType)); + assert(computeODRHash(FirstFriendType) != + computeODRHash(SecondFriendType)); ODRDiagDeclError(FirstFriend->getFriendLoc(), FirstFriend->getSourceRange(), FriendType) << FirstFriendType; @@ -10729,9 +10703,9 @@ void ASTReader::diagnoseOdrViolations() { break; } case FunctionTemplate: { - FunctionTemplateDecl *FirstTemplate = + const FunctionTemplateDecl *FirstTemplate = cast<FunctionTemplateDecl>(FirstDecl); - FunctionTemplateDecl *SecondTemplate = + const FunctionTemplateDecl *SecondTemplate = cast<FunctionTemplateDecl>(SecondDecl); TemplateParameterList *FirstTPL = @@ -10826,8 +10800,7 @@ void ASTReader::diagnoseOdrViolations() { if (HasFirstDefaultArgument && HasSecondDefaultArgument) { QualType FirstType = FirstTTPD->getDefaultArgument(); QualType SecondType = SecondTTPD->getDefaultArgument(); - if (ComputeQualTypeODRHash(FirstType) != - ComputeQualTypeODRHash(SecondType)) { + if (computeODRHash(FirstType) != computeODRHash(SecondType)) { DiagTemplateError( FunctionTemplateParameterDifferentDefaultArgument) << (i + 1) << FirstType; @@ -10862,6 +10835,14 @@ void ASTReader::diagnoseOdrViolations() { TemplateParameterList *SecondTPL = SecondTTPD->getTemplateParameters(); + auto ComputeTemplateParameterListODRHash = + [](const TemplateParameterList *TPL) { + assert(TPL); + ODRHash Hasher; + Hasher.AddTemplateParameterList(TPL); + return Hasher.CalculateHash(); + }; + if (ComputeTemplateParameterListODRHash(FirstTPL) != ComputeTemplateParameterListODRHash(SecondTPL)) { DiagTemplateError(FunctionTemplateParameterDifferentType) @@ -10892,8 +10873,7 @@ void ASTReader::diagnoseOdrViolations() { FirstTTPD->getDefaultArgument().getArgument(); TemplateArgument SecondTA = SecondTTPD->getDefaultArgument().getArgument(); - if (ComputeTemplateArgumentODRHash(FirstTA) != - ComputeTemplateArgumentODRHash(SecondTA)) { + if (computeODRHash(FirstTA) != computeODRHash(SecondTA)) { DiagTemplateError( FunctionTemplateParameterDifferentDefaultArgument) << (i + 1) << FirstTA; @@ -10925,8 +10905,7 @@ void ASTReader::diagnoseOdrViolations() { QualType FirstType = FirstNTTPD->getType(); QualType SecondType = SecondNTTPD->getType(); - if (ComputeQualTypeODRHash(FirstType) != - ComputeQualTypeODRHash(SecondType)) { + if (computeODRHash(FirstType) != computeODRHash(SecondType)) { DiagTemplateError(FunctionTemplateParameterDifferentType) << (i + 1); DiagTemplateNote(FunctionTemplateParameterDifferentType) @@ -10953,8 +10932,8 @@ void ASTReader::diagnoseOdrViolations() { if (HasFirstDefaultArgument && HasSecondDefaultArgument) { Expr 
*FirstDefaultArgument = FirstNTTPD->getDefaultArgument(); Expr *SecondDefaultArgument = SecondNTTPD->getDefaultArgument(); - if (ComputeODRHash(FirstDefaultArgument) != - ComputeODRHash(SecondDefaultArgument)) { + if (computeODRHash(FirstDefaultArgument) != + computeODRHash(SecondDefaultArgument)) { DiagTemplateError( FunctionTemplateParameterDifferentDefaultArgument) << (i + 1) << FirstDefaultArgument; @@ -11050,8 +11029,8 @@ void ASTReader::diagnoseOdrViolations() { << SecondModule << Range << DiffType; }; - if (ComputeQualTypeODRHash(FirstFunction->getReturnType()) != - ComputeQualTypeODRHash(SecondFunction->getReturnType())) { + if (computeODRHash(FirstFunction->getReturnType()) != + computeODRHash(SecondFunction->getReturnType())) { ODRDiagError(FirstFunction->getReturnTypeSourceRange().getBegin(), FirstFunction->getReturnTypeSourceRange(), ReturnType) << FirstFunction->getReturnType(); @@ -11065,11 +11044,11 @@ void ASTReader::diagnoseOdrViolations() { assert(FirstFunction->param_size() == SecondFunction->param_size() && "Merged functions with different number of parameters"); - auto ParamSize = FirstFunction->param_size(); + size_t ParamSize = FirstFunction->param_size(); bool ParameterMismatch = false; for (unsigned I = 0; I < ParamSize; ++I) { - auto *FirstParam = FirstFunction->getParamDecl(I); - auto *SecondParam = SecondFunction->getParamDecl(I); + const ParmVarDecl *FirstParam = FirstFunction->getParamDecl(I); + const ParmVarDecl *SecondParam = SecondFunction->getParamDecl(I); assert(getContext().hasSameType(FirstParam->getType(), SecondParam->getType()) && @@ -11089,8 +11068,7 @@ void ASTReader::diagnoseOdrViolations() { QualType FirstParamType = FirstParam->getType(); QualType SecondParamType = SecondParam->getType(); if (FirstParamType != SecondParamType && - ComputeQualTypeODRHash(FirstParamType) != - ComputeQualTypeODRHash(SecondParamType)) { + computeODRHash(FirstParamType) != computeODRHash(SecondParamType)) { if (const DecayedType *ParamDecayedType = FirstParamType->getAs<DecayedType>()) { ODRDiagError(FirstParam->getLocation(), @@ -11134,7 +11112,7 @@ void ASTReader::diagnoseOdrViolations() { } if (FirstInit && SecondInit && - ComputeODRHash(FirstInit) != ComputeODRHash(SecondInit)) { + computeODRHash(FirstInit) != computeODRHash(SecondInit)) { ODRDiagError(FirstParam->getLocation(), FirstParam->getSourceRange(), ParameterDifferentDefaultArgument) << (I + 1) << FirstInit->getSourceRange(); @@ -11145,8 +11123,7 @@ void ASTReader::diagnoseOdrViolations() { break; } - assert(ComputeSubDeclODRHash(FirstParam) == - ComputeSubDeclODRHash(SecondParam) && + assert(computeODRHash(FirstParam) == computeODRHash(SecondParam) && "Undiagnosed parameter difference."); } @@ -11191,16 +11168,14 @@ void ASTReader::diagnoseOdrViolations() { using DeclHashes = llvm::SmallVector<std::pair<EnumConstantDecl *, unsigned>, 4>; - auto PopulateHashes = [&ComputeSubDeclODRHash, FirstEnum]( - DeclHashes &Hashes, EnumDecl *Enum) { + auto PopulateHashes = [FirstEnum](DeclHashes &Hashes, EnumDecl *Enum) { for (auto *D : Enum->decls()) { // Due to decl merging, the first EnumDecl is the parent of // Decls in both records. 
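Because of that merging, constants from both definitions of the enum hang off the first EnumDecl, which is why isDeclToBeProcessed() is checked against FirstEnum for either side. A hypothetical mismatch of the kind the constant-by-constant comparison further down diagnoses (names and values are illustrative):

    // Definition reachable from module 1:
    enum Color { Red = 1, Green };

    // Definition reachable from module 2:
    enum Color { Red = 1, Green = 3 }; // EnumConstantDifferentInitializer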
if (!ODRHash::isDeclToBeProcessed(D, FirstEnum)) continue; assert(isa<EnumConstantDecl>(D) && "Unexpected Decl kind"); - Hashes.emplace_back(cast<EnumConstantDecl>(D), - ComputeSubDeclODRHash(D)); + Hashes.emplace_back(cast<EnumConstantDecl>(D), computeODRHash(D)); } }; DeclHashes FirstHashes; @@ -11266,8 +11241,8 @@ void ASTReader::diagnoseOdrViolations() { } if (!FirstUnderlyingType.isNull() && !SecondUnderlyingType.isNull()) { - if (ComputeQualTypeODRHash(FirstUnderlyingType) != - ComputeQualTypeODRHash(SecondUnderlyingType)) { + if (computeODRHash(FirstUnderlyingType) != + computeODRHash(SecondUnderlyingType)) { ODRDiagError(FirstEnum, DifferentSpecifiedTypes) << FirstUnderlyingType; ODRDiagNote(SecondEnum, DifferentSpecifiedTypes) @@ -11292,39 +11267,38 @@ void ASTReader::diagnoseOdrViolations() { for (unsigned I = 0; I < FirstHashes.size(); ++I) { if (FirstHashes[I].second == SecondHashes[I].second) continue; - const EnumConstantDecl *FirstEnumConstant = FirstHashes[I].first; - const EnumConstantDecl *SecondEnumConstant = SecondHashes[I].first; + const EnumConstantDecl *FirstConstant = FirstHashes[I].first; + const EnumConstantDecl *SecondConstant = SecondHashes[I].first; - if (FirstEnumConstant->getDeclName() != - SecondEnumConstant->getDeclName()) { + if (FirstConstant->getDeclName() != SecondConstant->getDeclName()) { - ODRDiagError(FirstEnumConstant, EnumConstantName) - << I + 1 << FirstEnumConstant; - ODRDiagNote(SecondEnumConstant, EnumConstantName) - << I + 1 << SecondEnumConstant; + ODRDiagError(FirstConstant, EnumConstantName) + << I + 1 << FirstConstant; + ODRDiagNote(SecondConstant, EnumConstantName) + << I + 1 << SecondConstant; Diagnosed = true; break; } - const Expr *FirstInit = FirstEnumConstant->getInitExpr(); - const Expr *SecondInit = SecondEnumConstant->getInitExpr(); + const Expr *FirstInit = FirstConstant->getInitExpr(); + const Expr *SecondInit = SecondConstant->getInitExpr(); if (!FirstInit && !SecondInit) continue; if (!FirstInit || !SecondInit) { - ODRDiagError(FirstEnumConstant, EnumConstantSingleInitializer) - << I + 1 << FirstEnumConstant << (FirstInit != nullptr); - ODRDiagNote(SecondEnumConstant, EnumConstantSingleInitializer) - << I + 1 << SecondEnumConstant << (SecondInit != nullptr); + ODRDiagError(FirstConstant, EnumConstantSingleInitializer) + << I + 1 << FirstConstant << (FirstInit != nullptr); + ODRDiagNote(SecondConstant, EnumConstantSingleInitializer) + << I + 1 << SecondConstant << (SecondInit != nullptr); Diagnosed = true; break; } - if (ComputeODRHash(FirstInit) != ComputeODRHash(SecondInit)) { - ODRDiagError(FirstEnumConstant, EnumConstantDifferentInitializer) - << I + 1 << FirstEnumConstant; - ODRDiagNote(SecondEnumConstant, EnumConstantDifferentInitializer) - << I + 1 << SecondEnumConstant; + if (computeODRHash(FirstInit) != computeODRHash(SecondInit)) { + ODRDiagError(FirstConstant, EnumConstantDifferentInitializer) + << I + 1 << FirstConstant; + ODRDiagNote(SecondConstant, EnumConstantDifferentInitializer) + << I + 1 << SecondConstant; Diagnosed = true; break; } diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index b4506da2bb2b..d70e824224df 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -953,6 +953,10 @@ void ASTDeclReader::VisitFunctionDecl(FunctionDecl *FD) { case FunctionDecl::TK_NonTemplate: mergeRedeclarable(FD, Redecl); break; + case FunctionDecl::TK_DependentNonTemplate: + mergeRedeclarable(FD, Redecl); + 
FD->setInstantiatedFromDecl(readDeclAs<FunctionDecl>()); + break; case FunctionDecl::TK_FunctionTemplate: // Merged when we merge the template. FD->setDescribedFunctionTemplate(readDeclAs<FunctionTemplateDecl>()); diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp index 01f692c9611b..35b8db27bd0e 100644 --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -585,6 +585,9 @@ void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) { switch (D->getTemplatedKind()) { case FunctionDecl::TK_NonTemplate: break; + case FunctionDecl::TK_DependentNonTemplate: + Record.AddDeclRef(D->getInstantiatedFromDecl()); + break; case FunctionDecl::TK_FunctionTemplate: Record.AddDeclRef(D->getDescribedFunctionTemplate()); break; diff --git a/clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp index 1c33648b2b32..ec1b0a70d7d3 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp @@ -40,6 +40,7 @@ class ExprInspectionChecker void analyzerNumTimesReached(const CallExpr *CE, CheckerContext &C) const; void analyzerCrash(const CallExpr *CE, CheckerContext &C) const; void analyzerWarnOnDeadSymbol(const CallExpr *CE, CheckerContext &C) const; + void analyzerValue(const CallExpr *CE, CheckerContext &C) const; void analyzerDumpSValType(const CallExpr *CE, CheckerContext &C) const; void analyzerDump(const CallExpr *CE, CheckerContext &C) const; void analyzerExplain(const CallExpr *CE, CheckerContext &C) const; @@ -60,6 +61,7 @@ class ExprInspectionChecker Optional<SVal> ExprVal = None) const; ExplodedNode *reportBug(llvm::StringRef Msg, BugReporter &BR, ExplodedNode *N, Optional<SVal> ExprVal = None) const; + template <typename T> void printAndReport(CheckerContext &C, T What) const; const Expr *getArgExpr(const CallExpr *CE, CheckerContext &C) const; const MemRegion *getArgRegion(const CallExpr *CE, CheckerContext &C) const; @@ -99,6 +101,7 @@ bool ExprInspectionChecker::evalCall(const CallEvent &Call, &ExprInspectionChecker::analyzerDumpExtent) .Case("clang_analyzer_dumpElementCount", &ExprInspectionChecker::analyzerDumpElementCount) + .Case("clang_analyzer_value", &ExprInspectionChecker::analyzerValue) .StartsWith("clang_analyzer_dumpSvalType", &ExprInspectionChecker::analyzerDumpSValType) .StartsWith("clang_analyzer_dump", @@ -258,6 +261,45 @@ void ExprInspectionChecker::analyzerExplain(const CallExpr *CE, reportBug(Ex.Visit(V), C); } +static void printHelper(llvm::raw_svector_ostream &Out, CheckerContext &C, + const llvm::APSInt &I) { + Out << I.getBitWidth() << (I.isUnsigned() ? 
"u:" : "s:"); + Out << I; +} + +static void printHelper(llvm::raw_svector_ostream &Out, CheckerContext &C, + SymbolRef Sym) { + C.getConstraintManager().printValue(Out, C.getState(), Sym); +} + +static void printHelper(llvm::raw_svector_ostream &Out, CheckerContext &C, + SVal V) { + Out << V; +} + +template <typename T> +void ExprInspectionChecker::printAndReport(CheckerContext &C, T What) const { + llvm::SmallString<64> Str; + llvm::raw_svector_ostream OS(Str); + printHelper(OS, C, What); + reportBug(OS.str(), C); +} + +void ExprInspectionChecker::analyzerValue(const CallExpr *CE, + CheckerContext &C) const { + const Expr *Arg = getArgExpr(CE, C); + if (!Arg) + return; + + SVal V = C.getSVal(Arg); + if (const SymbolRef Sym = V.getAsSymbol()) + printAndReport(C, Sym); + else if (const llvm::APSInt *I = V.getAsInteger()) + printAndReport(C, *I); + else + reportBug("n/a", C); +} + void ExprInspectionChecker::analyzerDumpSValType(const CallExpr *CE, CheckerContext &C) const { const Expr *Arg = getArgExpr(CE, C); @@ -275,11 +317,7 @@ void ExprInspectionChecker::analyzerDump(const CallExpr *CE, return; SVal V = C.getSVal(Arg); - - llvm::SmallString<32> Str; - llvm::raw_svector_ostream OS(Str); - V.dumpToStream(OS); - reportBug(OS.str(), C); + printAndReport(C, V); } void ExprInspectionChecker::analyzerGetExtent(const CallExpr *CE, @@ -303,11 +341,7 @@ void ExprInspectionChecker::analyzerDumpExtent(const CallExpr *CE, DefinedOrUnknownSVal Size = getDynamicExtent(C.getState(), MR, C.getSValBuilder()); - - SmallString<64> Msg; - llvm::raw_svector_ostream Out(Msg); - Out << Size; - reportBug(Out.str(), C); + printAndReport(C, Size); } void ExprInspectionChecker::analyzerDumpElementCount(const CallExpr *CE, @@ -328,11 +362,7 @@ void ExprInspectionChecker::analyzerDumpElementCount(const CallExpr *CE, DefinedOrUnknownSVal ElementCount = getDynamicElementCount(C.getState(), MR, C.getSValBuilder(), ElementTy); - - SmallString<128> Msg; - llvm::raw_svector_ostream Out(Msg); - Out << ElementCount; - reportBug(Out.str(), C); + printAndReport(C, ElementCount); } void ExprInspectionChecker::analyzerPrintState(const CallExpr *CE, diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index 45af22de50ae..d8f56f2f8cff 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -185,6 +185,17 @@ typedef llvm::ImmutableMap<ConstructedObjectKey, SVal> REGISTER_TRAIT_WITH_PROGRAMSTATE(ObjectsUnderConstruction, ObjectsUnderConstructionMap) +// This trait is responsible for storing the index of the element that is to be +// constructed in the next iteration. As a result a CXXConstructExpr is only +// stored if it is array type. Also the index is the index of the continous +// memory region, which is important for multi-dimensional arrays. E.g:: int +// arr[2][2]; assume arr[1][1] will be the next element under construction, so +// the index is 3. +typedef llvm::ImmutableMap< + std::pair<const CXXConstructExpr *, const LocationContext *>, unsigned> + IndexOfElementToConstructMap; +REGISTER_TRAIT_WITH_PROGRAMSTATE(IndexOfElementToConstruct, + IndexOfElementToConstructMap) //===----------------------------------------------------------------------===// // Engine construction and deletion. 
//===----------------------------------------------------------------------===// @@ -441,16 +452,65 @@ ProgramStateRef ExprEngine::createTemporaryRegionIfNeeded( return State; } +ProgramStateRef ExprEngine::setIndexOfElementToConstruct( + ProgramStateRef State, const CXXConstructExpr *E, + const LocationContext *LCtx, unsigned Idx) { + auto Key = std::make_pair(E, LCtx->getStackFrame()); + + assert(!State->contains<IndexOfElementToConstruct>(Key) || Idx > 0); + + return State->set<IndexOfElementToConstruct>(Key, Idx); +} + +Optional<unsigned> +ExprEngine::getIndexOfElementToConstruct(ProgramStateRef State, + const CXXConstructExpr *E, + const LocationContext *LCtx) { + + return Optional<unsigned>::create( + State->get<IndexOfElementToConstruct>({E, LCtx->getStackFrame()})); +} + +ProgramStateRef +ExprEngine::removeIndexOfElementToConstruct(ProgramStateRef State, + const CXXConstructExpr *E, + const LocationContext *LCtx) { + auto Key = std::make_pair(E, LCtx->getStackFrame()); + + assert(E && State->contains<IndexOfElementToConstruct>(Key)); + return State->remove<IndexOfElementToConstruct>(Key); +} + ProgramStateRef ExprEngine::addObjectUnderConstruction(ProgramStateRef State, const ConstructionContextItem &Item, const LocationContext *LC, SVal V) { ConstructedObjectKey Key(Item, LC->getStackFrame()); + + const CXXConstructExpr *E = nullptr; + + if (auto DS = dyn_cast_or_null<DeclStmt>(Item.getStmtOrNull())) { + if (auto VD = dyn_cast_or_null<VarDecl>(DS->getSingleDecl())) + E = dyn_cast<CXXConstructExpr>(VD->getInit()); + } + + if (!E && !Item.getStmtOrNull()) { + auto CtorInit = Item.getCXXCtorInitializer(); + E = dyn_cast<CXXConstructExpr>(CtorInit->getInit()); + } + // FIXME: Currently the state might already contain the marker due to // incorrect handling of temporaries bound to default parameters. - assert(!State->get<ObjectsUnderConstruction>(Key) || - Key.getItem().getKind() == - ConstructionContextItem::TemporaryDestructorKind); + // The state will already contain the marker if we construct elements + // in an array, as we visit the same statement multiple times before + // the array declaration. The marker is removed when we exit the + // constructor call. + assert((!State->get<ObjectsUnderConstruction>(Key) || + Key.getItem().getKind() == + ConstructionContextItem::TemporaryDestructorKind || + State->contains<IndexOfElementToConstruct>({E, LC})) && + "The object is already marked as `UnderConstruction`, when it's not " + "supposed to!"); return State->set<ObjectsUnderConstruction>(Key, V); } @@ -582,6 +642,69 @@ printObjectsUnderConstructionJson(raw_ostream &Out, ProgramStateRef State, } } +static void printIndicesOfElementsToConstructJson( + raw_ostream &Out, ProgramStateRef State, const char *NL, + const LocationContext *LCtx, const ASTContext &Context, + unsigned int Space = 0, bool IsDot = false) { + using KeyT = std::pair<const Expr *, const LocationContext *>; + + PrintingPolicy PP = + LCtx->getAnalysisDeclContext()->getASTContext().getPrintingPolicy(); + + ++Space; + bool HasItem = false; + + // Store the last key. 
+ KeyT LastKey; + for (const auto &I : State->get<IndexOfElementToConstruct>()) { + const KeyT &Key = I.first; + if (Key.second != LCtx) + continue; + + if (!HasItem) { + Out << "[" << NL; + HasItem = true; + } + + LastKey = Key; + } + + for (const auto &I : State->get<IndexOfElementToConstruct>()) { + const KeyT &Key = I.first; + unsigned Value = I.second; + if (Key.second != LCtx) + continue; + + Indent(Out, Space, IsDot) << "{ "; + + // Expr + const Expr *E = Key.first; + Out << "\"stmt_id\": " << E->getID(Context); + + // Kind - hack to display the current index + Out << ", \"kind\": \"Cur: " << Value - 1 << "\""; + + // Pretty-print + Out << ", \"pretty\": "; + Out << "\"" << E->getStmtClassName() << " " + << E->getSourceRange().printToString(Context.getSourceManager()) << " '" + << QualType::getAsString(E->getType().split(), PP); + Out << "'\""; + + Out << ", \"value\": \"Next: " << Value << "\" }"; + + if (Key != LastKey) + Out << ','; + Out << NL; + } + + if (HasItem) + Indent(Out, --Space, IsDot) << ']'; // End of "location_context". + else { + Out << "null "; + } +} + void ExprEngine::printJson(raw_ostream &Out, ProgramStateRef State, const LocationContext *LCtx, const char *NL, unsigned int Space, bool IsDot) const { @@ -600,6 +723,23 @@ void ExprEngine::printJson(raw_ostream &Out, ProgramStateRef State, Out << "null," << NL; } + Indent(Out, Space, IsDot) << "\"index_of_element\": "; + if (LCtx && !State->get<IndexOfElementToConstruct>().isEmpty()) { + ++Space; + + auto &Context = getContext(); + Out << '[' << NL; + LCtx->printJson(Out, NL, Space, IsDot, [&](const LocationContext *LC) { + printIndicesOfElementsToConstructJson(Out, State, NL, LC, Context, Space, + IsDot); + }); + + --Space; + Indent(Out, Space, IsDot) << "]," << NL; // End of "index_of_element". + } else { + Out << "null," << NL; + } + getCheckerManager().runCheckersForPrintStateJson(Out, State, NL, Space, IsDot); } @@ -961,8 +1101,9 @@ void ExprEngine::ProcessAutomaticObjDtor(const CFGAutomaticObjDtor Dtor, // This workaround will just run the first destructor (which will still // invalidate the entire array). EvalCallOptions CallOpts; - Region = makeZeroElementRegion(state, loc::MemRegionVal(Region), varType, - CallOpts.IsArrayCtorOrDtor).getAsRegion(); + Region = makeElementRegion(state, loc::MemRegionVal(Region), varType, + CallOpts.IsArrayCtorOrDtor) + .getAsRegion(); VisitCXXDestructor(varType, Region, Dtor.getTriggerStmt(), /*IsBase=*/false, Pred, Dst, CallOpts); @@ -1045,8 +1186,7 @@ void ExprEngine::ProcessMemberDtor(const CFGMemberDtor D, // This workaround will just run the first destructor (which will still // invalidate the entire array). EvalCallOptions CallOpts; - FieldVal = makeZeroElementRegion(State, FieldVal, T, - CallOpts.IsArrayCtorOrDtor); + FieldVal = makeElementRegion(State, FieldVal, T, CallOpts.IsArrayCtorOrDtor); VisitCXXDestructor(T, FieldVal.getAsRegion(), CurDtor->getBody(), /*IsBase=*/false, Pred, Dst, CallOpts); @@ -1105,7 +1245,7 @@ void ExprEngine::ProcessTemporaryDtor(const CFGTemporaryDtor D, CallOpts.IsArrayCtorOrDtor = true; } } else { - // We'd eventually need to makeZeroElementRegion() trick here, + // We'd eventually need to makeElementRegion() trick here, // but for now we don't have the respective construction contexts, // so MR would always be null in this case. Do nothing for now. 
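For reference, each tracked element printed by printIndicesOfElementsToConstructJson above comes out as one record of roughly this shape, where "Cur" is the element just constructed and "Next" the one to construct on the following visit (statement id, source range and type below are illustrative):

    { "stmt_id": 2718, "kind": "Cur: 2", "pretty": "CXXConstructExpr <line:4:5> 'S[4]'", "value": "Next: 3" }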
} diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp index 6d979da2755f..08fac9fb2e69 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp @@ -94,15 +94,17 @@ void ExprEngine::performTrivialCopy(NodeBuilder &Bldr, ExplodedNode *Pred, } } - -SVal ExprEngine::makeZeroElementRegion(ProgramStateRef State, SVal LValue, - QualType &Ty, bool &IsArray) { +SVal ExprEngine::makeElementRegion(ProgramStateRef State, SVal LValue, + QualType &Ty, bool &IsArray, unsigned Idx) { SValBuilder &SVB = State->getStateManager().getSValBuilder(); ASTContext &Ctx = SVB.getContext(); - while (const ArrayType *AT = Ctx.getAsArrayType(Ty)) { - Ty = AT->getElementType(); - LValue = State->getLValue(Ty, SVB.makeZeroArrayIndex(), LValue); + if (const ArrayType *AT = Ctx.getAsArrayType(Ty)) { + while (AT) { + Ty = AT->getElementType(); + AT = dyn_cast<ArrayType>(AT->getElementType()); + } + LValue = State->getLValue(Ty, SVB.makeArrayIndex(Idx), LValue); IsArray = true; } @@ -111,7 +113,7 @@ SVal ExprEngine::makeZeroElementRegion(ProgramStateRef State, SVal LValue, SVal ExprEngine::computeObjectUnderConstruction( const Expr *E, ProgramStateRef State, const LocationContext *LCtx, - const ConstructionContext *CC, EvalCallOptions &CallOpts) { + const ConstructionContext *CC, EvalCallOptions &CallOpts, unsigned Idx) { SValBuilder &SVB = getSValBuilder(); MemRegionManager &MRMgr = SVB.getRegionManager(); ASTContext &ACtx = SVB.getContext(); @@ -125,8 +127,8 @@ SVal ExprEngine::computeObjectUnderConstruction( const auto *DS = DSCC->getDeclStmt(); const auto *Var = cast<VarDecl>(DS->getSingleDecl()); QualType Ty = Var->getType(); - return makeZeroElementRegion(State, State->getLValue(Var, LCtx), Ty, - CallOpts.IsArrayCtorOrDtor); + return makeElementRegion(State, State->getLValue(Var, LCtx), Ty, + CallOpts.IsArrayCtorOrDtor, Idx); } case ConstructionContext::CXX17ElidedCopyConstructorInitializerKind: case ConstructionContext::SimpleConstructorInitializerKind: { @@ -158,8 +160,8 @@ SVal ExprEngine::computeObjectUnderConstruction( } QualType Ty = Field->getType(); - return makeZeroElementRegion(State, FieldVal, Ty, - CallOpts.IsArrayCtorOrDtor); + return makeElementRegion(State, FieldVal, Ty, CallOpts.IsArrayCtorOrDtor, + Idx); } case ConstructionContext::NewAllocatedObjectKind: { if (AMgr.getAnalyzerOptions().MayInlineCXXAllocator) { @@ -172,8 +174,12 @@ SVal ExprEngine::computeObjectUnderConstruction( // TODO: In fact, we need to call the constructor for every // allocated element, not just the first one! CallOpts.IsArrayCtorOrDtor = true; - return loc::MemRegionVal(getStoreManager().GetElementZeroRegion( - MR, NE->getType()->getPointeeType())); + + auto R = MRMgr.getElementRegion(NE->getType()->getPointeeType(), + svalBuilder.makeArrayIndex(Idx), MR, + SVB.getContext()); + + return loc::MemRegionVal(R); } return V; } @@ -484,10 +490,6 @@ void ExprEngine::handleConstructor(const Expr *E, } } - // FIXME: Handle arrays, which run the same constructor for every element. - // For now, we just run the first constructor (which should still invalidate - // the entire array). - EvalCallOptions CallOpts; auto C = getCurrentCFGElement().getAs<CFGConstructor>(); assert(C || getCurrentCFGElement().getAs<CFGStmt>()); @@ -500,9 +502,15 @@ void ExprEngine::handleConstructor(const Expr *E, // Inherited constructors are always base class constructors. 
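makeElementRegion above (the renamed makeZeroElementRegion) now drills through nested array types to the innermost element type and indexes the resulting region with the flat element index it is given, the same row-major index the IndexOfElementToConstruct trait in the previous file tracks. A small sketch of that arithmetic, matching the arr[1][1] example from the trait's comment:

    // Flat (row-major) index of element (I, J) in T arr[N1][N2]:
    //   FlatIdx = I * N2 + J
    // e.g. for int arr[2][2], arr[1][1] has flat index 1 * 2 + 1 == 3, which is
    // the value stored while arr[1][1] is the next element to be constructed.
    unsigned flatIndex2D(unsigned I, unsigned J, unsigned N2) { return I * N2 + J; }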
assert(CE && !CIE && "A complete constructor is inherited?!"); + unsigned Idx = 0; + if (CE->getType()->isArrayType()) { + Idx = getIndexOfElementToConstruct(State, CE, LCtx).value_or(0u); + State = setIndexOfElementToConstruct(State, CE, LCtx, Idx + 1); + } + // The target region is found from construction context. std::tie(State, Target) = - handleConstructionContext(CE, State, LCtx, CC, CallOpts); + handleConstructionContext(CE, State, LCtx, CC, CallOpts, Idx); break; } case CXXConstructExpr::CK_VirtualBase: { @@ -894,14 +902,39 @@ void ExprEngine::VisitCXXNewExpr(const CXXNewExpr *CNE, ExplodedNode *Pred, SVal Result = symVal; if (CNE->isArray()) { - // FIXME: allocating an array requires simulating the constructors. - // For now, just return a symbolicated region. + if (const auto *NewReg = cast_or_null<SubRegion>(symVal.getAsRegion())) { - QualType ObjTy = CNE->getType()->getPointeeType(); + // If each element is initialized by their default constructor, the field + // values are properly placed inside the required region, however if an + // initializer list is used, this doesn't happen automatically. + auto *Init = CNE->getInitializer(); + bool isInitList = dyn_cast_or_null<InitListExpr>(Init); + + QualType ObjTy = + isInitList ? Init->getType() : CNE->getType()->getPointeeType(); const ElementRegion *EleReg = - getStoreManager().GetElementZeroRegion(NewReg, ObjTy); + MRMgr.getElementRegion(ObjTy, svalBuilder.makeArrayIndex(0), NewReg, + svalBuilder.getContext()); Result = loc::MemRegionVal(EleReg); + + // If the array is list initialized, we bind the initializer list to the + // memory region here, otherwise we would lose it. + if (isInitList) { + Bldr.takeNodes(Pred); + Pred = Bldr.generateNode(CNE, Pred, State); + + SVal V = State->getSVal(Init, LCtx); + ExplodedNodeSet evaluated; + evalBind(evaluated, CNE, Pred, Result, V, true); + + Bldr.takeNodes(Pred); + Bldr.addNodes(evaluated); + + Pred = *evaluated.begin(); + State = Pred->getState(); + } } + State = State->BindExpr(CNE, Pred->getLocationContext(), Result); Bldr.generateNode(CNE, Pred, State); return; diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp index e1649f0b3df6..ebcca92a3e4e 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp @@ -227,6 +227,13 @@ void ExprEngine::processCallExit(ExplodedNode *CEBNode) { // Step 2: generate node with bound return value: CEBNode -> BindedRetNode. + // If this variable is set to 'true' the analyzer will evaluate the call + // statement we are about to exit again, instead of continuing the execution + // from the statement after the call. This is useful for non-POD type array + // construction where the CXXConstructExpr is referenced only once in the CFG, + // but we want to evaluate it as many times as many elements the array has. + bool ShouldRepeatCall = false; + // If the callee returns an expression, bind its value to CallExpr. 
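Taken together with the index bookkeeping in handleConstructor and the shouldRepeatCtorCall check added below, the net effect for a constant-size array is that the single CXXConstructExpr in the CFG is evaluated once per element, for example:

    // S arr[3]; -- one CXXConstructExpr in the CFG, evaluated three times:
    //   visit 1: Idx 0 -> construct arr[0], store Next: 1, repeat the call site
    //   visit 2: Idx 1 -> construct arr[1], store Next: 2, repeat the call site
    //   visit 3: Idx 2 -> construct arr[2], Next: 3 == element count, so the
    //            index entry is removed and analysis continues past the statement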
if (CE) { if (const ReturnStmt *RS = dyn_cast_or_null<ReturnStmt>(LastSt)) { @@ -255,6 +262,12 @@ void ExprEngine::processCallExit(ExplodedNode *CEBNode) { SVal ThisV = state->getSVal(This); ThisV = state->getSVal(ThisV.castAs<Loc>()); state = state->BindExpr(CCE, callerCtx, ThisV); + + ShouldRepeatCall = shouldRepeatCtorCall(state, CCE, callerCtx); + + if (!ShouldRepeatCall && + getIndexOfElementToConstruct(state, CCE, callerCtx)) + state = removeIndexOfElementToConstruct(state, CCE, callerCtx); } if (const auto *CNE = dyn_cast<CXXNewExpr>(CE)) { @@ -358,9 +371,10 @@ void ExprEngine::processCallExit(ExplodedNode *CEBNode) { // Enqueue the next element in the block. for (ExplodedNodeSet::iterator PSI = Dst.begin(), PSE = Dst.end(); - PSI != PSE; ++PSI) { - Engine.getWorkList()->enqueue(*PSI, calleeCtx->getCallSiteBlock(), - calleeCtx->getIndex()+1); + PSI != PSE; ++PSI) { + unsigned Idx = calleeCtx->getIndex() + (ShouldRepeatCall ? 0 : 1); + + Engine.getWorkList()->enqueue(*PSI, calleeCtx->getCallSiteBlock(), Idx); } } } @@ -800,8 +814,11 @@ ExprEngine::mayInlineCallKind(const CallEvent &Call, const ExplodedNode *Pred, // initializers for array fields in default move/copy constructors. // We still allow construction into ElementRegion targets when they don't // represent array elements. - if (CallOpts.IsArrayCtorOrDtor) - return CIP_DisallowedOnce; + if (CallOpts.IsArrayCtorOrDtor) { + if (!shouldInlineArrayConstruction( + dyn_cast<ArrayType>(CtorExpr->getType()))) + return CIP_DisallowedOnce; + } // Inlining constructors requires including initializers in the CFG. const AnalysisDeclContext *ADC = CallerSFC->getAnalysisDeclContext(); @@ -852,7 +869,7 @@ ExprEngine::mayInlineCallKind(const CallEvent &Call, const ExplodedNode *Pred, assert(ADC->getCFGBuildOptions().AddImplicitDtors && "No CFG destructors"); (void)ADC; - // FIXME: We don't handle constructors or destructors for arrays properly. + // FIXME: We don't handle destructors for arrays properly. if (CallOpts.IsArrayCtorOrDtor) return CIP_DisallowedOnce; @@ -1065,6 +1082,38 @@ bool ExprEngine::shouldInlineCall(const CallEvent &Call, const Decl *D, return true; } +bool ExprEngine::shouldInlineArrayConstruction(const ArrayType *Type) { + if (!Type) + return false; + + // FIXME: Handle other arrays types. 
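The size cap used below reuses AnalyzerOptions::maxBlockVisitOnPath (the -analyzer-max-loop bound), so only reasonably small constant-size arrays get their constructors evaluated element by element; larger arrays keep the old conservative behaviour. Illustratively, assuming the default bound of 4:

    S small[4];    // 4 <= maxBlockVisitOnPath: each element's constructor can be inlined
    S large[1024]; // over the bound: per-element construction is not inlined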
+ if (const auto *CAT = dyn_cast<ConstantArrayType>(Type)) { + unsigned Size = getContext().getConstantArrayElementCount(CAT); + + return Size <= AMgr.options.maxBlockVisitOnPath; + } + + return false; +} + +bool ExprEngine::shouldRepeatCtorCall(ProgramStateRef State, + const CXXConstructExpr *E, + const LocationContext *LCtx) { + + if (!E) + return false; + + auto Ty = E->getType(); + + // FIXME: Handle non constant array types + if (const auto *CAT = dyn_cast<ConstantArrayType>(Ty)) { + unsigned Size = getContext().getConstantArrayElementCount(CAT); + return Size > getIndexOfElementToConstruct(State, E, LCtx); + } + + return false; +} + static bool isTrivialObjectAssignment(const CallEvent &Call) { const CXXInstanceCall *ICall = dyn_cast<CXXInstanceCall>(&Call); if (!ICall) diff --git a/clang/lib/StaticAnalyzer/Core/MemRegion.cpp b/clang/lib/StaticAnalyzer/Core/MemRegion.cpp index f0cda835e07c..81c11099e93f 100644 --- a/clang/lib/StaticAnalyzer/Core/MemRegion.cpp +++ b/clang/lib/StaticAnalyzer/Core/MemRegion.cpp @@ -794,7 +794,11 @@ DefinedOrUnknownSVal MemRegionManager::getStaticSize(const MemRegion *MR, if (Size.isZero()) return true; + if (getContext().getLangOpts().StrictFlexArrays >= 2) + return false; + const AnalyzerOptions &Opts = SVB.getAnalyzerOptions(); + // FIXME: this option is probably redundant with -fstrict-flex-arrays=1. if (Opts.ShouldConsiderSingleElementArraysAsFlexibleArrayMembers && Size.isOne()) return true; diff --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp index e788a7a60830..2d4dfae1e750 100644 --- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp @@ -1213,13 +1213,21 @@ public: } RangeSet VisitSymExpr(SymbolRef Sym) { - // If we got to this function, the actual type of the symbolic + if (Optional<RangeSet> RS = getRangeForNegatedSym(Sym)) + return *RS; + // If we've reached this line, the actual type of the symbolic // expression is not supported for advanced inference. // In this case, we simply backoff to the default "let's simply // infer the range from the expression's type". return infer(Sym->getType()); } + RangeSet VisitUnarySymExpr(const UnarySymExpr *USE) { + if (Optional<RangeSet> RS = getRangeForNegatedUnarySym(USE)) + return *RS; + return infer(USE->getType()); + } + RangeSet VisitSymIntExpr(const SymIntExpr *Sym) { return VisitBinaryOperator(Sym); } @@ -1228,14 +1236,25 @@ public: return VisitBinaryOperator(Sym); } - RangeSet VisitSymSymExpr(const SymSymExpr *Sym) { + RangeSet VisitSymSymExpr(const SymSymExpr *SSE) { return intersect( RangeFactory, + // If Sym is a difference of symbols A - B, then maybe we have range + // set stored for B - A. + // + // If we have range set stored for both A - B and B - A then + // calculate the effective range set by intersecting the range set + // for A - B and the negated range set of B - A. + getRangeForNegatedSymSym(SSE), + // If Sym is a comparison expression (except <=>), + // find any other comparisons with the same operands. + // See function description. + getRangeForComparisonSymbol(SSE), // If Sym is (dis)equality, we might have some information // on that in our equality classes data structure. - getRangeForEqualities(Sym), + getRangeForEqualities(SSE), // And we should always check what we can get from the operands. 
- VisitBinaryOperator(Sym)); + VisitBinaryOperator(SSE)); } private: @@ -1264,25 +1283,13 @@ private: } RangeSet infer(SymbolRef Sym) { - return intersect( - RangeFactory, - // Of course, we should take the constraint directly associated with - // this symbol into consideration. - getConstraint(State, Sym), - // If Sym is a difference of symbols A - B, then maybe we have range - // set stored for B - A. - // - // If we have range set stored for both A - B and B - A then - // calculate the effective range set by intersecting the range set - // for A - B and the negated range set of B - A. - getRangeForNegatedSub(Sym), - // If Sym is a comparison expression (except <=>), - // find any other comparisons with the same operands. - // See function description. - getRangeForComparisonSymbol(Sym), - // Apart from the Sym itself, we can infer quite a lot if we look - // into subexpressions of Sym. - Visit(Sym)); + return intersect(RangeFactory, + // Of course, we should take the constraint directly + // associated with this symbol into consideration. + getConstraint(State, Sym), + // Apart from the Sym itself, we can infer quite a lot if + // we look into subexpressions of Sym. + Visit(Sym)); } RangeSet infer(EquivalenceClass Class) { @@ -1443,38 +1450,53 @@ private: return RangeFactory.deletePoint(Domain, IntType.getZeroValue()); } - Optional<RangeSet> getRangeForNegatedSub(SymbolRef Sym) { + template <typename ProduceNegatedSymFunc> + Optional<RangeSet> getRangeForNegatedExpr(ProduceNegatedSymFunc F, + QualType T) { // Do not negate if the type cannot be meaningfully negated. - if (!Sym->getType()->isUnsignedIntegerOrEnumerationType() && - !Sym->getType()->isSignedIntegerOrEnumerationType()) + if (!T->isUnsignedIntegerOrEnumerationType() && + !T->isSignedIntegerOrEnumerationType()) return llvm::None; - const RangeSet *NegatedRange = nullptr; - SymbolManager &SymMgr = State->getSymbolManager(); - if (const auto *USE = dyn_cast<UnarySymExpr>(Sym)) { - if (USE->getOpcode() == UO_Minus) { - // Just get the operand when we negate a symbol that is already negated. - // -(-a) == a - NegatedRange = getConstraint(State, USE->getOperand()); - } - } else if (const SymSymExpr *SSE = dyn_cast<SymSymExpr>(Sym)) { - if (SSE->getOpcode() == BO_Sub) { - QualType T = Sym->getType(); - SymbolRef NegatedSym = - SymMgr.getSymSymExpr(SSE->getRHS(), BO_Sub, SSE->getLHS(), T); - NegatedRange = getConstraint(State, NegatedSym); - } - } else { - SymbolRef NegatedSym = - SymMgr.getUnarySymExpr(Sym, UO_Minus, Sym->getType()); - NegatedRange = getConstraint(State, NegatedSym); - } + if (SymbolRef NegatedSym = F()) + if (const RangeSet *NegatedRange = getConstraint(State, NegatedSym)) + return RangeFactory.negate(*NegatedRange); - if (NegatedRange) - return RangeFactory.negate(*NegatedRange); return llvm::None; } + Optional<RangeSet> getRangeForNegatedUnarySym(const UnarySymExpr *USE) { + // Just get the operand when we negate a symbol that is already negated. 
+ // -(-a) == a + return getRangeForNegatedExpr( + [USE]() -> SymbolRef { + if (USE->getOpcode() == UO_Minus) + return USE->getOperand(); + return nullptr; + }, + USE->getType()); + } + + Optional<RangeSet> getRangeForNegatedSymSym(const SymSymExpr *SSE) { + return getRangeForNegatedExpr( + [SSE, State = this->State]() -> SymbolRef { + if (SSE->getOpcode() == BO_Sub) + return State->getSymbolManager().getSymSymExpr( + SSE->getRHS(), BO_Sub, SSE->getLHS(), SSE->getType()); + return nullptr; + }, + SSE->getType()); + } + + Optional<RangeSet> getRangeForNegatedSym(SymbolRef Sym) { + return getRangeForNegatedExpr( + [Sym, State = this->State]() { + return State->getSymbolManager().getUnarySymExpr(Sym, UO_Minus, + Sym->getType()); + }, + Sym->getType()); + } + // Returns ranges only for binary comparison operators (except <=>) // when left and right operands are symbolic values. // Finds any other comparisons with the same operands. @@ -1485,11 +1507,7 @@ private: // It covers all possible combinations (see CmpOpTable description). // Note that `x` and `y` can also stand for subexpressions, // not only for actual symbols. - Optional<RangeSet> getRangeForComparisonSymbol(SymbolRef Sym) { - const auto *SSE = dyn_cast<SymSymExpr>(Sym); - if (!SSE) - return llvm::None; - + Optional<RangeSet> getRangeForComparisonSymbol(const SymSymExpr *SSE) { const BinaryOperatorKind CurrentOP = SSE->getOpcode(); // We currently do not support <=> (C++20). @@ -1801,6 +1819,8 @@ public: void printJson(raw_ostream &Out, ProgramStateRef State, const char *NL = "\n", unsigned int Space = 0, bool IsDot = false) const override; + void printValue(raw_ostream &Out, ProgramStateRef State, + SymbolRef Sym) override; void printConstraints(raw_ostream &Out, ProgramStateRef State, const char *NL = "\n", unsigned int Space = 0, bool IsDot = false) const; @@ -3154,6 +3174,13 @@ void RangeConstraintManager::printJson(raw_ostream &Out, ProgramStateRef State, printDisequalities(Out, State, NL, Space, IsDot); } +void RangeConstraintManager::printValue(raw_ostream &Out, ProgramStateRef State, + SymbolRef Sym) { + const RangeSet RS = getRange(State, Sym); + Out << RS.getBitWidth() << (RS.isUnsigned() ? 
"u:" : "s:"); + RS.dump(Out); +} + static std::string toString(const SymbolRef &Sym) { std::string S; llvm::raw_string_ostream O(S); diff --git a/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp index cf3d13ffb7ba..d90e869196eb 100644 --- a/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp +++ b/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp @@ -19,15 +19,16 @@ #include "clang/AST/ExprObjC.h" #include "clang/AST/Stmt.h" #include "clang/AST/Type.h" -#include "clang/Basic/LLVM.h" #include "clang/Analysis/AnalysisDeclContext.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" +#include "clang/Basic/LLVM.h" #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/BasicValueFactory.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" #include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/SValVisitor.h" #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h" #include "clang/StaticAnalyzer/Core/PathSensitive/Store.h" #include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h" @@ -617,517 +618,478 @@ SVal SValBuilder::evalIntegralCast(ProgramStateRef state, SVal val, } //===----------------------------------------------------------------------===// -// Cast methods. -// `evalCast` is the main method -// `evalCastKind` and `evalCastSubKind` are helpers +// Cast method. +// `evalCast` and its helper `EvalCastVisitor` //===----------------------------------------------------------------------===// -/// Cast a given SVal to another SVal using given QualType's. -/// \param V -- SVal that should be casted. -/// \param CastTy -- QualType that V should be casted according to. -/// \param OriginalTy -- QualType which is associated to V. It provides -/// additional information about what type the cast performs from. -/// \returns the most appropriate casted SVal. -/// Note: Many cases don't use an exact OriginalTy. It can be extracted -/// from SVal or the cast can performs unconditionaly. Always pass OriginalTy! -/// It can be crucial in certain cases and generates different results. -/// FIXME: If `OriginalTy.isNull()` is true, then cast performs based on CastTy -/// only. This behavior is uncertain and should be improved. -SVal SValBuilder::evalCast(SVal V, QualType CastTy, QualType OriginalTy) { - if (CastTy.isNull()) - return V; - - CastTy = Context.getCanonicalType(CastTy); +namespace { +class EvalCastVisitor : public SValVisitor<EvalCastVisitor, SVal> { +private: + SValBuilder &VB; + ASTContext &Context; + QualType CastTy, OriginalTy; - const bool IsUnknownOriginalType = OriginalTy.isNull(); - if (!IsUnknownOriginalType) { - OriginalTy = Context.getCanonicalType(OriginalTy); +public: + EvalCastVisitor(SValBuilder &VB, QualType CastTy, QualType OriginalTy) + : VB(VB), Context(VB.getContext()), CastTy(CastTy), + OriginalTy(OriginalTy) {} - if (CastTy == OriginalTy) + SVal Visit(SVal V) { + if (CastTy.isNull()) return V; - // FIXME: Move this check to the most appropriate - // evalCastKind/evalCastSubKind function. For const casts, casts to void, - // just propagate the value. 
- if (!CastTy->isVariableArrayType() && !OriginalTy->isVariableArrayType()) - if (shouldBeModeledWithNoOp(Context, Context.getPointerType(CastTy), - Context.getPointerType(OriginalTy))) - return V; - } - - // Cast SVal according to kinds. - switch (V.getBaseKind()) { - case SVal::UndefinedValKind: - return evalCastKind(V.castAs<UndefinedVal>(), CastTy, OriginalTy); - case SVal::UnknownValKind: - return evalCastKind(V.castAs<UnknownVal>(), CastTy, OriginalTy); - case SVal::LocKind: - return evalCastKind(V.castAs<Loc>(), CastTy, OriginalTy); - case SVal::NonLocKind: - return evalCastKind(V.castAs<NonLoc>(), CastTy, OriginalTy); - } - - llvm_unreachable("Unknown SVal kind"); -} - -SVal SValBuilder::evalCastKind(UndefinedVal V, QualType CastTy, - QualType OriginalTy) { - return V; -} - -SVal SValBuilder::evalCastKind(UnknownVal V, QualType CastTy, - QualType OriginalTy) { - return V; -} - -SVal SValBuilder::evalCastKind(Loc V, QualType CastTy, QualType OriginalTy) { - switch (V.getSubKind()) { - case loc::ConcreteIntKind: - return evalCastSubKind(V.castAs<loc::ConcreteInt>(), CastTy, OriginalTy); - case loc::GotoLabelKind: - return evalCastSubKind(V.castAs<loc::GotoLabel>(), CastTy, OriginalTy); - case loc::MemRegionValKind: - return evalCastSubKind(V.castAs<loc::MemRegionVal>(), CastTy, OriginalTy); - } - - llvm_unreachable("Unknown SVal kind"); -} - -SVal SValBuilder::evalCastKind(NonLoc V, QualType CastTy, QualType OriginalTy) { - switch (V.getSubKind()) { - case nonloc::CompoundValKind: - return evalCastSubKind(V.castAs<nonloc::CompoundVal>(), CastTy, OriginalTy); - case nonloc::ConcreteIntKind: - return evalCastSubKind(V.castAs<nonloc::ConcreteInt>(), CastTy, OriginalTy); - case nonloc::LazyCompoundValKind: - return evalCastSubKind(V.castAs<nonloc::LazyCompoundVal>(), CastTy, - OriginalTy); - case nonloc::LocAsIntegerKind: - return evalCastSubKind(V.castAs<nonloc::LocAsInteger>(), CastTy, - OriginalTy); - case nonloc::SymbolValKind: - return evalCastSubKind(V.castAs<nonloc::SymbolVal>(), CastTy, OriginalTy); - case nonloc::PointerToMemberKind: - return evalCastSubKind(V.castAs<nonloc::PointerToMember>(), CastTy, - OriginalTy); - } + CastTy = Context.getCanonicalType(CastTy); - llvm_unreachable("Unknown SVal kind"); -} + const bool IsUnknownOriginalType = OriginalTy.isNull(); + if (!IsUnknownOriginalType) { + OriginalTy = Context.getCanonicalType(OriginalTy); -SVal SValBuilder::evalCastSubKind(loc::ConcreteInt V, QualType CastTy, - QualType OriginalTy) { - // Pointer to bool. - if (CastTy->isBooleanType()) - return makeTruthVal(V.getValue().getBoolValue(), CastTy); - - // Pointer to integer. - if (CastTy->isIntegralOrEnumerationType()) { - llvm::APSInt Value = V.getValue(); - BasicVals.getAPSIntType(CastTy).apply(Value); - return makeIntVal(Value); - } + if (CastTy == OriginalTy) + return V; - // Pointer to any pointer. - if (Loc::isLocType(CastTy)) { - llvm::APSInt Value = V.getValue(); - BasicVals.getAPSIntType(CastTy).apply(Value); - return loc::ConcreteInt(BasicVals.getValue(Value)); + // FIXME: Move this check to the most appropriate + // evalCastKind/evalCastSubKind function. For const casts, casts to void, + // just propagate the value. 
+ if (!CastTy->isVariableArrayType() && !OriginalTy->isVariableArrayType()) + if (shouldBeModeledWithNoOp(Context, Context.getPointerType(CastTy), + Context.getPointerType(OriginalTy))) + return V; + } + return SValVisitor::Visit(V); } + SVal VisitUndefinedVal(UndefinedVal V) { return V; } + SVal VisitUnknownVal(UnknownVal V) { return V; } + SVal VisitLocConcreteInt(loc::ConcreteInt V) { + // Pointer to bool. + if (CastTy->isBooleanType()) + return VB.makeTruthVal(V.getValue().getBoolValue(), CastTy); + + // Pointer to integer. + if (CastTy->isIntegralOrEnumerationType()) { + llvm::APSInt Value = V.getValue(); + VB.getBasicValueFactory().getAPSIntType(CastTy).apply(Value); + return VB.makeIntVal(Value); + } - // Pointer to whatever else. - return UnknownVal(); -} - -SVal SValBuilder::evalCastSubKind(loc::GotoLabel V, QualType CastTy, - QualType OriginalTy) { - // Pointer to bool. - if (CastTy->isBooleanType()) - // Labels are always true. - return makeTruthVal(true, CastTy); - - // Pointer to integer. - if (CastTy->isIntegralOrEnumerationType()) { - const unsigned BitWidth = Context.getIntWidth(CastTy); - return makeLocAsInteger(V, BitWidth); - } + // Pointer to any pointer. + if (Loc::isLocType(CastTy)) { + llvm::APSInt Value = V.getValue(); + VB.getBasicValueFactory().getAPSIntType(CastTy).apply(Value); + return loc::ConcreteInt(VB.getBasicValueFactory().getValue(Value)); + } - const bool IsUnknownOriginalType = OriginalTy.isNull(); - if (!IsUnknownOriginalType) { - // Array to pointer. - if (isa<ArrayType>(OriginalTy)) - if (CastTy->isPointerType() || CastTy->isReferenceType()) - return UnknownVal(); + // Pointer to whatever else. + return UnknownVal(); } - - // Pointer to any pointer. - if (Loc::isLocType(CastTy)) - return V; - - // Pointer to whatever else. - return UnknownVal(); -} - -static bool hasSameUnqualifiedPointeeType(QualType ty1, QualType ty2) { - return ty1->getPointeeType().getCanonicalType().getTypePtr() == - ty2->getPointeeType().getCanonicalType().getTypePtr(); -} - -SVal SValBuilder::evalCastSubKind(loc::MemRegionVal V, QualType CastTy, - QualType OriginalTy) { - // Pointer to bool. - if (CastTy->isBooleanType()) { - const MemRegion *R = V.getRegion(); - if (const FunctionCodeRegion *FTR = dyn_cast<FunctionCodeRegion>(R)) - if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(FTR->getDecl())) - if (FD->isWeak()) - // FIXME: Currently we are using an extent symbol here, - // because there are no generic region address metadata - // symbols to use, only content metadata. - return nonloc::SymbolVal(SymMgr.getExtentSymbol(FTR)); - - if (const SymbolicRegion *SymR = R->getSymbolicBase()) { - SymbolRef Sym = SymR->getSymbol(); - QualType Ty = Sym->getType(); - // This change is needed for architectures with varying - // pointer widths. See the amdgcn opencl reproducer with - // this change as an example: solver-sym-simplification-ptr-bool.cl - if (!Ty->isReferenceType()) - return makeNonLoc(Sym, BO_NE, BasicVals.getZeroWithTypeSize(Ty), - CastTy); + SVal VisitLocGotoLabel(loc::GotoLabel V) { + // Pointer to bool. + if (CastTy->isBooleanType()) + // Labels are always true. + return VB.makeTruthVal(true, CastTy); + + // Pointer to integer. + if (CastTy->isIntegralOrEnumerationType()) { + const unsigned BitWidth = Context.getIntWidth(CastTy); + return VB.makeLocAsInteger(V, BitWidth); } - // Non-symbolic memory regions are always true. 
- return makeTruthVal(true, CastTy); - } - const bool IsUnknownOriginalType = OriginalTy.isNull(); - // Try to cast to array - const auto *ArrayTy = - IsUnknownOriginalType - ? nullptr - : dyn_cast<ArrayType>(OriginalTy.getCanonicalType()); - - // Pointer to integer. - if (CastTy->isIntegralOrEnumerationType()) { - SVal Val = V; - // Array to integer. - if (ArrayTy) { - // We will always decay to a pointer. - QualType ElemTy = ArrayTy->getElementType(); - Val = StateMgr.ArrayToPointer(V, ElemTy); - // FIXME: Keep these here for now in case we decide soon that we - // need the original decayed type. - // QualType elemTy = cast<ArrayType>(originalTy)->getElementType(); - // QualType pointerTy = C.getPointerType(elemTy); + const bool IsUnknownOriginalType = OriginalTy.isNull(); + if (!IsUnknownOriginalType) { + // Array to pointer. + if (isa<ArrayType>(OriginalTy)) + if (CastTy->isPointerType() || CastTy->isReferenceType()) + return UnknownVal(); } - const unsigned BitWidth = Context.getIntWidth(CastTy); - return makeLocAsInteger(Val.castAs<Loc>(), BitWidth); - } - // Pointer to pointer. - if (Loc::isLocType(CastTy)) { - - if (IsUnknownOriginalType) { - // When retrieving symbolic pointer and expecting a non-void pointer, - // wrap them into element regions of the expected type if necessary. - // It is necessary to make sure that the retrieved value makes sense, - // because there's no other cast in the AST that would tell us to cast - // it to the correct pointer type. We might need to do that for non-void - // pointers as well. - // FIXME: We really need a single good function to perform casts for us - // correctly every time we need it. + // Pointer to any pointer. + if (Loc::isLocType(CastTy)) + return V; + + // Pointer to whatever else. + return UnknownVal(); + } + SVal VisitLocMemRegionVal(loc::MemRegionVal V) { + // Pointer to bool. + if (CastTy->isBooleanType()) { const MemRegion *R = V.getRegion(); - if (CastTy->isPointerType() && !CastTy->isVoidPointerType()) { - if (const auto *SR = dyn_cast<SymbolicRegion>(R)) { - QualType SRTy = SR->getSymbol()->getType(); - if (!hasSameUnqualifiedPointeeType(SRTy, CastTy)) { - if (auto OptMemRegV = getCastedMemRegionVal(SR, CastTy)) - return *OptMemRegV; - } - } - } - // Next fixes pointer dereference using type different from its initial - // one. See PR37503 and PR49007 for details. - if (const auto *ER = dyn_cast<ElementRegion>(R)) { - if (auto OptMemRegV = getCastedMemRegionVal(ER, CastTy)) - return *OptMemRegV; + if (const FunctionCodeRegion *FTR = dyn_cast<FunctionCodeRegion>(R)) + if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(FTR->getDecl())) + if (FD->isWeak()) + // FIXME: Currently we are using an extent symbol here, + // because there are no generic region address metadata + // symbols to use, only content metadata. + return nonloc::SymbolVal( + VB.getSymbolManager().getExtentSymbol(FTR)); + + if (const SymbolicRegion *SymR = R->getSymbolicBase()) { + SymbolRef Sym = SymR->getSymbol(); + QualType Ty = Sym->getType(); + // This change is needed for architectures with varying + // pointer widths. See the amdgcn opencl reproducer with + // this change as an example: solver-sym-simplification-ptr-bool.cl + if (!Ty->isReferenceType()) + return VB.makeNonLoc( + Sym, BO_NE, VB.getBasicValueFactory().getZeroWithTypeSize(Ty), + CastTy); } - - return V; + // Non-symbolic memory regions are always true. 
+ return VB.makeTruthVal(true, CastTy); } - if (OriginalTy->isIntegralOrEnumerationType() || - OriginalTy->isBlockPointerType() || OriginalTy->isFunctionPointerType()) - return V; - - // Array to pointer. - if (ArrayTy) { - // Are we casting from an array to a pointer? If so just pass on - // the decayed value. - if (CastTy->isPointerType() || CastTy->isReferenceType()) { + const bool IsUnknownOriginalType = OriginalTy.isNull(); + // Try to cast to array + const auto *ArrayTy = + IsUnknownOriginalType + ? nullptr + : dyn_cast<ArrayType>(OriginalTy.getCanonicalType()); + + // Pointer to integer. + if (CastTy->isIntegralOrEnumerationType()) { + SVal Val = V; + // Array to integer. + if (ArrayTy) { // We will always decay to a pointer. QualType ElemTy = ArrayTy->getElementType(); - return StateMgr.ArrayToPointer(V, ElemTy); + Val = VB.getStateManager().ArrayToPointer(V, ElemTy); + // FIXME: Keep these here for now in case we decide soon that we + // need the original decayed type. + // QualType elemTy = cast<ArrayType>(originalTy)->getElementType(); + // QualType pointerTy = C.getPointerType(elemTy); } - // Are we casting from an array to an integer? If so, cast the decayed - // pointer value to an integer. - assert(CastTy->isIntegralOrEnumerationType()); + const unsigned BitWidth = Context.getIntWidth(CastTy); + return VB.makeLocAsInteger(Val.castAs<Loc>(), BitWidth); } - // Other pointer to pointer. - assert(Loc::isLocType(OriginalTy) || OriginalTy->isFunctionType() || - CastTy->isReferenceType()); + // Pointer to pointer. + if (Loc::isLocType(CastTy)) { - // We get a symbolic function pointer for a dereference of a function - // pointer, but it is of function type. Example: + if (IsUnknownOriginalType) { + // When retrieving symbolic pointer and expecting a non-void pointer, + // wrap them into element regions of the expected type if necessary. + // It is necessary to make sure that the retrieved value makes sense, + // because there's no other cast in the AST that would tell us to cast + // it to the correct pointer type. We might need to do that for non-void + // pointers as well. + // FIXME: We really need a single good function to perform casts for us + // correctly every time we need it. + const MemRegion *R = V.getRegion(); + if (CastTy->isPointerType() && !CastTy->isVoidPointerType()) { + if (const auto *SR = dyn_cast<SymbolicRegion>(R)) { + QualType SRTy = SR->getSymbol()->getType(); + + auto HasSameUnqualifiedPointeeType = [](QualType ty1, + QualType ty2) { + return ty1->getPointeeType().getCanonicalType().getTypePtr() == + ty2->getPointeeType().getCanonicalType().getTypePtr(); + }; + if (!HasSameUnqualifiedPointeeType(SRTy, CastTy)) { + if (auto OptMemRegV = VB.getCastedMemRegionVal(SR, CastTy)) + return *OptMemRegV; + } + } + } + // Next fixes pointer dereference using type different from its initial + // one. See PR37503 and PR49007 for details. + if (const auto *ER = dyn_cast<ElementRegion>(R)) { + if (auto OptMemRegV = VB.getCastedMemRegionVal(ER, CastTy)) + return *OptMemRegV; + } - // struct FPRec { - // void (*my_func)(int * x); - // }; - // - // int bar(int x); - // - // int f1_a(struct FPRec* foo) { - // int x; - // (*foo->my_func)(&x); - // return bar(x)+1; // no-warning - // } - - // Get the result of casting a region to a different type. - const MemRegion *R = V.getRegion(); - if (auto OptMemRegV = getCastedMemRegionVal(R, CastTy)) - return *OptMemRegV; - } + return V; + } - // Pointer to whatever else. 
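When a pointer value is cast to an integer type above, the result keeps only the destination type's bit width (makeLocAsInteger with Context.getIntWidth(CastTy)). A standalone sketch of that truncation on a raw address; the function name and the masking are illustrative, not the analyzer's implementation.

#include <cstdint>
#include <iostream>

// Keep only the low BitWidth bits of a pointer's address, as a plain value.
uint64_t locAsToyInteger(const void *P, unsigned BitWidth) {
  uint64_t Raw = reinterpret_cast<std::uintptr_t>(P);
  if (BitWidth >= 64)
    return Raw;
  return Raw & ((uint64_t{1} << BitWidth) - 1); // truncate to BitWidth bits
}

int main() {
  int X = 0;
  std::cout << std::hex << locAsToyInteger(&X, 32) << "\n"; // low 32 bits of &X
}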
- // FIXME: There can be gross cases where one casts the result of a - // function (that returns a pointer) to some other value that happens to - // fit within that pointer value. We currently have no good way to model - // such operations. When this happens, the underlying operation is that - // the caller is reasoning about bits. Conceptually we are layering a - // "view" of a location on top of those bits. Perhaps we need to be more - // lazy about mutual possible views, even on an SVal? This may be - // necessary for bit-level reasoning as well. - return UnknownVal(); -} + if (OriginalTy->isIntegralOrEnumerationType() || + OriginalTy->isBlockPointerType() || + OriginalTy->isFunctionPointerType()) + return V; -SVal SValBuilder::evalCastSubKind(nonloc::CompoundVal V, QualType CastTy, - QualType OriginalTy) { - // Compound to whatever. - return UnknownVal(); -} + // Array to pointer. + if (ArrayTy) { + // Are we casting from an array to a pointer? If so just pass on + // the decayed value. + if (CastTy->isPointerType() || CastTy->isReferenceType()) { + // We will always decay to a pointer. + QualType ElemTy = ArrayTy->getElementType(); + return VB.getStateManager().ArrayToPointer(V, ElemTy); + } + // Are we casting from an array to an integer? If so, cast the decayed + // pointer value to an integer. + assert(CastTy->isIntegralOrEnumerationType()); + } -SVal SValBuilder::evalCastSubKind(nonloc::ConcreteInt V, QualType CastTy, - QualType OriginalTy) { - auto CastedValue = [V, CastTy, this]() { - llvm::APSInt Value = V.getValue(); - BasicVals.getAPSIntType(CastTy).apply(Value); - return Value; - }; + // Other pointer to pointer. + assert(Loc::isLocType(OriginalTy) || OriginalTy->isFunctionType() || + CastTy->isReferenceType()); - // Integer to bool. - if (CastTy->isBooleanType()) - return makeTruthVal(V.getValue().getBoolValue(), CastTy); + // We get a symbolic function pointer for a dereference of a function + // pointer, but it is of function type. Example: + + // struct FPRec { + // void (*my_func)(int * x); + // }; + // + // int bar(int x); + // + // int f1_a(struct FPRec* foo) { + // int x; + // (*foo->my_func)(&x); + // return bar(x)+1; // no-warning + // } + + // Get the result of casting a region to a different type. + const MemRegion *R = V.getRegion(); + if (auto OptMemRegV = VB.getCastedMemRegionVal(R, CastTy)) + return *OptMemRegV; + } - // Integer to pointer. - if (CastTy->isIntegralOrEnumerationType()) - return makeIntVal(CastedValue()); + // Pointer to whatever else. + // FIXME: There can be gross cases where one casts the result of a + // function (that returns a pointer) to some other value that happens to + // fit within that pointer value. We currently have no good way to model + // such operations. When this happens, the underlying operation is that + // the caller is reasoning about bits. Conceptually we are layering a + // "view" of a location on top of those bits. Perhaps we need to be more + // lazy about mutual possible views, even on an SVal? This may be + // necessary for bit-level reasoning as well. + return UnknownVal(); + } + SVal VisitNonLocCompoundVal(nonloc::CompoundVal V) { + // Compound to whatever. + return UnknownVal(); + } + SVal VisitNonLocConcreteInt(nonloc::ConcreteInt V) { + auto CastedValue = [V, this]() { + llvm::APSInt Value = V.getValue(); + VB.getBasicValueFactory().getAPSIntType(CastTy).apply(Value); + return Value; + }; - // Integer to pointer. - if (Loc::isLocType(CastTy)) - return makeIntLocVal(CastedValue()); + // Integer to bool. 
+ if (CastTy->isBooleanType()) + return VB.makeTruthVal(V.getValue().getBoolValue(), CastTy); - // Pointer to whatever else. - return UnknownVal(); -} + // Integer to pointer. + if (CastTy->isIntegralOrEnumerationType()) + return VB.makeIntVal(CastedValue()); -SVal SValBuilder::evalCastSubKind(nonloc::LazyCompoundVal V, QualType CastTy, - QualType OriginalTy) { - // Compound to whatever. - return UnknownVal(); -} + // Integer to pointer. + if (Loc::isLocType(CastTy)) + return VB.makeIntLocVal(CastedValue()); -SVal SValBuilder::evalCastSubKind(nonloc::LocAsInteger V, QualType CastTy, - QualType OriginalTy) { - Loc L = V.getLoc(); - - // Pointer as integer to bool. - if (CastTy->isBooleanType()) - // Pass to Loc function. - return evalCastKind(L, CastTy, OriginalTy); - - const bool IsUnknownOriginalType = OriginalTy.isNull(); - // Pointer as integer to pointer. - if (!IsUnknownOriginalType && Loc::isLocType(CastTy) && - OriginalTy->isIntegralOrEnumerationType()) { - if (const MemRegion *R = L.getAsRegion()) - if (auto OptMemRegV = getCastedMemRegionVal(R, CastTy)) - return *OptMemRegV; - return L; + // Pointer to whatever else. + return UnknownVal(); } - - // Pointer as integer with region to integer/pointer. - const MemRegion *R = L.getAsRegion(); - if (!IsUnknownOriginalType && R) { - if (CastTy->isIntegralOrEnumerationType()) - return evalCastSubKind(loc::MemRegionVal(R), CastTy, OriginalTy); - - if (Loc::isLocType(CastTy)) { - assert(Loc::isLocType(OriginalTy) || OriginalTy->isFunctionType() || - CastTy->isReferenceType()); - // Delegate to store manager to get the result of casting a region to a - // different type. If the MemRegion* returned is NULL, this expression - // Evaluates to UnknownVal. - if (auto OptMemRegV = getCastedMemRegionVal(R, CastTy)) - return *OptMemRegV; - } - } else { - if (Loc::isLocType(CastTy)) { - if (IsUnknownOriginalType) - return evalCastSubKind(loc::MemRegionVal(R), CastTy, OriginalTy); + SVal VisitNonLocLazyCompoundVal(nonloc::LazyCompoundVal V) { + // LazyCompound to whatever. + return UnknownVal(); + } + SVal VisitNonLocLocAsInteger(nonloc::LocAsInteger V) { + Loc L = V.getLoc(); + + // Pointer as integer to bool. + if (CastTy->isBooleanType()) + // Pass to Loc function. + return Visit(L); + + const bool IsUnknownOriginalType = OriginalTy.isNull(); + // Pointer as integer to pointer. + if (!IsUnknownOriginalType && Loc::isLocType(CastTy) && + OriginalTy->isIntegralOrEnumerationType()) { + if (const MemRegion *R = L.getAsRegion()) + if (auto OptMemRegV = VB.getCastedMemRegionVal(R, CastTy)) + return *OptMemRegV; return L; } - SymbolRef SE = nullptr; - if (R) { - if (const SymbolicRegion *SR = - dyn_cast<SymbolicRegion>(R->StripCasts())) { - SE = SR->getSymbol(); + // Pointer as integer with region to integer/pointer. + const MemRegion *R = L.getAsRegion(); + if (!IsUnknownOriginalType && R) { + if (CastTy->isIntegralOrEnumerationType()) + return VisitLocMemRegionVal(loc::MemRegionVal(R)); + + if (Loc::isLocType(CastTy)) { + assert(Loc::isLocType(OriginalTy) || OriginalTy->isFunctionType() || + CastTy->isReferenceType()); + // Delegate to store manager to get the result of casting a region to a + // different type. If the MemRegion* returned is NULL, this expression + // Evaluates to UnknownVal. 
+ if (auto OptMemRegV = VB.getCastedMemRegionVal(R, CastTy)) + return *OptMemRegV; + } + } else { + if (Loc::isLocType(CastTy)) { + if (IsUnknownOriginalType) + return VisitLocMemRegionVal(loc::MemRegionVal(R)); + return L; } - } - - if (!CastTy->isFloatingType() || !SE || SE->getType()->isFloatingType()) { - // FIXME: Correctly support promotions/truncations. - const unsigned CastSize = Context.getIntWidth(CastTy); - if (CastSize == V.getNumBits()) - return V; - return makeLocAsInteger(L, CastSize); - } - } + SymbolRef SE = nullptr; + if (R) { + if (const SymbolicRegion *SR = + dyn_cast<SymbolicRegion>(R->StripCasts())) { + SE = SR->getSymbol(); + } + } - // Pointer as integer to whatever else. - return UnknownVal(); -} + if (!CastTy->isFloatingType() || !SE || SE->getType()->isFloatingType()) { + // FIXME: Correctly support promotions/truncations. + const unsigned CastSize = Context.getIntWidth(CastTy); + if (CastSize == V.getNumBits()) + return V; -SVal SValBuilder::evalCastSubKind(nonloc::SymbolVal V, QualType CastTy, - QualType OriginalTy) { - SymbolRef SE = V.getSymbol(); - - const bool IsUnknownOriginalType = OriginalTy.isNull(); - // Symbol to bool. - if (!IsUnknownOriginalType && CastTy->isBooleanType()) { - // Non-float to bool. - if (Loc::isLocType(OriginalTy) || - OriginalTy->isIntegralOrEnumerationType() || - OriginalTy->isMemberPointerType()) { - BasicValueFactory &BVF = getBasicValueFactory(); - return makeNonLoc(SE, BO_NE, BVF.getValue(0, SE->getType()), CastTy); + return VB.makeLocAsInteger(L, CastSize); + } } - } else { - // Symbol to integer, float. - QualType T = Context.getCanonicalType(SE->getType()); - // Produce SymbolCast if CastTy and T are different integers. - // NOTE: In the end the type of SymbolCast shall be equal to CastTy. - if (T->isIntegralOrUnscopedEnumerationType() && - CastTy->isIntegralOrUnscopedEnumerationType()) { - AnalyzerOptions &Opts = - StateMgr.getOwningEngine().getAnalysisManager().getAnalyzerOptions(); - // If appropriate option is disabled, ignore the cast. - // NOTE: ShouldSupportSymbolicIntegerCasts is `false` by default. - if (!Opts.ShouldSupportSymbolicIntegerCasts) - return V; - return simplifySymbolCast(V, CastTy); + // Pointer as integer to whatever else. + return UnknownVal(); + } + SVal VisitNonLocSymbolVal(nonloc::SymbolVal V) { + SymbolRef SE = V.getSymbol(); + + const bool IsUnknownOriginalType = OriginalTy.isNull(); + // Symbol to bool. + if (!IsUnknownOriginalType && CastTy->isBooleanType()) { + // Non-float to bool. + if (Loc::isLocType(OriginalTy) || + OriginalTy->isIntegralOrEnumerationType() || + OriginalTy->isMemberPointerType()) { + BasicValueFactory &BVF = VB.getBasicValueFactory(); + return VB.makeNonLoc(SE, BO_NE, BVF.getValue(0, SE->getType()), CastTy); + } + } else { + // Symbol to integer, float. + QualType T = Context.getCanonicalType(SE->getType()); + + // Produce SymbolCast if CastTy and T are different integers. + // NOTE: In the end the type of SymbolCast shall be equal to CastTy. + if (T->isIntegralOrUnscopedEnumerationType() && + CastTy->isIntegralOrUnscopedEnumerationType()) { + AnalyzerOptions &Opts = VB.getStateManager() + .getOwningEngine() + .getAnalysisManager() + .getAnalyzerOptions(); + // If appropriate option is disabled, ignore the cast. + // NOTE: ShouldSupportSymbolicIntegerCasts is `false` by default. 
+ if (!Opts.ShouldSupportSymbolicIntegerCasts) + return V; + return simplifySymbolCast(V, CastTy); + } + if (!Loc::isLocType(CastTy)) + if (!IsUnknownOriginalType || !CastTy->isFloatingType() || + T->isFloatingType()) + return VB.makeNonLoc(SE, T, CastTy); } - if (!Loc::isLocType(CastTy)) - if (!IsUnknownOriginalType || !CastTy->isFloatingType() || - T->isFloatingType()) - return makeNonLoc(SE, T, CastTy); + + // Symbol to pointer and whatever else. + return UnknownVal(); + } + SVal VisitNonLocPointerToMember(nonloc::PointerToMember V) { + // Member pointer to whatever. + return V; } - // Symbol to pointer and whatever else. - return UnknownVal(); -} + /// Reduce cast expression by removing redundant intermediate casts. + /// E.g. + /// - (char)(short)(int x) -> (char)(int x) + /// - (int)(int x) -> int x + /// + /// \param V -- SymbolVal, which pressumably contains SymbolCast or any symbol + /// that is applicable for cast operation. + /// \param CastTy -- QualType, which `V` shall be cast to. + /// \return SVal with simplified cast expression. + /// \note: Currently only support integral casts. + nonloc::SymbolVal simplifySymbolCast(nonloc::SymbolVal V, QualType CastTy) { + // We use seven conditions to recognize a simplification case. + // For the clarity let `CastTy` be `C`, SE->getType() - `T`, root type - + // `R`, prefix `u` for unsigned, `s` for signed, no prefix - any sign: E.g. + // (char)(short)(uint x) + // ( sC )( sT )( uR x) + // + // C === R (the same type) + // (char)(char x) -> (char x) + // (long)(long x) -> (long x) + // Note: Comparisons operators below are for bit width. + // C == T + // (short)(short)(int x) -> (short)(int x) + // (int)(long)(char x) -> (int)(char x) (sizeof(long) == sizeof(int)) + // (long)(ullong)(char x) -> (long)(char x) (sizeof(long) == + // sizeof(ullong)) + // C < T + // (short)(int)(char x) -> (short)(char x) + // (char)(int)(short x) -> (char)(short x) + // (short)(int)(short x) -> (short x) + // C > T > uR + // (int)(short)(uchar x) -> (int)(uchar x) + // (uint)(short)(uchar x) -> (uint)(uchar x) + // (int)(ushort)(uchar x) -> (int)(uchar x) + // C > sT > sR + // (int)(short)(char x) -> (int)(char x) + // (uint)(short)(char x) -> (uint)(char x) + // C > sT == sR + // (int)(char)(char x) -> (int)(char x) + // (uint)(short)(short x) -> (uint)(short x) + // C > uT == uR + // (int)(uchar)(uchar x) -> (int)(uchar x) + // (uint)(ushort)(ushort x) -> (uint)(ushort x) + // (llong)(ulong)(uint x) -> (llong)(uint x) (sizeof(ulong) == + // sizeof(uint)) + + SymbolRef SE = V.getSymbol(); + QualType T = Context.getCanonicalType(SE->getType()); -SVal SValBuilder::evalCastSubKind(nonloc::PointerToMember V, QualType CastTy, - QualType OriginalTy) { - // Member pointer to whatever. - return V; -} + if (T == CastTy) + return V; -nonloc::SymbolVal SValBuilder::simplifySymbolCast(nonloc::SymbolVal V, - QualType CastTy) { - // We use seven conditions to recognize a simplification case. - // For the clarity let `CastTy` be `C`, SE->getType() - `T`, root type - `R`, - // prefix `u` for unsigned, `s` for signed, no prefix - any sign: - // E.g. (char)(short)(uint x) - // ( sC )( sT )( uR x) - // - // C === R (the same type) - // (char)(char x) -> (char x) - // (long)(long x) -> (long x) - // Note: Comparisons operators below are for bit width. 
- // C == T - // (short)(short)(int x) -> (short)(int x) - // (int)(long)(char x) -> (int)(char x) (sizeof(long) == sizeof(int)) - // (long)(ullong)(char x) -> (long)(char x) (sizeof(long) == sizeof(ullong)) - // C < T - // (short)(int)(char x) -> (short)(char x) - // (char)(int)(short x) -> (char)(short x) - // (short)(int)(short x) -> (short x) - // C > T > uR - // (int)(short)(uchar x) -> (int)(uchar x) - // (uint)(short)(uchar x) -> (uint)(uchar x) - // (int)(ushort)(uchar x) -> (int)(uchar x) - // C > sT > sR - // (int)(short)(char x) -> (int)(char x) - // (uint)(short)(char x) -> (uint)(char x) - // C > sT == sR - // (int)(char)(char x) -> (int)(char x) - // (uint)(short)(short x) -> (uint)(short x) - // C > uT == uR - // (int)(uchar)(uchar x) -> (int)(uchar x) - // (uint)(ushort)(ushort x) -> (uint)(ushort x) - // (llong)(ulong)(uint x) -> (llong)(uint x) (sizeof(ulong) == sizeof(uint)) - - SymbolRef SE = V.getSymbol(); - QualType T = Context.getCanonicalType(SE->getType()); - - if (T == CastTy) - return V; + if (!isa<SymbolCast>(SE)) + return VB.makeNonLoc(SE, T, CastTy); - if (!isa<SymbolCast>(SE)) - return makeNonLoc(SE, T, CastTy); + SymbolRef RootSym = cast<SymbolCast>(SE)->getOperand(); + QualType RT = RootSym->getType().getCanonicalType(); - SymbolRef RootSym = cast<SymbolCast>(SE)->getOperand(); - QualType RT = RootSym->getType().getCanonicalType(); + // FIXME support simplification from non-integers. + if (!RT->isIntegralOrEnumerationType()) + return VB.makeNonLoc(SE, T, CastTy); - // FIXME support simplification from non-integers. - if (!RT->isIntegralOrEnumerationType()) - return makeNonLoc(SE, T, CastTy); + BasicValueFactory &BVF = VB.getBasicValueFactory(); + APSIntType CTy = BVF.getAPSIntType(CastTy); + APSIntType TTy = BVF.getAPSIntType(T); - BasicValueFactory &BVF = getBasicValueFactory(); - APSIntType CTy = BVF.getAPSIntType(CastTy); - APSIntType TTy = BVF.getAPSIntType(T); + const auto WC = CTy.getBitWidth(); + const auto WT = TTy.getBitWidth(); - const auto WC = CTy.getBitWidth(); - const auto WT = TTy.getBitWidth(); + if (WC <= WT) { + const bool isSameType = (RT == CastTy); + if (isSameType) + return nonloc::SymbolVal(RootSym); + return VB.makeNonLoc(RootSym, RT, CastTy); + } - if (WC <= WT) { - const bool isSameType = (RT == CastTy); - if (isSameType) - return nonloc::SymbolVal(RootSym); - return makeNonLoc(RootSym, RT, CastTy); - } + APSIntType RTy = BVF.getAPSIntType(RT); + const auto WR = RTy.getBitWidth(); + const bool UT = TTy.isUnsigned(); + const bool UR = RTy.isUnsigned(); - APSIntType RTy = BVF.getAPSIntType(RT); - const auto WR = RTy.getBitWidth(); - const bool UT = TTy.isUnsigned(); - const bool UR = RTy.isUnsigned(); + if (((WT > WR) && (UR || !UT)) || ((WT == WR) && (UT == UR))) + return VB.makeNonLoc(RootSym, RT, CastTy); - if (((WT > WR) && (UR || !UT)) || ((WT == WR) && (UT == UR))) - return makeNonLoc(RootSym, RT, CastTy); + return VB.makeNonLoc(SE, T, CastTy); + } +}; +} // end anonymous namespace - return makeNonLoc(SE, T, CastTy); +/// Cast a given SVal to another SVal using given QualType's. +/// \param V -- SVal that should be casted. +/// \param CastTy -- QualType that V should be casted according to. +/// \param OriginalTy -- QualType which is associated to V. It provides +/// additional information about what type the cast performs from. +/// \returns the most appropriate casted SVal. +/// Note: Many cases don't use an exact OriginalTy. It can be extracted +/// from SVal or the cast can performs unconditionaly. Always pass OriginalTy! 
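The width/signedness conditions above reduce to a small predicate: when the outer cast C is no wider than the intermediate cast T, T can always be dropped; when C is wider, T can be dropped only if it did not change the root value, i.e. it widened without a harmful sign change or matched the root type exactly. A minimal sketch of that predicate; it mirrors the decision in simplifySymbolCast but uses toy types rather than APSIntType/QualType, and the names are assumptions for illustration.

#include <iostream>

struct Ty { unsigned Width; bool Unsigned; };

// Returns true if, for (C)(T)(R x), the intermediate cast to T can be dropped.
bool canDropIntermediateCast(Ty C, Ty T, Ty R) {
  // C == T or C < T: C keeps at most as many bits as T did, so T is redundant.
  if (C.Width <= T.Width)
    return true;
  // C > T: only safe if the cast to T preserved the value of R.
  const bool WidenedSafely = T.Width > R.Width && (R.Unsigned || !T.Unsigned);
  const bool SameAsRoot = T.Width == R.Width && T.Unsigned == R.Unsigned;
  return WidenedSafely || SameAsRoot;
}

int main() {
  Ty Int{32, false}, Short{16, false}, Char{8, false}, UChar{8, true};
  // (int)(short)(uchar x) -> (int)(uchar x): prints 1
  std::cout << canDropIntermediateCast(Int, Short, UChar) << "\n";
  // (int)(char)(short x): the char cast truncates, so it must stay: prints 0
  std::cout << canDropIntermediateCast(Int, Char, Short) << "\n";
}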
+/// It can be crucial in certain cases and generates different results. +/// FIXME: If `OriginalTy.isNull()` is true, then cast performs based on CastTy +/// only. This behavior is uncertain and should be improved. +SVal SValBuilder::evalCast(SVal V, QualType CastTy, QualType OriginalTy) { + EvalCastVisitor TRV{*this, CastTy, OriginalTy}; + return TRV.Visit(V); } diff --git a/clang/lib/StaticAnalyzer/Core/SVals.cpp b/clang/lib/StaticAnalyzer/Core/SVals.cpp index 67913a55b3dc..31725926cd0d 100644 --- a/clang/lib/StaticAnalyzer/Core/SVals.cpp +++ b/clang/lib/StaticAnalyzer/Core/SVals.cpp @@ -109,6 +109,14 @@ SymbolRef SVal::getAsSymbol(bool IncludeBaseRegions) const { return getAsLocSymbol(IncludeBaseRegions); } +const llvm::APSInt *SVal::getAsInteger() const { + if (auto CI = getAs<nonloc::ConcreteInt>()) + return &CI->getValue(); + if (auto CI = getAs<loc::ConcreteInt>()) + return &CI->getValue(); + return nullptr; +} + const MemRegion *SVal::getAsRegion() const { if (Optional<loc::MemRegionVal> X = getAs<loc::MemRegionVal>()) return X->getRegion(); @@ -136,6 +144,8 @@ public: } template <class ConcreteInt> QualType VisitConcreteInt(ConcreteInt CI) { const llvm::APSInt &Value = CI.getValue(); + if (1 == Value.getBitWidth()) + return Context.BoolTy; return Context.getIntTypeForBitwidth(Value.getBitWidth(), Value.isSigned()); } QualType VisitLocConcreteInt(loc::ConcreteInt CI) { diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp index 43127ea2df98..411fd9676ffd 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp @@ -50,8 +50,9 @@ FullDependencies::getCommandLineWithoutModulePaths() const { } DependencyScanningTool::DependencyScanningTool( - DependencyScanningService &Service) - : Worker(Service) {} + DependencyScanningService &Service, + llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS) + : Worker(Service, std::move(FS)) {} llvm::Expected<std::string> DependencyScanningTool::getDependencyFile( const std::vector<std::string> &CommandLine, StringRef CWD, diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp index e7d1375c83f0..474808d888ec 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp @@ -264,7 +264,8 @@ private: } // end anonymous namespace DependencyScanningWorker::DependencyScanningWorker( - DependencyScanningService &Service) + DependencyScanningService &Service, + llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS) : Format(Service.getFormat()), OptimizeArgs(Service.canOptimizeArgs()) { PCHContainerOps = std::make_shared<PCHContainerOperations>(); PCHContainerOps->registerReader( @@ -274,8 +275,8 @@ DependencyScanningWorker::DependencyScanningWorker( PCHContainerOps->registerWriter( std::make_unique<ObjectFilePCHContainerWriter>()); - auto OverlayFS = llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>( - llvm::vfs::createPhysicalFileSystem()); + auto OverlayFS = + llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(std::move(FS)); InMemoryFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>(); OverlayFS->pushOverlay(InMemoryFS); RealFS = OverlayFS; diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp index 484cf61664fe..041cc4f939d9 100644 --- 
a/clang/lib/Tooling/Syntax/BuildTree.cpp +++ b/clang/lib/Tooling/Syntax/BuildTree.cpp @@ -27,6 +27,7 @@ #include "clang/Lex/Lexer.h" #include "clang/Lex/LiteralSupport.h" #include "clang/Tooling/Syntax/Nodes.h" +#include "clang/Tooling/Syntax/TokenBufferTokenManager.h" #include "clang/Tooling/Syntax/Tokens.h" #include "clang/Tooling/Syntax/Tree.h" #include "llvm/ADT/ArrayRef.h" @@ -365,21 +366,24 @@ private: /// Call finalize() to finish building the tree and consume the root node. class syntax::TreeBuilder { public: - TreeBuilder(syntax::Arena &Arena) : Arena(Arena), Pending(Arena) { - for (const auto &T : Arena.getTokenBuffer().expandedTokens()) + TreeBuilder(syntax::Arena &Arena, TokenBufferTokenManager& TBTM) + : Arena(Arena), + TBTM(TBTM), + Pending(Arena, TBTM.tokenBuffer()) { + for (const auto &T : TBTM.tokenBuffer().expandedTokens()) LocationToToken.insert({T.location(), &T}); } llvm::BumpPtrAllocator &allocator() { return Arena.getAllocator(); } const SourceManager &sourceManager() const { - return Arena.getSourceManager(); + return TBTM.sourceManager(); } /// Populate children for \p New node, assuming it covers tokens from \p /// Range. void foldNode(ArrayRef<syntax::Token> Range, syntax::Tree *New, ASTPtr From) { assert(New); - Pending.foldChildren(Arena, Range, New); + Pending.foldChildren(TBTM.tokenBuffer(), Range, New); if (From) Mapping.add(From, New); } @@ -392,7 +396,7 @@ public: void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New, NestedNameSpecifierLoc From) { assert(New); - Pending.foldChildren(Arena, Range, New); + Pending.foldChildren(TBTM.tokenBuffer(), Range, New); if (From) Mapping.add(From, New); } @@ -403,7 +407,7 @@ public: ASTPtr From) { assert(New); auto ListRange = Pending.shrinkToFitList(SuperRange); - Pending.foldChildren(Arena, ListRange, New); + Pending.foldChildren(TBTM.tokenBuffer(), ListRange, New); if (From) Mapping.add(From, New); } @@ -434,12 +438,12 @@ public: /// Finish building the tree and consume the root node. syntax::TranslationUnit *finalize() && { - auto Tokens = Arena.getTokenBuffer().expandedTokens(); + auto Tokens = TBTM.tokenBuffer().expandedTokens(); assert(!Tokens.empty()); assert(Tokens.back().kind() == tok::eof); // Build the root of the tree, consuming all the children. - Pending.foldChildren(Arena, Tokens.drop_back(), + Pending.foldChildren(TBTM.tokenBuffer(), Tokens.drop_back(), new (Arena.getAllocator()) syntax::TranslationUnit); auto *TU = cast<syntax::TranslationUnit>(std::move(Pending).finalize()); @@ -464,7 +468,7 @@ public: assert(First.isValid()); assert(Last.isValid()); assert(First == Last || - Arena.getSourceManager().isBeforeInTranslationUnit(First, Last)); + TBTM.sourceManager().isBeforeInTranslationUnit(First, Last)); return llvm::makeArrayRef(findToken(First), std::next(findToken(Last))); } @@ -564,15 +568,16 @@ private: /// /// Ensures that added nodes properly nest and cover the whole token stream. struct Forest { - Forest(syntax::Arena &A) { - assert(!A.getTokenBuffer().expandedTokens().empty()); - assert(A.getTokenBuffer().expandedTokens().back().kind() == tok::eof); + Forest(syntax::Arena &A, const syntax::TokenBuffer &TB) { + assert(!TB.expandedTokens().empty()); + assert(TB.expandedTokens().back().kind() == tok::eof); // Create all leaf nodes. // Note that we do not have 'eof' in the tree. 
- for (const auto &T : A.getTokenBuffer().expandedTokens().drop_back()) { - auto *L = new (A.getAllocator()) syntax::Leaf(&T); + for (const auto &T : TB.expandedTokens().drop_back()) { + auto *L = new (A.getAllocator()) + syntax::Leaf(reinterpret_cast<TokenManager::Key>(&T)); L->Original = true; - L->CanModify = A.getTokenBuffer().spelledForExpanded(T).has_value(); + L->CanModify = TB.spelledForExpanded(T).has_value(); Trees.insert(Trees.end(), {&T, L}); } } @@ -620,8 +625,8 @@ private: } /// Add \p Node to the forest and attach child nodes based on \p Tokens. - void foldChildren(const syntax::Arena &A, ArrayRef<syntax::Token> Tokens, - syntax::Tree *Node) { + void foldChildren(const syntax::TokenBuffer &TB, + ArrayRef<syntax::Token> Tokens, syntax::Tree *Node) { // Attach children to `Node`. assert(Node->getFirstChild() == nullptr && "node already has children"); @@ -646,7 +651,7 @@ private: // Mark that this node came from the AST and is backed by the source code. Node->Original = true; Node->CanModify = - A.getTokenBuffer().spelledForExpanded(Tokens).has_value(); + TB.spelledForExpanded(Tokens).has_value(); Trees.erase(BeginChildren, EndChildren); Trees.insert({FirstToken, Node}); @@ -660,18 +665,18 @@ private: return Root; } - std::string str(const syntax::Arena &A) const { + std::string str(const syntax::TokenBufferTokenManager &STM) const { std::string R; for (auto It = Trees.begin(); It != Trees.end(); ++It) { unsigned CoveredTokens = It != Trees.end() ? (std::next(It)->first - It->first) - : A.getTokenBuffer().expandedTokens().end() - It->first; + : STM.tokenBuffer().expandedTokens().end() - It->first; R += std::string( formatv("- '{0}' covers '{1}'+{2} tokens\n", It->second->getKind(), - It->first->text(A.getSourceManager()), CoveredTokens)); - R += It->second->dump(A.getSourceManager()); + It->first->text(STM.sourceManager()), CoveredTokens)); + R += It->second->dump(STM); } return R; } @@ -684,9 +689,10 @@ private: }; /// For debugging purposes. - std::string str() { return Pending.str(Arena); } + std::string str() { return Pending.str(TBTM); } syntax::Arena &Arena; + TokenBufferTokenManager& TBTM; /// To quickly find tokens by their start location. llvm::DenseMap<SourceLocation, const syntax::Token *> LocationToToken; Forest Pending; @@ -1718,7 +1724,7 @@ void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) { markExprChild(ChildExpr, NodeRole::Expression); ChildNode = new (allocator()) syntax::ExpressionStatement; // (!) 'getStmtRange()' ensures this covers a trailing semicolon. 
- Pending.foldChildren(Arena, getStmtRange(Child), ChildNode); + Pending.foldChildren(TBTM.tokenBuffer(), getStmtRange(Child), ChildNode); } else { ChildNode = Mapping.find(Child); } @@ -1745,8 +1751,9 @@ const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const { } syntax::TranslationUnit *syntax::buildSyntaxTree(Arena &A, + TokenBufferTokenManager& TBTM, ASTContext &Context) { - TreeBuilder Builder(A); + TreeBuilder Builder(A, TBTM); BuildTreeVisitor(Context, Builder).TraverseAST(Context); return std::move(Builder).finalize(); } diff --git a/clang/lib/Tooling/Syntax/ComputeReplacements.cpp b/clang/lib/Tooling/Syntax/ComputeReplacements.cpp index 31e1a40c74b6..08e09e4ebdbf 100644 --- a/clang/lib/Tooling/Syntax/ComputeReplacements.cpp +++ b/clang/lib/Tooling/Syntax/ComputeReplacements.cpp @@ -7,7 +7,9 @@ //===----------------------------------------------------------------------===// #include "clang/Tooling/Core/Replacement.h" #include "clang/Tooling/Syntax/Mutations.h" +#include "clang/Tooling/Syntax/TokenBufferTokenManager.h" #include "clang/Tooling/Syntax/Tokens.h" +#include "clang/Tooling/Syntax/Tree.h" #include "llvm/Support/Error.h" using namespace clang; @@ -16,10 +18,13 @@ namespace { using ProcessTokensFn = llvm::function_ref<void(llvm::ArrayRef<syntax::Token>, bool /*IsOriginal*/)>; /// Enumerates spans of tokens from the tree consecutively laid out in memory. -void enumerateTokenSpans(const syntax::Tree *Root, ProcessTokensFn Callback) { +void enumerateTokenSpans(const syntax::Tree *Root, + const syntax::TokenBufferTokenManager &STM, + ProcessTokensFn Callback) { struct Enumerator { - Enumerator(ProcessTokensFn Callback) - : SpanBegin(nullptr), SpanEnd(nullptr), SpanIsOriginal(false), + Enumerator(const syntax::TokenBufferTokenManager &STM, + ProcessTokensFn Callback) + : STM(STM), SpanBegin(nullptr), SpanEnd(nullptr), SpanIsOriginal(false), Callback(Callback) {} void run(const syntax::Tree *Root) { @@ -39,7 +44,8 @@ void enumerateTokenSpans(const syntax::Tree *Root, ProcessTokensFn Callback) { } auto *L = cast<syntax::Leaf>(N); - if (SpanEnd == L->getToken() && SpanIsOriginal == L->isOriginal()) { + if (SpanEnd == STM.getToken(L->getTokenKey()) && + SpanIsOriginal == L->isOriginal()) { // Extend the current span. ++SpanEnd; return; @@ -48,24 +54,25 @@ void enumerateTokenSpans(const syntax::Tree *Root, ProcessTokensFn Callback) { if (SpanBegin) Callback(llvm::makeArrayRef(SpanBegin, SpanEnd), SpanIsOriginal); // Start recording a new span. - SpanBegin = L->getToken(); + SpanBegin = STM.getToken(L->getTokenKey()); SpanEnd = SpanBegin + 1; SpanIsOriginal = L->isOriginal(); } + const syntax::TokenBufferTokenManager &STM; const syntax::Token *SpanBegin; const syntax::Token *SpanEnd; bool SpanIsOriginal; ProcessTokensFn Callback; }; - return Enumerator(Callback).run(Root); + return Enumerator(STM, Callback).run(Root); } -syntax::FileRange rangeOfExpanded(const syntax::Arena &A, +syntax::FileRange rangeOfExpanded(const syntax::TokenBufferTokenManager &STM, llvm::ArrayRef<syntax::Token> Expanded) { - const auto &Buffer = A.getTokenBuffer(); - const auto &SM = A.getSourceManager(); + const auto &Buffer = STM.tokenBuffer(); + const auto &SM = STM.sourceManager(); // Check that \p Expanded actually points into expanded tokens. 
assert(Buffer.expandedTokens().begin() <= Expanded.begin()); @@ -83,10 +90,10 @@ syntax::FileRange rangeOfExpanded(const syntax::Arena &A, } // namespace tooling::Replacements -syntax::computeReplacements(const syntax::Arena &A, +syntax::computeReplacements(const TokenBufferTokenManager &TBTM, const syntax::TranslationUnit &TU) { - const auto &Buffer = A.getTokenBuffer(); - const auto &SM = A.getSourceManager(); + const auto &Buffer = TBTM.tokenBuffer(); + const auto &SM = TBTM.sourceManager(); tooling::Replacements Replacements; // Text inserted by the replacement we are building now. @@ -95,13 +102,13 @@ syntax::computeReplacements(const syntax::Arena &A, if (ReplacedRange.empty() && Replacement.empty()) return; llvm::cantFail(Replacements.add(tooling::Replacement( - SM, rangeOfExpanded(A, ReplacedRange).toCharRange(SM), Replacement))); + SM, rangeOfExpanded(TBTM, ReplacedRange).toCharRange(SM), + Replacement))); Replacement = ""; }; - const syntax::Token *NextOriginal = Buffer.expandedTokens().begin(); enumerateTokenSpans( - &TU, [&](llvm::ArrayRef<syntax::Token> Tokens, bool IsOriginal) { + &TU, TBTM, [&](llvm::ArrayRef<syntax::Token> Tokens, bool IsOriginal) { if (!IsOriginal) { Replacement += syntax::Token::range(SM, Tokens.front(), Tokens.back()).text(SM); diff --git a/clang/lib/Tooling/Syntax/Mutations.cpp b/clang/lib/Tooling/Syntax/Mutations.cpp index f8a652219b22..824f1942532d 100644 --- a/clang/lib/Tooling/Syntax/Mutations.cpp +++ b/clang/lib/Tooling/Syntax/Mutations.cpp @@ -77,7 +77,8 @@ public: } }; -void syntax::removeStatement(syntax::Arena &A, syntax::Statement *S) { +void syntax::removeStatement(syntax::Arena &A, TokenBufferTokenManager &TBTM, + syntax::Statement *S) { assert(S); assert(S->canModify()); @@ -90,5 +91,5 @@ void syntax::removeStatement(syntax::Arena &A, syntax::Statement *S) { if (isa<EmptyStatement>(S)) return; // already an empty statement, nothing to do. - MutationsImpl::replace(S, createEmptyStatement(A)); + MutationsImpl::replace(S, createEmptyStatement(A, TBTM)); } diff --git a/clang/lib/Tooling/Syntax/Nodes.cpp b/clang/lib/Tooling/Syntax/Nodes.cpp index fc6f8ef1a82c..d0c1e9297cfa 100644 --- a/clang/lib/Tooling/Syntax/Nodes.cpp +++ b/clang/lib/Tooling/Syntax/Nodes.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// #include "clang/Tooling/Syntax/Nodes.h" -#include "clang/Basic/TokenKinds.h" +#include "llvm/Support/raw_ostream.h" using namespace clang; diff --git a/clang/lib/Tooling/Syntax/Synthesis.cpp b/clang/lib/Tooling/Syntax/Synthesis.cpp index ef6492882be6..39c19951ae76 100644 --- a/clang/lib/Tooling/Syntax/Synthesis.cpp +++ b/clang/lib/Tooling/Syntax/Synthesis.cpp @@ -8,6 +8,8 @@ #include "clang/Basic/TokenKinds.h" #include "clang/Tooling/Syntax/BuildTree.h" #include "clang/Tooling/Syntax/Tree.h" +#include "clang/Tooling/Syntax/Tokens.h" +#include "clang/Tooling/Syntax/TokenBufferTokenManager.h" using namespace clang; @@ -27,35 +29,40 @@ public: } static std::pair<FileID, ArrayRef<Token>> - lexBuffer(syntax::Arena &A, std::unique_ptr<llvm::MemoryBuffer> Buffer) { - return A.lexBuffer(std::move(Buffer)); + lexBuffer(TokenBufferTokenManager &TBTM, + std::unique_ptr<llvm::MemoryBuffer> Buffer) { + return TBTM.lexBuffer(std::move(Buffer)); } }; // FIXME: `createLeaf` is based on `syntax::tokenize` internally, as such it // doesn't support digraphs or line continuations. 
-syntax::Leaf *clang::syntax::createLeaf(syntax::Arena &A, tok::TokenKind K, - StringRef Spelling) { +syntax::Leaf *clang::syntax::createLeaf(syntax::Arena &A, + TokenBufferTokenManager &TBTM, + tok::TokenKind K, StringRef Spelling) { auto Tokens = - FactoryImpl::lexBuffer(A, llvm::MemoryBuffer::getMemBufferCopy(Spelling)) + FactoryImpl::lexBuffer(TBTM, llvm::MemoryBuffer::getMemBufferCopy(Spelling)) .second; assert(Tokens.size() == 1); assert(Tokens.front().kind() == K && "spelling is not lexed into the expected kind of token"); - auto *Leaf = new (A.getAllocator()) syntax::Leaf(Tokens.begin()); + auto *Leaf = new (A.getAllocator()) syntax::Leaf( + reinterpret_cast<TokenManager::Key>(Tokens.begin())); syntax::FactoryImpl::setCanModify(Leaf); Leaf->assertInvariants(); return Leaf; } -syntax::Leaf *clang::syntax::createLeaf(syntax::Arena &A, tok::TokenKind K) { +syntax::Leaf *clang::syntax::createLeaf(syntax::Arena &A, + TokenBufferTokenManager &TBTM, + tok::TokenKind K) { const auto *Spelling = tok::getPunctuatorSpelling(K); if (!Spelling) Spelling = tok::getKeywordSpelling(K); assert(Spelling && "Cannot infer the spelling of the token from its token kind."); - return createLeaf(A, K, Spelling); + return createLeaf(A, TBTM, K, Spelling); } namespace { @@ -208,24 +215,25 @@ syntax::Tree *clang::syntax::createTree( } syntax::Node *clang::syntax::deepCopyExpandingMacros(syntax::Arena &A, + TokenBufferTokenManager &TBTM, const syntax::Node *N) { if (const auto *L = dyn_cast<syntax::Leaf>(N)) // `L->getToken()` gives us the expanded token, thus we implicitly expand // any macros here. - return createLeaf(A, L->getToken()->kind(), - L->getToken()->text(A.getSourceManager())); + return createLeaf(A, TBTM, TBTM.getToken(L->getTokenKey())->kind(), + TBTM.getText(L->getTokenKey())); const auto *T = cast<syntax::Tree>(N); std::vector<std::pair<syntax::Node *, syntax::NodeRole>> Children; for (const auto *Child = T->getFirstChild(); Child; Child = Child->getNextSibling()) - Children.push_back({deepCopyExpandingMacros(A, Child), Child->getRole()}); + Children.push_back({deepCopyExpandingMacros(A, TBTM, Child), Child->getRole()}); return createTree(A, Children, N->getKind()); } -syntax::EmptyStatement *clang::syntax::createEmptyStatement(syntax::Arena &A) { +syntax::EmptyStatement *clang::syntax::createEmptyStatement(syntax::Arena &A, TokenBufferTokenManager &TBTM) { return cast<EmptyStatement>( - createTree(A, {{createLeaf(A, tok::semi), NodeRole::Unknown}}, + createTree(A, {{createLeaf(A, TBTM, tok::semi), NodeRole::Unknown}}, NodeKind::EmptyStatement)); } diff --git a/clang/lib/Tooling/Syntax/TokenBufferTokenManager.cpp b/clang/lib/Tooling/Syntax/TokenBufferTokenManager.cpp new file mode 100644 index 000000000000..a06f7e2900d4 --- /dev/null +++ b/clang/lib/Tooling/Syntax/TokenBufferTokenManager.cpp @@ -0,0 +1,25 @@ +//===- TokenBufferTokenManager.cpp ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Tooling/Syntax/TokenBufferTokenManager.h" + +namespace clang { +namespace syntax { +constexpr llvm::StringLiteral syntax::TokenBufferTokenManager::Kind; + +std::pair<FileID, ArrayRef<syntax::Token>> +syntax::TokenBufferTokenManager::lexBuffer( + std::unique_ptr<llvm::MemoryBuffer> Input) { + auto FID = SM.createFileID(std::move(Input)); + auto It = ExtraTokens.try_emplace(FID, tokenize(FID, SM, LangOpts)); + assert(It.second && "duplicate FileID"); + return {FID, It.first->second}; +} + +} // namespace syntax +} // namespace clang diff --git a/clang/lib/Tooling/Syntax/Tree.cpp b/clang/lib/Tooling/Syntax/Tree.cpp index 981bac508f73..20f7bd087aa0 100644 --- a/clang/lib/Tooling/Syntax/Tree.cpp +++ b/clang/lib/Tooling/Syntax/Tree.cpp @@ -8,9 +8,8 @@ #include "clang/Tooling/Syntax/Tree.h" #include "clang/Basic/TokenKinds.h" #include "clang/Tooling/Syntax/Nodes.h" -#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Casting.h" #include <cassert> @@ -33,25 +32,7 @@ static void traverse(syntax::Node *N, } } // namespace -syntax::Arena::Arena(SourceManager &SourceMgr, const LangOptions &LangOpts, - const TokenBuffer &Tokens) - : SourceMgr(SourceMgr), LangOpts(LangOpts), Tokens(Tokens) {} - -const syntax::TokenBuffer &syntax::Arena::getTokenBuffer() const { - return Tokens; -} - -std::pair<FileID, ArrayRef<syntax::Token>> -syntax::Arena::lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Input) { - auto FID = SourceMgr.createFileID(std::move(Input)); - auto It = ExtraTokens.try_emplace(FID, tokenize(FID, SourceMgr, LangOpts)); - assert(It.second && "duplicate FileID"); - return {FID, It.first->second}; -} - -syntax::Leaf::Leaf(const syntax::Token *Tok) : Node(NodeKind::Leaf), Tok(Tok) { - assert(Tok != nullptr); -} +syntax::Leaf::Leaf(syntax::TokenManager::Key K) : Node(NodeKind::Leaf), K(K) {} syntax::Node::Node(NodeKind Kind) : Parent(nullptr), NextSibling(nullptr), PreviousSibling(nullptr), @@ -190,20 +171,8 @@ void syntax::Tree::replaceChildRangeLowLevel(Node *Begin, Node *End, } namespace { -static void dumpLeaf(raw_ostream &OS, const syntax::Leaf *L, - const SourceManager &SM) { - assert(L); - const auto *Token = L->getToken(); - assert(Token); - // Handle 'eof' separately, calling text() on it produces an empty string. 
- if (Token->kind() == tok::eof) - OS << "<eof>"; - else - OS << Token->text(SM); -} - static void dumpNode(raw_ostream &OS, const syntax::Node *N, - const SourceManager &SM, llvm::BitVector IndentMask) { + const syntax::TokenManager &TM, llvm::BitVector IndentMask) { auto DumpExtraInfo = [&OS](const syntax::Node *N) { if (N->getRole() != syntax::NodeRole::Unknown) OS << " " << N->getRole(); @@ -216,7 +185,7 @@ static void dumpNode(raw_ostream &OS, const syntax::Node *N, assert(N); if (const auto *L = dyn_cast<syntax::Leaf>(N)) { OS << "'"; - dumpLeaf(OS, L, SM); + OS << TM.getText(L->getTokenKey()); OS << "'"; DumpExtraInfo(N); OS << "\n"; @@ -242,25 +211,25 @@ static void dumpNode(raw_ostream &OS, const syntax::Node *N, OS << "|-"; IndentMask.push_back(true); } - dumpNode(OS, &It, SM, IndentMask); + dumpNode(OS, &It, TM, IndentMask); IndentMask.pop_back(); } } } // namespace -std::string syntax::Node::dump(const SourceManager &SM) const { +std::string syntax::Node::dump(const TokenManager &TM) const { std::string Str; llvm::raw_string_ostream OS(Str); - dumpNode(OS, this, SM, /*IndentMask=*/{}); + dumpNode(OS, this, TM, /*IndentMask=*/{}); return std::move(OS.str()); } -std::string syntax::Node::dumpTokens(const SourceManager &SM) const { +std::string syntax::Node::dumpTokens(const TokenManager &TM) const { std::string Storage; llvm::raw_string_ostream OS(Storage); traverse(this, [&](const syntax::Node *N) { if (const auto *L = dyn_cast<syntax::Leaf>(N)) { - dumpLeaf(OS, L, SM); + OS << TM.getText(L->getTokenKey()); OS << " "; } }); @@ -297,7 +266,8 @@ void syntax::Node::assertInvariants() const { C.getRole() == NodeRole::ListDelimiter); if (C.getRole() == NodeRole::ListDelimiter) { assert(isa<Leaf>(C)); - assert(cast<Leaf>(C).getToken()->kind() == L->getDelimiterTokenKind()); + // FIXME: re-enable it when there is way to retrieve token kind in Leaf. 
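The tree changes above replace the Leaf's Token* with an opaque TokenManager::Key, so dumping and text retrieval go through a token manager (TM.getText(L->getTokenKey())) instead of a SourceManager. A toy sketch of that indirection; the ToyTokenManager/ToyLeaf names and the vector-backed key are assumptions for illustration only, not the clang::syntax classes.

#include <cstddef>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

struct ToyTokenManager {
  using Key = std::size_t;
  std::vector<std::string> Spellings;
  Key add(std::string S) {
    Spellings.push_back(std::move(S));
    return Spellings.size() - 1;
  }
  const std::string &getText(Key K) const { return Spellings[K]; }
};

// A leaf stores only a key; it cannot spell itself without the manager.
struct ToyLeaf {
  ToyTokenManager::Key K;
};

int main() {
  ToyTokenManager TM;
  ToyLeaf Semi{TM.add(";")};
  std::cout << TM.getText(Semi.K) << "\n"; // prints ;
}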
+ // assert(cast<Leaf>(C).getToken()->kind() == L->getDelimiterTokenKind()); } } diff --git a/clang/tools/driver/cc1_main.cpp b/clang/tools/driver/cc1_main.cpp index 5adc07154f88..de33aa9ea934 100644 --- a/clang/tools/driver/cc1_main.cpp +++ b/clang/tools/driver/cc1_main.cpp @@ -212,7 +212,9 @@ int cc1_main(ArrayRef<const char *> Argv, const char *Argv0, void *MainAddr) { bool Success = CompilerInvocation::CreateFromArgs(Clang->getInvocation(), Argv, Diags, Argv0); - if (Clang->getFrontendOpts().TimeTrace) { + if (Clang->getFrontendOpts().TimeTrace || + !Clang->getFrontendOpts().TimeTracePath.empty()) { + Clang->getFrontendOpts().TimeTrace = 1; llvm::timeTraceProfilerInitialize( Clang->getFrontendOpts().TimeTraceGranularity, Argv0); } @@ -256,6 +258,13 @@ int cc1_main(ArrayRef<const char *> Argv, const char *Argv0, void *MainAddr) { if (llvm::timeTraceProfilerEnabled()) { SmallString<128> Path(Clang->getFrontendOpts().OutputFile); llvm::sys::path::replace_extension(Path, "json"); + if (!Clang->getFrontendOpts().TimeTracePath.empty()) { + // replace the suffix to '.json' directly + SmallString<128> TracePath(Clang->getFrontendOpts().TimeTracePath); + if (llvm::sys::fs::is_directory(TracePath)) + llvm::sys::path::append(TracePath, llvm::sys::path::filename(Path)); + Path.assign(TracePath); + } if (auto profilerOutput = Clang->createOutputFile( Path.str(), /*Binary=*/false, /*RemoveFileOnSignal=*/false, /*useTemporary=*/false)) { diff --git a/clang/utils/TableGen/ClangOptionDocEmitter.cpp b/clang/utils/TableGen/ClangOptionDocEmitter.cpp index 6c24ad2bdcc5..75f5d057c33a 100644 --- a/clang/utils/TableGen/ClangOptionDocEmitter.cpp +++ b/clang/utils/TableGen/ClangOptionDocEmitter.cpp @@ -168,6 +168,29 @@ bool hasFlag(const Record *OptionOrGroup, StringRef OptionFlag) { return false; } +bool isIncluded(const Record *OptionOrGroup, const Record *DocInfo) { + assert(DocInfo->getValue("IncludedFlags") && "Missing includeFlags"); + for (StringRef Inclusion : DocInfo->getValueAsListOfStrings("IncludedFlags")) + if (hasFlag(OptionOrGroup, Inclusion)) + return true; + return false; +} + +bool isGroupIncluded(const DocumentedGroup &Group, const Record *DocInfo) { + if (isIncluded(Group.Group, DocInfo)) + return true; + for (auto &O : Group.Options) + if (isIncluded(O.Option, DocInfo)) + return true; + for (auto &G : Group.Groups) { + if (isIncluded(G.Group, DocInfo)) + return true; + if (isGroupIncluded(G, DocInfo)) + return true; + } + return false; +} + bool isExcluded(const Record *OptionOrGroup, const Record *DocInfo) { // FIXME: Provide a flag to specify the set of exclusions. 
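The cc1 change above turns a non-empty TimeTracePath into an enabled time-trace profile and picks the output file as follows: start from the compilation output with its extension replaced by .json, and if an explicit trace path was given, use it directly, or append the derived file name when the path names a directory. A sketch of that selection using std::filesystem rather than llvm::sys::path; pickTracePath is an illustrative name, not the driver's API.

#include <filesystem>
#include <iostream>
#include <string>

namespace fs = std::filesystem;

fs::path pickTracePath(const fs::path &OutputFile, const fs::path &TracePath) {
  fs::path P = OutputFile;
  P.replace_extension(".json");      // default: next to the regular output
  if (TracePath.empty())
    return P;
  if (fs::is_directory(TracePath))
    return TracePath / P.filename(); // a directory keeps the derived name
  return TracePath;                  // an explicit file wins as-is
}

int main() {
  std::cout << pickTracePath("obj/a.o", "").string() << "\n";     // obj/a.json
  std::cout << pickTracePath("obj/a.o", "/tmp").string() << "\n"; // /tmp/a.json on most systems
}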
for (StringRef Exclusion : DocInfo->getValueAsListOfStrings("ExcludedFlags")) @@ -304,6 +327,8 @@ void emitOption(const DocumentedOption &Option, const Record *DocInfo, raw_ostream &OS) { if (isExcluded(Option.Option, DocInfo)) return; + if (DocInfo->getValue("IncludedFlags") && !isIncluded(Option.Option, DocInfo)) + return; if (Option.Option->getValueAsDef("Kind")->getName() == "KIND_UNKNOWN" || Option.Option->getValueAsDef("Kind")->getName() == "KIND_INPUT") return; @@ -379,6 +404,9 @@ void emitGroup(int Depth, const DocumentedGroup &Group, const Record *DocInfo, if (isExcluded(Group.Group, DocInfo)) return; + if (DocInfo->getValue("IncludedFlags") && !isGroupIncluded(Group, DocInfo)) + return; + emitHeading(Depth, getRSTStringWithTextFallback(Group.Group, "DocName", "Name"), OS); diff --git a/compiler-rt/lib/asan/asan_interceptors.cpp b/compiler-rt/lib/asan/asan_interceptors.cpp index 13311b7e409b..817008253fc0 100644 --- a/compiler-rt/lib/asan/asan_interceptors.cpp +++ b/compiler-rt/lib/asan/asan_interceptors.cpp @@ -243,13 +243,26 @@ DEFINE_REAL_PTHREAD_FUNCTIONS #if ASAN_INTERCEPT_SWAPCONTEXT static void ClearShadowMemoryForContextStack(uptr stack, uptr ssize) { + // Only clear if we know the stack. This should be true only for contexts + // created with makecontext(). + if (!ssize) + return; // Align to page size. uptr PageSize = GetPageSizeCached(); - uptr bottom = stack & ~(PageSize - 1); + uptr bottom = RoundDownTo(stack, PageSize); + if (!AddrIsInMem(bottom)) + return; ssize += stack - bottom; ssize = RoundUpTo(ssize, PageSize); - if (AddrIsInMem(bottom) && ssize) - PoisonShadow(bottom, ssize, 0); + PoisonShadow(bottom, ssize, 0); +} + +INTERCEPTOR(int, getcontext, struct ucontext_t *ucp) { + // API does not requires to have ucp clean, and sets only part of fields. We + // use ucp->uc_stack to unpoison new stack. We prefer to have zeroes then + // uninitialized bytes. + ResetContextStack(ucp); + return REAL(getcontext)(ucp); } INTERCEPTOR(int, swapcontext, struct ucontext_t *oucp, @@ -265,15 +278,18 @@ INTERCEPTOR(int, swapcontext, struct ucontext_t *oucp, uptr stack, ssize; ReadContextStack(ucp, &stack, &ssize); ClearShadowMemoryForContextStack(stack, ssize); -#if __has_attribute(__indirect_return__) && \ - (defined(__x86_64__) || defined(__i386__)) + + // See getcontext interceptor. + ResetContextStack(oucp); + +# if __has_attribute(__indirect_return__) && \ + (defined(__x86_64__) || defined(__i386__)) int (*real_swapcontext)(struct ucontext_t *, struct ucontext_t *) - __attribute__((__indirect_return__)) - = REAL(swapcontext); + __attribute__((__indirect_return__)) = REAL(swapcontext); int res = real_swapcontext(oucp, ucp); -#else +# else int res = REAL(swapcontext)(oucp, ucp); -#endif +# endif // swapcontext technically does not return, but program may swap context to // "oucp" later, that would look as if swapcontext() returned 0. 
// We need to clear shadow for ucp once again, as it may be in arbitrary @@ -643,6 +659,7 @@ void InitializeAsanInterceptors() { ASAN_INTERCEPT_FUNC(longjmp); #if ASAN_INTERCEPT_SWAPCONTEXT + ASAN_INTERCEPT_FUNC(getcontext); ASAN_INTERCEPT_FUNC(swapcontext); #endif #if ASAN_INTERCEPT__LONGJMP diff --git a/compiler-rt/lib/asan/asan_interceptors_vfork.S b/compiler-rt/lib/asan/asan_interceptors_vfork.S index 3ae5503e83cd..ec29adc7b132 100644 --- a/compiler-rt/lib/asan/asan_interceptors_vfork.S +++ b/compiler-rt/lib/asan/asan_interceptors_vfork.S @@ -6,6 +6,7 @@ #include "sanitizer_common/sanitizer_common_interceptors_vfork_aarch64.inc.S" #include "sanitizer_common/sanitizer_common_interceptors_vfork_arm.inc.S" #include "sanitizer_common/sanitizer_common_interceptors_vfork_i386.inc.S" +#include "sanitizer_common/sanitizer_common_interceptors_vfork_loongarch64.inc.S" #include "sanitizer_common/sanitizer_common_interceptors_vfork_riscv64.inc.S" #include "sanitizer_common/sanitizer_common_interceptors_vfork_x86_64.inc.S" #endif diff --git a/compiler-rt/lib/asan/asan_internal.h b/compiler-rt/lib/asan/asan_internal.h index 7468f126d37b..9c46f225116e 100644 --- a/compiler-rt/lib/asan/asan_internal.h +++ b/compiler-rt/lib/asan/asan_internal.h @@ -106,6 +106,7 @@ void AsanApplyToGlobals(globals_op_fptr op, const void *needle); void AsanOnDeadlySignal(int, void *siginfo, void *context); void ReadContextStack(void *context, uptr *stack, uptr *ssize); +void ResetContextStack(void *context); void StopInitOrderChecking(); // Wrapper for TLS/TSD. diff --git a/compiler-rt/lib/asan/asan_linux.cpp b/compiler-rt/lib/asan/asan_linux.cpp index defd81bc19e2..89450fc120a0 100644 --- a/compiler-rt/lib/asan/asan_linux.cpp +++ b/compiler-rt/lib/asan/asan_linux.cpp @@ -214,11 +214,19 @@ void ReadContextStack(void *context, uptr *stack, uptr *ssize) { *stack = (uptr)ucp->uc_stack.ss_sp; *ssize = ucp->uc_stack.ss_size; } -#else + +void ResetContextStack(void *context) { + ucontext_t *ucp = (ucontext_t *)context; + ucp->uc_stack.ss_sp = nullptr; + ucp->uc_stack.ss_size = 0; +} +# else void ReadContextStack(void *context, uptr *stack, uptr *ssize) { UNIMPLEMENTED(); } -#endif + +void ResetContextStack(void *context) { UNIMPLEMENTED(); } +# endif void *AsanDlSymNext(const char *sym) { return dlsym(RTLD_NEXT, sym); diff --git a/compiler-rt/lib/asan/asan_mac.cpp b/compiler-rt/lib/asan/asan_mac.cpp index 4f4ce92cc6a1..a2d5c31a3f77 100644 --- a/compiler-rt/lib/asan/asan_mac.cpp +++ b/compiler-rt/lib/asan/asan_mac.cpp @@ -99,6 +99,8 @@ void ReadContextStack(void *context, uptr *stack, uptr *ssize) { UNIMPLEMENTED(); } +void ResetContextStack(void *context) { UNIMPLEMENTED(); } + // Support for the following functions from libdispatch on Mac OS: // dispatch_async_f() // dispatch_async() diff --git a/compiler-rt/lib/asan/asan_mapping.h b/compiler-rt/lib/asan/asan_mapping.h index cc5f5836e742..aeadb9d94ebd 100644 --- a/compiler-rt/lib/asan/asan_mapping.h +++ b/compiler-rt/lib/asan/asan_mapping.h @@ -114,6 +114,13 @@ // || `[0x0080000000000, 0x008ffffffffff]` || LowShadow || // || `[0x0000000000000, 0x007ffffffffff]` || LowMem || // +// Default Linux/LoongArch64 (47-bit VMA) mapping: +// || `[0x500000000000, 0x7fffffffffff]` || HighMem || +// || `[0x4a0000000000, 0x4fffffffffff]` || HighShadow || +// || `[0x480000000000, 0x49ffffffffff]` || ShadowGap || +// || `[0x400000000000, 0x47ffffffffff]` || LowShadow || +// || `[0x000000000000, 0x3fffffffffff]` || LowMem || +// // Shadow mapping on FreeBSD/x86-64 with SHADOW_OFFSET == 
0x400000000000: // || `[0x500000000000, 0x7fffffffffff]` || HighMem || // || `[0x4a0000000000, 0x4fffffffffff]` || HighShadow || @@ -196,6 +203,8 @@ # define ASAN_SHADOW_OFFSET_CONST 0x0000002000000000 # elif defined(__sparc__) # define ASAN_SHADOW_OFFSET_CONST 0x0000080000000000 +# elif SANITIZER_LOONGARCH64 +# define ASAN_SHADOW_OFFSET_CONST 0x0000400000000000 # elif SANITIZER_WINDOWS64 # define ASAN_SHADOW_OFFSET_DYNAMIC # else diff --git a/compiler-rt/lib/asan/asan_win.cpp b/compiler-rt/lib/asan/asan_win.cpp index 81958038fb1c..f11df0613d1f 100644 --- a/compiler-rt/lib/asan/asan_win.cpp +++ b/compiler-rt/lib/asan/asan_win.cpp @@ -267,6 +267,8 @@ void ReadContextStack(void *context, uptr *stack, uptr *ssize) { UNIMPLEMENTED(); } +void ResetContextStack(void *context) { UNIMPLEMENTED(); } + void AsanOnDeadlySignal(int, void *siginfo, void *context) { UNIMPLEMENTED(); } bool PlatformUnpoisonStacks() { return false; } diff --git a/compiler-rt/lib/hwasan/hwasan_linux.cpp b/compiler-rt/lib/hwasan/hwasan_linux.cpp index dcab473d8ad1..2d7a44525c5f 100644 --- a/compiler-rt/lib/hwasan/hwasan_linux.cpp +++ b/compiler-rt/lib/hwasan/hwasan_linux.cpp @@ -114,19 +114,20 @@ void InitializeOsSupport() { # define PR_SET_TAGGED_ADDR_CTRL 55 # define PR_GET_TAGGED_ADDR_CTRL 56 # define PR_TAGGED_ADDR_ENABLE (1UL << 0) -# define ARCH_GET_UNTAG_MASK 0x4001 +# define ARCH_GET_UNTAG_MASK 0x4001 # define ARCH_ENABLE_TAGGED_ADDR 0x4002 // Check we're running on a kernel that can use the tagged address ABI. int local_errno = 0; bool has_abi; # if defined(__x86_64__) has_abi = (internal_iserror(internal_arch_prctl(ARCH_GET_UNTAG_MASK, 0), - &local_errno) && - local_errno == EINVAL); + &local_errno) && + local_errno == EINVAL); # else - has_abi = (internal_iserror(internal_prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0), - &local_errno) && - local_errno == EINVAL); + has_abi = + (internal_iserror(internal_prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0), + &local_errno) && + local_errno == EINVAL); # endif if (has_abi) { # if SANITIZER_ANDROID || defined(HWASAN_ALIASING_MODE) diff --git a/compiler-rt/lib/lsan/lsan_common.cpp b/compiler-rt/lib/lsan/lsan_common.cpp index d6510d34fd68..94bb3cca0083 100644 --- a/compiler-rt/lib/lsan/lsan_common.cpp +++ b/compiler-rt/lib/lsan/lsan_common.cpp @@ -949,7 +949,7 @@ void __lsan_ignore_object(const void *p) { Lock l(&global_mutex); IgnoreObjectResult res = IgnoreObjectLocked(p); if (res == kIgnoreObjectInvalid) - VReport(1, "__lsan_ignore_object(): no heap object found at %p", p); + VReport(1, "__lsan_ignore_object(): no heap object found at %p\n", p); if (res == kIgnoreObjectAlreadyIgnored) VReport(1, "__lsan_ignore_object(): " diff --git a/compiler-rt/lib/memprof/memprof_internal.h b/compiler-rt/lib/memprof/memprof_internal.h index 1adb368e3e41..bba465e60d82 100644 --- a/compiler-rt/lib/memprof/memprof_internal.h +++ b/compiler-rt/lib/memprof/memprof_internal.h @@ -66,8 +66,6 @@ void *MemprofDoesNotSupportStaticLinkage(); // memprof_thread.cpp MemprofThread *CreateMainThread(); -void ReadContextStack(void *context, uptr *stack, uptr *ssize); - // Wrapper for TLS/TSD. 
void TSDInit(void (*destructor)(void *tsd)); void *TSDGet(); diff --git a/compiler-rt/lib/memprof/memprof_linux.cpp b/compiler-rt/lib/memprof/memprof_linux.cpp index 61c833bfdf64..fcd927023f5c 100644 --- a/compiler-rt/lib/memprof/memprof_linux.cpp +++ b/compiler-rt/lib/memprof/memprof_linux.cpp @@ -69,12 +69,6 @@ uptr FindDynamicShadowStart() { /*min_shadow_base_alignment*/ 0, kHighMemEnd); } -void ReadContextStack(void *context, uptr *stack, uptr *ssize) { - ucontext_t *ucp = (ucontext_t *)context; - *stack = (uptr)ucp->uc_stack.ss_sp; - *ssize = ucp->uc_stack.ss_size; -} - void *MemprofDlSymNext(const char *sym) { return dlsym(RTLD_NEXT, sym); } } // namespace __memprof diff --git a/compiler-rt/lib/msan/msan.h b/compiler-rt/lib/msan/msan.h index 30efce94af54..1a4cc8c0c975 100644 --- a/compiler-rt/lib/msan/msan.h +++ b/compiler-rt/lib/msan/msan.h @@ -195,6 +195,27 @@ const MappingDesc kMemoryLayout[] = { ((((uptr)(mem)) & ~0xC00000000000ULL) + 0x080000000000ULL) #define SHADOW_TO_ORIGIN(shadow) (((uptr)(shadow)) + 0x140000000000ULL) +#elif SANITIZER_FREEBSD && defined(__aarch64__) + +// Low memory: main binary, MAP_32BIT mappings and modules +// High memory: heap, modules and main thread stack +const MappingDesc kMemoryLayout[] = { + {0x000000000000ULL, 0x020000000000ULL, MappingDesc::APP, "low memory"}, + {0x020000000000ULL, 0x200000000000ULL, MappingDesc::INVALID, "invalid"}, + {0x200000000000ULL, 0x620000000000ULL, MappingDesc::SHADOW, "shadow"}, + {0x620000000000ULL, 0x700000000000ULL, MappingDesc::INVALID, "invalid"}, + {0x700000000000ULL, 0xb20000000000ULL, MappingDesc::ORIGIN, "origin"}, + {0xb20000000000ULL, 0xc00000000000ULL, MappingDesc::INVALID, "invalid"}, + {0xc00000000000ULL, 0x1000000000000ULL, MappingDesc::APP, "high memory"}}; + +// Maps low and high app ranges to contiguous space with zero base: +// Low: 0000 0000 0000 - 01ff ffff ffff -> 4000 0000 0000 - 41ff ffff ffff +// High: c000 0000 0000 - ffff ffff ffff -> 0000 0000 0000 - 3fff ffff ffff +#define LINEARIZE_MEM(mem) \ + (((uptr)(mem) & ~0x1800000000000ULL) ^ 0x400000000000ULL) +#define MEM_TO_SHADOW(mem) (LINEARIZE_MEM((mem)) + 0x200000000000ULL) +#define SHADOW_TO_ORIGIN(shadow) (((uptr)(shadow)) + 0x500000000000) + #elif SANITIZER_FREEBSD && SANITIZER_WORDSIZE == 64 // Low memory: main binary, MAP_32BIT mappings and modules diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_loongarch64.inc.S b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_loongarch64.inc.S new file mode 100644 index 000000000000..05192485d597 --- /dev/null +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_loongarch64.inc.S @@ -0,0 +1,63 @@ +#if defined(__loongarch_lp64) && defined(__linux__) + +#include "sanitizer_common/sanitizer_asm.h" + +ASM_HIDDEN(COMMON_INTERCEPTOR_SPILL_AREA) +ASM_HIDDEN(_ZN14__interception10real_vforkE) + +.bss +.type _ZN14__interception10real_vforkE, @object +.size _ZN14__interception10real_vforkE, 8 +_ZN14__interception10real_vforkE: + .zero 8 + +.text +.globl ASM_WRAPPER_NAME(vfork) +ASM_TYPE_FUNCTION(ASM_WRAPPER_NAME(vfork)) +ASM_WRAPPER_NAME(vfork): + // Save ra in the off-stack spill area. + // allocate space on stack + addi.d $sp, $sp, -16 + // store $ra value + st.d $ra, $sp, 8 + bl COMMON_INTERCEPTOR_SPILL_AREA + // restore previous values from stack + ld.d $ra, $sp, 8 + // adjust stack + addi.d $sp, $sp, 16 + // store $ra by $a0 + st.d $ra, $a0, 0 + + // Call real vfork. This may return twice. 
User code that runs between the first and the second return + // may clobber the stack frame of the interceptor; that's why it does not have a frame. + la.local $a0, _ZN14__interception10real_vforkE + ld.d $a0, $a0, 0 + jirl $ra, $a0, 0 + + // adjust stack + addi.d $sp, $sp, -16 + // store $a0 by adjusted stack + st.d $a0, $sp, 8 + // jump to exit label if $a0 is 0 + beqz $a0, .L_exit + + // $a0 != 0 => parent process. Clear stack shadow. + // put old $sp to $a0 + addi.d $a0, $sp, 16 + bl %plt(COMMON_INTERCEPTOR_HANDLE_VFORK) + +.L_exit: + // Restore $ra + bl COMMON_INTERCEPTOR_SPILL_AREA + ld.d $ra, $a0, 0 + // load value by stack + ld.d $a0, $sp, 8 + // adjust stack + addi.d $sp, $sp, 16 + jr $ra +ASM_SIZE(vfork) + +.weak vfork +.set vfork, ASM_WRAPPER_NAME(vfork) + +#endif diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_coverage_interface.inc b/compiler-rt/lib/sanitizer_common/sanitizer_coverage_interface.inc index d7ab0c3d98c1..9d36a40270d5 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_coverage_interface.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_coverage_interface.inc @@ -27,6 +27,16 @@ INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_gep) INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_pc_guard) INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_pc_guard_init) INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_pc_indir) +INTERFACE_WEAK_FUNCTION(__sanitizer_cov_load1) +INTERFACE_WEAK_FUNCTION(__sanitizer_cov_load2) +INTERFACE_WEAK_FUNCTION(__sanitizer_cov_load4) +INTERFACE_WEAK_FUNCTION(__sanitizer_cov_load8) +INTERFACE_WEAK_FUNCTION(__sanitizer_cov_load16) +INTERFACE_WEAK_FUNCTION(__sanitizer_cov_store1) +INTERFACE_WEAK_FUNCTION(__sanitizer_cov_store2) +INTERFACE_WEAK_FUNCTION(__sanitizer_cov_store4) +INTERFACE_WEAK_FUNCTION(__sanitizer_cov_store8) +INTERFACE_WEAK_FUNCTION(__sanitizer_cov_store16) INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_switch) INTERFACE_WEAK_FUNCTION(__sanitizer_cov_8bit_counters_init) INTERFACE_WEAK_FUNCTION(__sanitizer_cov_bool_flag_init) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_coverage_libcdep_new.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_coverage_libcdep_new.cpp index 3dcb39f32f6c..956b48e0b434 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_coverage_libcdep_new.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_coverage_libcdep_new.cpp @@ -259,6 +259,16 @@ SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_div4, void) {} SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_div8, void) {} SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_gep, void) {} SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_pc_indir, void) {} +SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_load1, void){} +SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_load2, void){} +SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_load4, void){} +SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_load8, void){} +SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_load16, void){} +SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_store1, void){} +SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_store2, void){} +SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_store4, void){} +SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_store8, void){} +SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_store16, void){} SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_8bit_counters_init, char* start, char* end) { __sancov::SingletonCounterCoverage::Cov8bitCountersInit(start, end); diff --git 
a/compiler-rt/lib/sanitizer_common/sanitizer_interface_internal.h b/compiler-rt/lib/sanitizer_common/sanitizer_interface_internal.h index e9dc78c6354e..ad34e5e5ba54 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_interface_internal.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_interface_internal.h @@ -107,6 +107,26 @@ __sanitizer_cov_trace_gep(); SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE void __sanitizer_cov_trace_pc_indir(); SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE void +__sanitizer_cov_load1(); +SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE void +__sanitizer_cov_load2(); +SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE void +__sanitizer_cov_load4(); +SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE void +__sanitizer_cov_load8(); +SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE void +__sanitizer_cov_load16(); +SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE void +__sanitizer_cov_store1(); +SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE void +__sanitizer_cov_store2(); +SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE void +__sanitizer_cov_store4(); +SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE void +__sanitizer_cov_store8(); +SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE void +__sanitizer_cov_store16(); +SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE void __sanitizer_cov_trace_pc_guard(__sanitizer::u32 *); SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE void __sanitizer_cov_trace_pc_guard_init(__sanitizer::u32 *, __sanitizer::u32 *); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp index be37fd7f68b3..dc2ea933fadc 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp @@ -78,6 +78,10 @@ #include <sys/personality.h> #endif +#if SANITIZER_LINUX && defined(__loongarch__) +# include <sys/sysmacros.h> +#endif + #if SANITIZER_FREEBSD #include <sys/exec.h> #include <sys/procctl.h> @@ -188,6 +192,8 @@ ScopedBlockSignals::~ScopedBlockSignals() { SetSigProcMask(&saved_, nullptr); } # include "sanitizer_syscall_linux_arm.inc" # elif SANITIZER_LINUX && defined(__hexagon__) # include "sanitizer_syscall_linux_hexagon.inc" +# elif SANITIZER_LINUX && SANITIZER_LOONGARCH64 +# include "sanitizer_syscall_linux_loongarch64.inc" # else # include "sanitizer_syscall_generic.inc" # endif @@ -290,6 +296,28 @@ static void stat64_to_stat(struct stat64 *in, struct stat *out) { } #endif +#if SANITIZER_LINUX && defined(__loongarch__) +static void statx_to_stat(struct statx *in, struct stat *out) { + internal_memset(out, 0, sizeof(*out)); + out->st_dev = makedev(in->stx_dev_major, in->stx_dev_minor); + out->st_ino = in->stx_ino; + out->st_mode = in->stx_mode; + out->st_nlink = in->stx_nlink; + out->st_uid = in->stx_uid; + out->st_gid = in->stx_gid; + out->st_rdev = makedev(in->stx_rdev_major, in->stx_rdev_minor); + out->st_size = in->stx_size; + out->st_blksize = in->stx_blksize; + out->st_blocks = in->stx_blocks; + out->st_atime = in->stx_atime.tv_sec; + out->st_atim.tv_nsec = in->stx_atime.tv_nsec; + out->st_mtime = in->stx_mtime.tv_sec; + out->st_mtim.tv_nsec = in->stx_mtime.tv_nsec; + out->st_ctime = in->stx_ctime.tv_sec; + out->st_ctim.tv_nsec = in->stx_ctime.tv_nsec; +} +#endif + #if SANITIZER_MIPS64 // Undefine compatibility macros from <sys/stat.h> // so that they would not clash with the kernel_stat @@ -341,52 +369,65 @@ static void 
kernel_stat_to_stat(struct kernel_stat *in, struct stat *out) { #endif uptr internal_stat(const char *path, void *buf) { -#if SANITIZER_FREEBSD +# if SANITIZER_FREEBSD return internal_syscall(SYSCALL(fstatat), AT_FDCWD, (uptr)path, (uptr)buf, 0); -# elif SANITIZER_LINUX -# if (SANITIZER_WORDSIZE == 64 || SANITIZER_X32 || \ +# elif SANITIZER_LINUX +# if defined(__loongarch__) + struct statx bufx; + int res = internal_syscall(SYSCALL(statx), AT_FDCWD, (uptr)path, + AT_NO_AUTOMOUNT, STATX_BASIC_STATS, (uptr)&bufx); + statx_to_stat(&bufx, (struct stat *)buf); + return res; +# elif (SANITIZER_WORDSIZE == 64 || SANITIZER_X32 || \ (defined(__mips__) && _MIPS_SIM == _ABIN32)) && \ - !SANITIZER_SPARC + !SANITIZER_SPARC return internal_syscall(SYSCALL(newfstatat), AT_FDCWD, (uptr)path, (uptr)buf, 0); -# else +# else struct stat64 buf64; int res = internal_syscall(SYSCALL(fstatat64), AT_FDCWD, (uptr)path, (uptr)&buf64, 0); stat64_to_stat(&buf64, (struct stat *)buf); return res; -# endif -# else +# endif +# else struct stat64 buf64; int res = internal_syscall(SYSCALL(stat64), path, &buf64); stat64_to_stat(&buf64, (struct stat *)buf); return res; -# endif +# endif } uptr internal_lstat(const char *path, void *buf) { -#if SANITIZER_FREEBSD +# if SANITIZER_FREEBSD return internal_syscall(SYSCALL(fstatat), AT_FDCWD, (uptr)path, (uptr)buf, AT_SYMLINK_NOFOLLOW); -# elif SANITIZER_LINUX -# if (defined(_LP64) || SANITIZER_X32 || \ +# elif SANITIZER_LINUX +# if defined(__loongarch__) + struct statx bufx; + int res = internal_syscall(SYSCALL(statx), AT_FDCWD, (uptr)path, + AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT, + STATX_BASIC_STATS, (uptr)&bufx); + statx_to_stat(&bufx, (struct stat *)buf); + return res; +# elif (defined(_LP64) || SANITIZER_X32 || \ (defined(__mips__) && _MIPS_SIM == _ABIN32)) && \ - !SANITIZER_SPARC + !SANITIZER_SPARC return internal_syscall(SYSCALL(newfstatat), AT_FDCWD, (uptr)path, (uptr)buf, AT_SYMLINK_NOFOLLOW); -# else +# else struct stat64 buf64; int res = internal_syscall(SYSCALL(fstatat64), AT_FDCWD, (uptr)path, (uptr)&buf64, AT_SYMLINK_NOFOLLOW); stat64_to_stat(&buf64, (struct stat *)buf); return res; -# endif -# else +# endif +# else struct stat64 buf64; int res = internal_syscall(SYSCALL(lstat64), path, &buf64); stat64_to_stat(&buf64, (struct stat *)buf); return res; -# endif +# endif } uptr internal_fstat(fd_t fd, void *buf) { @@ -397,9 +438,15 @@ uptr internal_fstat(fd_t fd, void *buf) { int res = internal_syscall(SYSCALL(fstat), fd, &kbuf); kernel_stat_to_stat(&kbuf, (struct stat *)buf); return res; -# else +# elif SANITIZER_LINUX && defined(__loongarch__) + struct statx bufx; + int res = internal_syscall(SYSCALL(statx), fd, 0, AT_EMPTY_PATH, + STATX_BASIC_STATS, (uptr)&bufx); + statx_to_stat(&bufx, (struct stat *)buf); + return res; +# else return internal_syscall(SYSCALL(fstat), fd, (uptr)buf); -# endif +# endif #else struct stat64 buf64; int res = internal_syscall(SYSCALL(fstat64), fd, &buf64); @@ -445,15 +492,15 @@ uptr internal_unlink(const char *path) { } uptr internal_rename(const char *oldpath, const char *newpath) { -#if defined(__riscv) && defined(__linux__) +# if (defined(__riscv) || defined(__loongarch__)) && defined(__linux__) return internal_syscall(SYSCALL(renameat2), AT_FDCWD, (uptr)oldpath, AT_FDCWD, (uptr)newpath, 0); -# elif SANITIZER_LINUX +# elif SANITIZER_LINUX return internal_syscall(SYSCALL(renameat), AT_FDCWD, (uptr)oldpath, AT_FDCWD, (uptr)newpath); -# else +# else return internal_syscall(SYSCALL(rename), (uptr)oldpath, (uptr)newpath); -# endif +# endif } 
uptr internal_sched_yield() { @@ -763,14 +810,14 @@ uptr internal_lseek(fd_t fd, OFF_T offset, int whence) { uptr internal_prctl(int option, uptr arg2, uptr arg3, uptr arg4, uptr arg5) { return internal_syscall(SYSCALL(prctl), option, arg2, arg3, arg4, arg5); } -# if defined(__x86_64__) -#include <asm/unistd_64.h> +# if defined(__x86_64__) +# include <asm/unistd_64.h> // Currently internal_arch_prctl() is only needed on x86_64. uptr internal_arch_prctl(int option, uptr arg2) { return internal_syscall(__NR_arch_prctl, option, arg2); } -# endif -#endif +# endif +# endif uptr internal_sigaltstack(const void *ss, void *oss) { return internal_syscall(SYSCALL(sigaltstack), (uptr)ss, (uptr)oss); @@ -2176,6 +2223,11 @@ static void GetPcSpBp(void *context, uptr *pc, uptr *sp, uptr *bp) { *pc = ucontext->uc_mcontext.pc; *bp = ucontext->uc_mcontext.r30; *sp = ucontext->uc_mcontext.r29; +# elif defined(__loongarch__) + ucontext_t *ucontext = (ucontext_t *)context; + *pc = ucontext->uc_mcontext.__pc; + *bp = ucontext->uc_mcontext.__gregs[22]; + *sp = ucontext->uc_mcontext.__gregs[3]; # else # error "Unsupported arch" # endif diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp index 9bf14ef64731..d74851c43e14 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp @@ -27,6 +27,7 @@ #include "sanitizer_linux.h" #include "sanitizer_placement_new.h" #include "sanitizer_procmaps.h" +#include "sanitizer_solaris.h" #if SANITIZER_NETBSD #define _RTLD_SOURCE // for __lwp_gettcb_fast() / __lwp_getprivate_fast() @@ -62,6 +63,7 @@ #endif #if SANITIZER_SOLARIS +#include <stddef.h> #include <stdlib.h> #include <thread.h> #endif @@ -350,19 +352,43 @@ static uptr TlsGetOffset(uptr ti_module, uptr ti_offset) { extern "C" void *__tls_get_addr(size_t *); #endif +static size_t main_tls_modid; + static int CollectStaticTlsBlocks(struct dl_phdr_info *info, size_t size, void *data) { - if (!info->dlpi_tls_modid) + size_t tls_modid; +#if SANITIZER_SOLARIS + // dlpi_tls_modid is only available since Solaris 11.4 SRU 10. Use + // dlinfo(RTLD_DI_LINKMAP) instead which works on all of Solaris 11.3, + // 11.4, and Illumos. The tlsmodid of the executable was changed to 1 in + // 11.4 to match other implementations. + if (size >= offsetof(dl_phdr_info_test, dlpi_tls_modid)) + main_tls_modid = 1; + else + main_tls_modid = 0; + g_use_dlpi_tls_data = 0; + Rt_map *map; + dlinfo(RTLD_SELF, RTLD_DI_LINKMAP, &map); + tls_modid = map->rt_tlsmodid; +#else + main_tls_modid = 1; + tls_modid = info->dlpi_tls_modid; +#endif + + if (tls_modid < main_tls_modid) return 0; - uptr begin = (uptr)info->dlpi_tls_data; + uptr begin; +#if !SANITIZER_SOLARIS + begin = (uptr)info->dlpi_tls_data; +#endif if (!g_use_dlpi_tls_data) { // Call __tls_get_addr as a fallback. This forces TLS allocation on glibc // and FreeBSD. 
#ifdef __s390__ begin = (uptr)__builtin_thread_pointer() + - TlsGetOffset(info->dlpi_tls_modid, 0); + TlsGetOffset(tls_modid, 0); #else - size_t mod_and_off[2] = {info->dlpi_tls_modid, 0}; + size_t mod_and_off[2] = {tls_modid, 0}; begin = (uptr)__tls_get_addr(mod_and_off); #endif } @@ -370,7 +396,7 @@ static int CollectStaticTlsBlocks(struct dl_phdr_info *info, size_t size, if (info->dlpi_phdr[i].p_type == PT_TLS) { static_cast<InternalMmapVector<TlsBlock> *>(data)->push_back( TlsBlock{begin, begin + info->dlpi_phdr[i].p_memsz, - info->dlpi_phdr[i].p_align, info->dlpi_tls_modid}); + info->dlpi_phdr[i].p_align, tls_modid}); break; } return 0; @@ -382,11 +408,11 @@ __attribute__((unused)) static void GetStaticTlsBoundary(uptr *addr, uptr *size, dl_iterate_phdr(CollectStaticTlsBlocks, &ranges); uptr len = ranges.size(); Sort(ranges.begin(), len); - // Find the range with tls_modid=1. For glibc, because libc.so uses PT_TLS, - // this module is guaranteed to exist and is one of the initially loaded - // modules. + // Find the range with tls_modid == main_tls_modid. For glibc, because + // libc.so uses PT_TLS, this module is guaranteed to exist and is one of + // the initially loaded modules. uptr one = 0; - while (one != len && ranges[one].tls_modid != 1) ++one; + while (one != len && ranges[one].tls_modid != main_tls_modid) ++one; if (one == len) { // This may happen with musl if no module uses PT_TLS. *addr = 0; @@ -395,14 +421,14 @@ __attribute__((unused)) static void GetStaticTlsBoundary(uptr *addr, uptr *size, return; } // Find the maximum consecutive ranges. We consider two modules consecutive if - // the gap is smaller than the alignment. The dynamic loader places static TLS - // blocks this way not to waste space. + // the gap is smaller than the alignment of the latter range. The dynamic + // loader places static TLS blocks this way not to waste space. uptr l = one; *align = ranges[l].align; - while (l != 0 && ranges[l].begin < ranges[l - 1].end + ranges[l - 1].align) + while (l != 0 && ranges[l].begin < ranges[l - 1].end + ranges[l].align) *align = Max(*align, ranges[--l].align); uptr r = one + 1; - while (r != len && ranges[r].begin < ranges[r - 1].end + ranges[r - 1].align) + while (r != len && ranges[r].begin < ranges[r - 1].end + ranges[r].align) *align = Max(*align, ranges[r++].align); *addr = ranges[l].begin; *size = ranges[r - 1].end - ranges[l].begin; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp index 7ce6eff832e5..1ae69e14b237 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp @@ -73,6 +73,7 @@ extern "C" { #include <malloc/malloc.h> #include <os/log.h> #include <pthread.h> +#include <pthread/introspection.h> #include <sched.h> #include <signal.h> #include <spawn.h> @@ -1395,6 +1396,61 @@ u32 GetNumberOfCPUs() { void InitializePlatformCommonFlags(CommonFlags *cf) {} +// Pthread introspection hook +// +// * GCD worker threads are created without a call to pthread_create(), but we +// still need to register these threads (with ThreadCreate/Start()). +// * We use the "pthread introspection hook" below to observe the creation of +// such threads. +// * GCD worker threads don't have parent threads and the CREATE event is +// delivered in the context of the thread itself. CREATE events for regular +// threads, are delivered on the parent. We use this to tell apart which +// threads are GCD workers with `thread == pthread_self()`. 
+// +static pthread_introspection_hook_t prev_pthread_introspection_hook; +static ThreadEventCallbacks thread_event_callbacks; + +static void sanitizer_pthread_introspection_hook(unsigned int event, + pthread_t thread, void *addr, + size_t size) { + // create -> start -> terminate -> destroy + // * create/destroy are usually (not guaranteed) delivered on the parent and + // track resource allocation/reclamation + // * start/terminate are guaranteed to be delivered in the context of the + // thread and give hooks into "just after (before) thread starts (stops) + // executing" + DCHECK(event >= PTHREAD_INTROSPECTION_THREAD_CREATE && + event <= PTHREAD_INTROSPECTION_THREAD_DESTROY); + + if (event == PTHREAD_INTROSPECTION_THREAD_CREATE) { + bool gcd_worker = (thread == pthread_self()); + if (thread_event_callbacks.create) + thread_event_callbacks.create((uptr)thread, gcd_worker); + } else if (event == PTHREAD_INTROSPECTION_THREAD_START) { + CHECK_EQ(thread, pthread_self()); + if (thread_event_callbacks.start) + thread_event_callbacks.start((uptr)thread); + } + + if (prev_pthread_introspection_hook) + prev_pthread_introspection_hook(event, thread, addr, size); + + if (event == PTHREAD_INTROSPECTION_THREAD_TERMINATE) { + CHECK_EQ(thread, pthread_self()); + if (thread_event_callbacks.terminate) + thread_event_callbacks.terminate((uptr)thread); + } else if (event == PTHREAD_INTROSPECTION_THREAD_DESTROY) { + if (thread_event_callbacks.destroy) + thread_event_callbacks.destroy((uptr)thread); + } +} + +void InstallPthreadIntrospectionHook(const ThreadEventCallbacks &callbacks) { + thread_event_callbacks = callbacks; + prev_pthread_introspection_hook = + pthread_introspection_hook_install(&sanitizer_pthread_introspection_hook); +} + } // namespace __sanitizer #endif // SANITIZER_APPLE diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_mac.h b/compiler-rt/lib/sanitizer_common/sanitizer_mac.h index a8b274e8c82c..f0a97d098eea 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_mac.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_mac.h @@ -62,6 +62,17 @@ char **GetEnviron(); void RestrictMemoryToMaxAddress(uptr max_address); +using ThreadEventCallback = void (*)(uptr thread); +using ThreadCreateEventCallback = void (*)(uptr thread, bool gcd_worker); +struct ThreadEventCallbacks { + ThreadCreateEventCallback create; + ThreadEventCallback start; + ThreadEventCallback terminate; + ThreadEventCallback destroy; +}; + +void InstallPthreadIntrospectionHook(const ThreadEventCallbacks &callbacks); + } // namespace __sanitizer #endif // SANITIZER_APPLE diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform.h index 4d89ecaf1071..ea4e5b015d11 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform.h @@ -272,6 +272,12 @@ # define SANITIZER_RISCV64 0 #endif +#if defined(__loongarch_lp64) +# define SANITIZER_LOONGARCH64 1 +#else +# define SANITIZER_LOONGARCH64 0 +#endif + // By default we allow to use SizeClassAllocator64 on 64-bit platform. // But in some cases (e.g. AArch64's 39-bit address space) SizeClassAllocator64 // does not work well and we need to fallback to SizeClassAllocator32. 
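Aside for readers following the sanitizer_mac.h/.cpp hunks above: the patch replaces TSan's private pthread-introspection glue with a small shared API, a ThreadEventCallbacks struct plus InstallPthreadIntrospectionHook(). The sketch below is only an illustration of how a runtime could register callbacks through that API; the struct and function names are taken from this patch, while the callback bodies and ExampleInstallHooks() are hypothetical placeholders (TSan's actual registration appears later in the tsan_platform_mac.cpp hunk).

// Illustrative sketch only: wiring hypothetical callbacks into the new
// InstallPthreadIntrospectionHook() API declared in sanitizer_mac.h.
#include "sanitizer_common/sanitizer_mac.h"

namespace __sanitizer {

// Hypothetical callback bodies; a real runtime would record per-thread
// state here (e.g. create/start its thread context).
static void OnThreadCreate(uptr thread, bool gcd_worker) {
  // For GCD worker threads the CREATE event fires on the worker itself
  // (there is no parent thread), which is how gcd_worker is derived
  // (thread == pthread_self()) in sanitizer_mac.cpp above.
  (void)thread;
  (void)gcd_worker;
}

static void OnThreadTerminate(uptr thread) {
  // TERMINATE is guaranteed to be delivered on the terminating thread.
  (void)thread;
}

void ExampleInstallHooks() {
  ThreadEventCallbacks callbacks = {};
  callbacks.create = OnThreadCreate;
  callbacks.terminate = OnThreadTerminate;  // start/destroy left unset; they are optional
  InstallPthreadIntrospectionHook(callbacks);
}

}  // namespace __sanitizer

TSan's real use of this API (further below) follows the same shape, but its create callback calls ThreadCreate()/ThreadStart() for GCD workers and its terminate callback calls DestroyThreadState().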
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_linux.cpp index 06bafd68dc74..bf0f355847cb 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_linux.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_linux.cpp @@ -59,7 +59,8 @@ using namespace __sanitizer; # if !defined(__powerpc64__) && !defined(__x86_64__) && \ !defined(__aarch64__) && !defined(__mips__) && !defined(__s390__) && \ - !defined(__sparc__) && !defined(__riscv) && !defined(__hexagon__) + !defined(__sparc__) && !defined(__riscv) && !defined(__hexagon__) && \ + !defined(__loongarch__) COMPILER_CHECK(struct___old_kernel_stat_sz == sizeof(struct __old_kernel_stat)); #endif diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp index 3a94b260686f..c85cf1626a75 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp @@ -271,6 +271,10 @@ namespace __sanitizer { defined(__powerpc__) || defined(__s390__) || defined(__sparc__) || \ defined(__hexagon__) # define SIZEOF_STRUCT_USTAT 20 +# elif defined(__loongarch__) + // Not used. The minimum Glibc version available for LoongArch is 2.36 + // so ustat() wrapper is already gone. +# define SIZEOF_STRUCT_USTAT 0 # else # error Unknown size of struct ustat # endif @@ -1271,7 +1275,7 @@ CHECK_SIZE_AND_OFFSET(group, gr_passwd); CHECK_SIZE_AND_OFFSET(group, gr_gid); CHECK_SIZE_AND_OFFSET(group, gr_mem); -#if HAVE_RPC_XDR_H +#if HAVE_RPC_XDR_H && !SANITIZER_APPLE CHECK_TYPE_SIZE(XDR); CHECK_SIZE_AND_OFFSET(XDR, x_op); CHECK_SIZE_AND_OFFSET(XDR, x_ops); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h index 8f7df12c4986..bd5692ed511b 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h @@ -98,9 +98,10 @@ const unsigned struct_kernel_stat64_sz = 104; const unsigned struct_kernel_stat_sz = 144; const unsigned struct_kernel_stat64_sz = 104; #elif defined(__mips__) -const unsigned struct_kernel_stat_sz = SANITIZER_ANDROID - ? FIRST_32_SECOND_64(104, 128) - : FIRST_32_SECOND_64(160, 216); +const unsigned struct_kernel_stat_sz = + SANITIZER_ANDROID + ? FIRST_32_SECOND_64(104, 128) + : FIRST_32_SECOND_64((_MIPS_SIM == _ABIN32) ? 
160 : 144, 216); const unsigned struct_kernel_stat64_sz = 104; #elif defined(__s390__) && !defined(__s390x__) const unsigned struct_kernel_stat_sz = 64; @@ -122,6 +123,9 @@ const unsigned struct_kernel_stat64_sz = 0; // RISCV64 does not use stat64 # elif defined(__hexagon__) const unsigned struct_kernel_stat_sz = 128; const unsigned struct_kernel_stat64_sz = 0; +# elif defined(__loongarch__) +const unsigned struct_kernel_stat_sz = 128; +const unsigned struct_kernel_stat64_sz = 0; # endif struct __sanitizer_perf_event_attr { unsigned type; @@ -142,7 +146,7 @@ const unsigned struct_kexec_segment_sz = 4 * sizeof(unsigned long); #if SANITIZER_LINUX -#if defined(__powerpc64__) || defined(__s390__) +#if defined(__powerpc64__) || defined(__s390__) || defined(__loongarch__) const unsigned struct___old_kernel_stat_sz = 0; #elif !defined(__sparc__) const unsigned struct___old_kernel_stat_sz = 32; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_solaris.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_solaris.cpp index e16c4e938cb2..6f43817aedb1 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_solaris.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_solaris.cpp @@ -9,25 +9,32 @@ // Information about the process mappings (Solaris-specific parts). //===----------------------------------------------------------------------===// -// Before Solaris 11.4, <procfs.h> doesn't work in a largefile environment. -#undef _FILE_OFFSET_BITS #include "sanitizer_platform.h" #if SANITIZER_SOLARIS -#include "sanitizer_common.h" -#include "sanitizer_procmaps.h" +# include <fcntl.h> +# include <limits.h> +# include <procfs.h> -#include <procfs.h> -#include <limits.h> +# include "sanitizer_common.h" +# include "sanitizer_procmaps.h" namespace __sanitizer { void ReadProcMaps(ProcSelfMapsBuff *proc_maps) { - if (!ReadFileToBuffer("/proc/self/xmap", &proc_maps->data, - &proc_maps->mmaped_size, &proc_maps->len)) { - proc_maps->data = nullptr; - proc_maps->mmaped_size = 0; - proc_maps->len = 0; - } + uptr fd = internal_open("/proc/self/xmap", O_RDONLY); + CHECK_NE(fd, -1); + uptr Size = internal_filesize(fd); + CHECK_GT(Size, 0); + + // Allow for additional entries by following mmap. + size_t MmapedSize = Size * 4 / 3; + void *VmMap = MmapOrDie(MmapedSize, "ReadProcMaps()"); + Size = internal_read(fd, VmMap, MmapedSize); + CHECK_NE(Size, -1); + internal_close(fd); + proc_maps->data = (char *)VmMap; + proc_maps->mmaped_size = MmapedSize; + proc_maps->len = Size; } bool MemoryMappingLayout::Next(MemoryMappedSegment *segment) { @@ -49,21 +56,28 @@ bool MemoryMappingLayout::Next(MemoryMappedSegment *segment) { segment->protection |= kProtectionWrite; if ((xmapentry->pr_mflags & MA_EXEC) != 0) segment->protection |= kProtectionExecute; + if ((xmapentry->pr_mflags & MA_SHARED) != 0) + segment->protection |= kProtectionShared; if (segment->filename != NULL && segment->filename_size > 0) { char proc_path[PATH_MAX + 1]; - internal_snprintf(proc_path, sizeof(proc_path), "/proc/self/path/%s", - xmapentry->pr_mapname); - ssize_t sz = internal_readlink(proc_path, segment->filename, - segment->filename_size - 1); - - // If readlink failed, the map is anonymous. - if (sz == -1) { + // Avoid unnecessary readlink on unnamed entires. + if (xmapentry->pr_mapname[0] == '\0') segment->filename[0] = '\0'; - } else if ((size_t)sz < segment->filename_size) - // readlink doesn't NUL-terminate. 
- segment->filename[sz] = '\0'; + else { + internal_snprintf(proc_path, sizeof(proc_path), "/proc/self/path/%s", + xmapentry->pr_mapname); + ssize_t sz = internal_readlink(proc_path, segment->filename, + segment->filename_size - 1); + + // If readlink failed, the map is anonymous. + if (sz == -1) + segment->filename[0] = '\0'; + else if ((size_t)sz < segment->filename_size) + // readlink doesn't NUL-terminate. + segment->filename[sz] = '\0'; + } } data_.current += sizeof(prxmap_t); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_solaris.h b/compiler-rt/lib/sanitizer_common/sanitizer_solaris.h new file mode 100644 index 000000000000..2a21693efbf1 --- /dev/null +++ b/compiler-rt/lib/sanitizer_common/sanitizer_solaris.h @@ -0,0 +1,56 @@ +//===-- sanitizer_solaris.h -------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of Sanitizer runtime. It contains Solaris-specific +// definitions. +// +//===----------------------------------------------------------------------===// + +#ifndef SANITIZER_SOLARIS_H +#define SANITIZER_SOLARIS_H + +#include "sanitizer_internal_defs.h" + +#if SANITIZER_SOLARIS + +#include <link.h> + +namespace __sanitizer { + +// Beginning of declaration from OpenSolaris/Illumos +// $SRC/cmd/sgs/include/rtld.h. +struct Rt_map { + Link_map rt_public; + const char *rt_pathname; + ulong_t rt_padstart; + ulong_t rt_padimlen; + ulong_t rt_msize; + uint_t rt_flags; + uint_t rt_flags1; + ulong_t rt_tlsmodid; +}; + +// Structure matching the Solaris 11.4 struct dl_phdr_info used to determine +// presence of dlpi_tls_modid field at runtime. Cf. Solaris 11.4 +// dl_iterate_phdr(3C), Example 2. 
+struct dl_phdr_info_test { + ElfW(Addr) dlpi_addr; + const char *dlpi_name; + const ElfW(Phdr) * dlpi_phdr; + ElfW(Half) dlpi_phnum; + u_longlong_t dlpi_adds; + u_longlong_t dlpi_subs; + size_t dlpi_tls_modid; + void *dlpi_tls_data; +}; + +} // namespace __sanitizer + +#endif // SANITIZER_SOLARIS + +#endif // SANITIZER_SOLARIS_H diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.cpp index 3013a0c4abdf..d24fae98213a 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.cpp @@ -121,7 +121,7 @@ void BufferedStackTrace::UnwindFast(uptr pc, uptr bp, uptr stack_top, uhwptr pc1 = caller_frame[2]; #elif defined(__s390__) uhwptr pc1 = frame[14]; -#elif defined(__riscv) +#elif defined(__loongarch__) || defined(__riscv) // frame[-1] contains the return address uhwptr pc1 = frame[-1]; #else @@ -136,7 +136,7 @@ void BufferedStackTrace::UnwindFast(uptr pc, uptr bp, uptr stack_top, trace_buffer[size++] = (uptr) pc1; } bottom = (uptr)frame; -#if defined(__riscv) +#if defined(__loongarch__) || defined(__riscv) // frame[-2] contain fp of the previous frame uptr new_bp = (uptr)frame[-2]; #else diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_mac.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_mac.cpp index c87674ff7b76..87f5250db648 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_mac.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_mac.cpp @@ -29,7 +29,7 @@ typedef struct { class SuspendedThreadsListMac final : public SuspendedThreadsList { public: - SuspendedThreadsListMac() : threads_(1024) {} + SuspendedThreadsListMac() = default; tid_t GetThreadID(uptr index) const override; thread_t GetThread(uptr index) const; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp index 8be7709b6038..b223f6cd01e3 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp @@ -72,7 +72,6 @@ static swift_demangle_ft swift_demangle_f; // symbolication. static void InitializeSwiftDemangler() { swift_demangle_f = (swift_demangle_ft)dlsym(RTLD_DEFAULT, "swift_demangle"); - (void)dlerror(); // Cleanup error message in case of failure } // Attempts to demangle a Swift name. The demangler will return nullptr if a diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_syscall_linux_loongarch64.inc b/compiler-rt/lib/sanitizer_common/sanitizer_syscall_linux_loongarch64.inc new file mode 100644 index 000000000000..97ca7f2f3f92 --- /dev/null +++ b/compiler-rt/lib/sanitizer_common/sanitizer_syscall_linux_loongarch64.inc @@ -0,0 +1,167 @@ +//===-- sanitizer_syscall_linux_loongarch64.inc -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implementations of internal_syscall and internal_iserror for +// Linux/loongarch64. 
+// +//===----------------------------------------------------------------------===// + +// About local register variables: +// https://gcc.gnu.org/onlinedocs/gcc/Local-Register-Variables.html#Local-Register-Variables +// +// Kernel ABI... +// syscall number is passed in a7 +// (http://man7.org/linux/man-pages/man2/syscall.2.html) results are return in +// a0 and a1 (http://man7.org/linux/man-pages/man2/syscall.2.html) arguments +// are passed in: a0-a7 (confirmed by inspecting glibc sources). +#define SYSCALL(name) __NR_##name + +#define INTERNAL_SYSCALL_CLOBBERS "memory" + +static uptr __internal_syscall(u64 nr) { + register u64 a7 asm("a7") = nr; + register u64 a0 asm("a0"); + __asm__ volatile("syscall 0\n\t" + : "=r"(a0) + : "r"(a7) + : INTERNAL_SYSCALL_CLOBBERS); + return a0; +} +#define __internal_syscall0(n) (__internal_syscall)(n) + +static uptr __internal_syscall(u64 nr, u64 arg1) { + register u64 a7 asm("a7") = nr; + register u64 a0 asm("a0") = arg1; + __asm__ volatile("syscall 0\n\t" + : "+r"(a0) + : "r"(a7) + : INTERNAL_SYSCALL_CLOBBERS); + return a0; +} +#define __internal_syscall1(n, a1) (__internal_syscall)(n, (u64)(a1)) + +static uptr __internal_syscall(u64 nr, u64 arg1, long arg2) { + register u64 a7 asm("a7") = nr; + register u64 a0 asm("a0") = arg1; + register u64 a1 asm("a1") = arg2; + __asm__ volatile("syscall 0\n\t" + : "+r"(a0) + : "r"(a7), "r"(a1) + : INTERNAL_SYSCALL_CLOBBERS); + return a0; +} +#define __internal_syscall2(n, a1, a2) \ + (__internal_syscall)(n, (u64)(a1), (long)(a2)) + +static uptr __internal_syscall(u64 nr, u64 arg1, long arg2, long arg3) { + register u64 a7 asm("a7") = nr; + register u64 a0 asm("a0") = arg1; + register u64 a1 asm("a1") = arg2; + register u64 a2 asm("a2") = arg3; + __asm__ volatile("syscall 0\n\t" + : "+r"(a0) + : "r"(a7), "r"(a1), "r"(a2) + : INTERNAL_SYSCALL_CLOBBERS); + return a0; +} +#define __internal_syscall3(n, a1, a2, a3) \ + (__internal_syscall)(n, (u64)(a1), (long)(a2), (long)(a3)) + +static uptr __internal_syscall(u64 nr, u64 arg1, long arg2, long arg3, + u64 arg4) { + register u64 a7 asm("a7") = nr; + register u64 a0 asm("a0") = arg1; + register u64 a1 asm("a1") = arg2; + register u64 a2 asm("a2") = arg3; + register u64 a3 asm("a3") = arg4; + __asm__ volatile("syscall 0\n\t" + : "+r"(a0) + : "r"(a7), "r"(a1), "r"(a2), "r"(a3) + : INTERNAL_SYSCALL_CLOBBERS); + return a0; +} +#define __internal_syscall4(n, a1, a2, a3, a4) \ + (__internal_syscall)(n, (u64)(a1), (long)(a2), (long)(a3), (long)(a4)) + +static uptr __internal_syscall(u64 nr, u64 arg1, long arg2, long arg3, u64 arg4, + long arg5) { + register u64 a7 asm("a7") = nr; + register u64 a0 asm("a0") = arg1; + register u64 a1 asm("a1") = arg2; + register u64 a2 asm("a2") = arg3; + register u64 a3 asm("a3") = arg4; + register u64 a4 asm("a4") = arg5; + __asm__ volatile("syscall 0\n\t" + : "+r"(a0) + : "r"(a7), "r"(a1), "r"(a2), "r"(a3), "r"(a4) + : INTERNAL_SYSCALL_CLOBBERS); + return a0; +} +#define __internal_syscall5(n, a1, a2, a3, a4, a5) \ + (__internal_syscall)(n, (u64)(a1), (long)(a2), (long)(a3), (long)(a4), \ + (u64)(a5)) + +static uptr __internal_syscall(u64 nr, u64 arg1, long arg2, long arg3, u64 arg4, + long arg5, long arg6) { + register u64 a7 asm("a7") = nr; + register u64 a0 asm("a0") = arg1; + register u64 a1 asm("a1") = arg2; + register u64 a2 asm("a2") = arg3; + register u64 a3 asm("a3") = arg4; + register u64 a4 asm("a4") = arg5; + register u64 a5 asm("a5") = arg6; + __asm__ volatile("syscall 0\n\t" + : "+r"(a0) + : "r"(a7), "r"(a1), "r"(a2), "r"(a3), 
"r"(a4), "r"(a5) + : INTERNAL_SYSCALL_CLOBBERS); + return a0; +} +#define __internal_syscall6(n, a1, a2, a3, a4, a5, a6) \ + (__internal_syscall)(n, (u64)(a1), (long)(a2), (long)(a3), (long)(a4), \ + (u64)(a5), (long)(a6)) + +static uptr __internal_syscall(u64 nr, u64 arg1, long arg2, long arg3, u64 arg4, + long arg5, long arg6, long arg7) { + register u64 a7 asm("a7") = nr; + register u64 a0 asm("a0") = arg1; + register u64 a1 asm("a1") = arg2; + register u64 a2 asm("a2") = arg3; + register u64 a3 asm("a3") = arg4; + register u64 a4 asm("a4") = arg5; + register u64 a5 asm("a5") = arg6; + register u64 a6 asm("a6") = arg7; + __asm__ volatile("syscall 0\n\t" + : "+r"(a0) + : "r"(a7), "r"(a1), "r"(a2), "r"(a3), "r"(a4), "r"(a5), + "r"(a6) + : INTERNAL_SYSCALL_CLOBBERS); + return a0; +} +#define __internal_syscall7(n, a1, a2, a3, a4, a5, a6, a7) \ + (__internal_syscall)(n, (u64)(a1), (long)(a2), (long)(a3), (long)(a4), \ + (u64)(a5), (long)(a6), (long)(a7)) + +#define __SYSCALL_NARGS_X(a1, a2, a3, a4, a5, a6, a7, a8, n, ...) n +#define __SYSCALL_NARGS(...) \ + __SYSCALL_NARGS_X(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1, 0, ) +#define __SYSCALL_CONCAT_X(a, b) a##b +#define __SYSCALL_CONCAT(a, b) __SYSCALL_CONCAT_X(a, b) +#define __SYSCALL_DISP(b, ...) \ + __SYSCALL_CONCAT(b, __SYSCALL_NARGS(__VA_ARGS__))(__VA_ARGS__) + +#define internal_syscall(...) __SYSCALL_DISP(__internal_syscall, __VA_ARGS__) + +// Helper function used to avoid clobbering of errno. +bool internal_iserror(uptr retval, int *internal_errno) { + if (retval >= (uptr)-4095) { + if (internal_errno) + *internal_errno = -retval; + return true; + } + return false; +} diff --git a/compiler-rt/lib/tsan/rtl/tsan_dense_alloc.h b/compiler-rt/lib/tsan/rtl/tsan_dense_alloc.h index 7a39a39d51de..2eaff39057bc 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_dense_alloc.h +++ b/compiler-rt/lib/tsan/rtl/tsan_dense_alloc.h @@ -85,14 +85,7 @@ class DenseSlabAlloc { } void FlushCache(Cache *c) { - if (!c->pos) - return; - SpinMutexLock lock(&mtx_); - while (c->pos) { - IndexT idx = c->cache[--c->pos]; - *(IndexT*)Map(idx) = freelist_; - freelist_ = idx; - } + while (c->pos) Drain(c); } void InitCache(Cache *c) { @@ -106,7 +99,7 @@ class DenseSlabAlloc { template <typename Func> void ForEach(Func func) { - SpinMutexLock lock(&mtx_); + Lock lock(&mtx_); uptr fillpos = atomic_load_relaxed(&fillpos_); for (uptr l1 = 0; l1 < fillpos; l1++) { for (IndexT l2 = l1 == 0 ? 1 : 0; l2 < kL2Size; l2++) func(&map_[l1][l2]); @@ -115,48 +108,86 @@ class DenseSlabAlloc { private: T *map_[kL1Size]; - SpinMutex mtx_; - IndexT freelist_ = {0}; + Mutex mtx_; + // The freelist is organized as a lock-free stack of batches of nodes. + // The stack itself uses Block::next links, while the batch within each + // stack node uses Block::batch links. + // Low 32-bits of freelist_ is the node index, top 32-bits is ABA-counter. + atomic_uint64_t freelist_ = {0}; atomic_uintptr_t fillpos_ = {0}; const char *const name_; - void Refill(Cache *c) { - SpinMutexLock lock(&mtx_); - if (freelist_ == 0) { - uptr fillpos = atomic_load_relaxed(&fillpos_); - if (fillpos == kL1Size) { - Printf("ThreadSanitizer: %s overflow (%zu*%zu). Dying.\n", - name_, kL1Size, kL2Size); - Die(); - } - VPrintf(2, "ThreadSanitizer: growing %s: %zu out of %zu*%zu\n", name_, - fillpos, kL1Size, kL2Size); - T *batch = (T*)MmapOrDie(kL2Size * sizeof(T), name_); - // Reserve 0 as invalid index. - IndexT start = fillpos == 0 ? 
1 : 0; - for (IndexT i = start; i < kL2Size; i++) { - new(batch + i) T; - *(IndexT *)(batch + i) = i + 1 + fillpos * kL2Size; - } - *(IndexT*)(batch + kL2Size - 1) = 0; - freelist_ = fillpos * kL2Size + start; - map_[fillpos] = batch; - atomic_store_relaxed(&fillpos_, fillpos + 1); - } - for (uptr i = 0; i < Cache::kSize / 2 && freelist_ != 0; i++) { - IndexT idx = freelist_; + struct Block { + IndexT next; + IndexT batch; + }; + + Block *MapBlock(IndexT idx) { return reinterpret_cast<Block *>(Map(idx)); } + + static constexpr u64 kCounterInc = 1ull << 32; + static constexpr u64 kCounterMask = ~(kCounterInc - 1); + + NOINLINE void Refill(Cache *c) { + // Pop 1 batch of nodes from the freelist. + IndexT idx; + u64 xchg; + u64 cmp = atomic_load(&freelist_, memory_order_acquire); + do { + idx = static_cast<IndexT>(cmp); + if (!idx) + return AllocSuperBlock(c); + Block *ptr = MapBlock(idx); + xchg = ptr->next | (cmp & kCounterMask); + } while (!atomic_compare_exchange_weak(&freelist_, &cmp, xchg, + memory_order_acq_rel)); + // Unpack it into c->cache. + while (idx) { c->cache[c->pos++] = idx; - freelist_ = *(IndexT*)Map(idx); + idx = MapBlock(idx)->batch; } } - void Drain(Cache *c) { - SpinMutexLock lock(&mtx_); - for (uptr i = 0; i < Cache::kSize / 2; i++) { + NOINLINE void Drain(Cache *c) { + // Build a batch of at most Cache::kSize / 2 nodes linked by Block::batch. + IndexT head_idx = 0; + for (uptr i = 0; i < Cache::kSize / 2 && c->pos; i++) { IndexT idx = c->cache[--c->pos]; - *(IndexT*)Map(idx) = freelist_; - freelist_ = idx; + Block *ptr = MapBlock(idx); + ptr->batch = head_idx; + head_idx = idx; + } + // Push it onto the freelist stack. + Block *head = MapBlock(head_idx); + u64 xchg; + u64 cmp = atomic_load(&freelist_, memory_order_acquire); + do { + head->next = static_cast<IndexT>(cmp); + xchg = head_idx | (cmp & kCounterMask) + kCounterInc; + } while (!atomic_compare_exchange_weak(&freelist_, &cmp, xchg, + memory_order_acq_rel)); + } + + NOINLINE void AllocSuperBlock(Cache *c) { + Lock lock(&mtx_); + uptr fillpos = atomic_load_relaxed(&fillpos_); + if (fillpos == kL1Size) { + Printf("ThreadSanitizer: %s overflow (%zu*%zu). Dying.\n", name_, kL1Size, + kL2Size); + Die(); + } + VPrintf(2, "ThreadSanitizer: growing %s: %zu out of %zu*%zu\n", name_, + fillpos, kL1Size, kL2Size); + T *batch = (T *)MmapOrDie(kL2Size * sizeof(T), name_); + map_[fillpos] = batch; + // Reserve 0 as invalid index. + for (IndexT i = fillpos ? 0 : 1; i < kL2Size; i++) { + new (batch + i) T; + c->cache[c->pos++] = i + fillpos * kL2Size; + if (c->pos == Cache::kSize) + Drain(c); } + atomic_store_relaxed(&fillpos_, fillpos + 1); + CHECK(c->pos); } }; diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp b/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp index 68ce5f83bdbd..1aac0fb27520 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp @@ -200,44 +200,26 @@ void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns) { # if !SANITIZER_GO void InitializeShadowMemoryPlatform() { } -// On OS X, GCD worker threads are created without a call to pthread_create. We -// need to properly register these threads with ThreadCreate and ThreadStart. -// These threads don't have a parent thread, as they are created "spuriously". -// We're using a libpthread API that notifies us about a newly created thread. -// The `thread == pthread_self()` check indicates this is actually a worker -// thread. 
If it's just a regular thread, this hook is called on the parent -// thread. -typedef void (*pthread_introspection_hook_t)(unsigned int event, - pthread_t thread, void *addr, - size_t size); -extern "C" pthread_introspection_hook_t pthread_introspection_hook_install( - pthread_introspection_hook_t hook); -static const uptr PTHREAD_INTROSPECTION_THREAD_CREATE = 1; -static const uptr PTHREAD_INTROSPECTION_THREAD_TERMINATE = 3; -static pthread_introspection_hook_t prev_pthread_introspection_hook; -static void my_pthread_introspection_hook(unsigned int event, pthread_t thread, - void *addr, size_t size) { - if (event == PTHREAD_INTROSPECTION_THREAD_CREATE) { - if (thread == pthread_self()) { - // The current thread is a newly created GCD worker thread. - ThreadState *thr = cur_thread(); - Processor *proc = ProcCreate(); - ProcWire(proc, thr); - ThreadState *parent_thread_state = nullptr; // No parent. - Tid tid = ThreadCreate(parent_thread_state, 0, (uptr)thread, true); - CHECK_NE(tid, kMainTid); - ThreadStart(thr, tid, GetTid(), ThreadType::Worker); - } - } else if (event == PTHREAD_INTROSPECTION_THREAD_TERMINATE) { - CHECK_EQ(thread, pthread_self()); +// Register GCD worker threads, which are created without an observable call to +// pthread_create(). +static void ThreadCreateCallback(uptr thread, bool gcd_worker) { + if (gcd_worker) { ThreadState *thr = cur_thread(); - if (thr->tctx) { - DestroyThreadState(); - } + Processor *proc = ProcCreate(); + ProcWire(proc, thr); + ThreadState *parent_thread_state = nullptr; // No parent. + Tid tid = ThreadCreate(parent_thread_state, 0, (uptr)thread, true); + CHECK_NE(tid, kMainTid); + ThreadStart(thr, tid, GetTid(), ThreadType::Worker); } +} - if (prev_pthread_introspection_hook != nullptr) - prev_pthread_introspection_hook(event, thread, addr, size); +// Destroy thread state for *all* threads. +static void ThreadTerminateCallback(uptr thread) { + ThreadState *thr = cur_thread(); + if (thr->tctx) { + DestroyThreadState(); + } } #endif @@ -261,8 +243,11 @@ void InitializePlatform() { InitializeThreadStateStorage(); - prev_pthread_introspection_hook = - pthread_introspection_hook_install(&my_pthread_introspection_hook); + ThreadEventCallbacks callbacks = { + .create = ThreadCreateCallback, + .terminate = ThreadTerminateCallback, + }; + InstallPthreadIntrospectionHook(callbacks); #endif if (GetMacosAlignedVersion() >= MacosVersion(10, 14)) { diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp index 607f373871b4..825a9d791ecc 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp @@ -210,17 +210,15 @@ static void DoResetImpl(uptr epoch) { // Clang does not understand locking all slots in the loop: // error: expecting mutex 'slot.mtx' to be held at start of each loop void DoReset(ThreadState* thr, uptr epoch) SANITIZER_NO_THREAD_SAFETY_ANALYSIS { - { - for (auto& slot : ctx->slots) { - slot.mtx.Lock(); - if (UNLIKELY(epoch == 0)) - epoch = ctx->global_epoch; - if (UNLIKELY(epoch != ctx->global_epoch)) { - // Epoch can't change once we've locked the first slot. - CHECK_EQ(slot.sid, 0); - slot.mtx.Unlock(); - return; - } + for (auto& slot : ctx->slots) { + slot.mtx.Lock(); + if (UNLIKELY(epoch == 0)) + epoch = ctx->global_epoch; + if (UNLIKELY(epoch != ctx->global_epoch)) { + // Epoch can't change once we've locked the first slot. + CHECK_EQ(slot.sid, 0); + slot.mtx.Unlock(); + return; } } DPrintf("#%d: DoReset epoch=%lu\n", thr ? 
thr->tid : -1, epoch); @@ -951,6 +949,15 @@ void TraceSwitchPartImpl(ThreadState* thr) { TraceMutexLock(thr, d.write ? EventType::kLock : EventType::kRLock, 0, d.addr, d.stack_id); } + // Callers of TraceSwitchPart expect that TraceAcquire will always succeed + // after the call. It's possible that TryTraceFunc/TraceMutexLock above + // filled the trace part exactly up to the TracePart::kAlignment gap + // and the next TraceAcquire won't succeed. Skip the gap to avoid that. + EventFunc *ev; + if (!TraceAcquire(thr, &ev)) { + CHECK(TraceSkipGap(thr)); + CHECK(TraceAcquire(thr, &ev)); + } { Lock lock(&ctx->slot_mtx); // There is a small chance that the slot may be not queued at this point. diff --git a/compiler-rt/lib/ubsan/ubsan_value.cpp b/compiler-rt/lib/ubsan/ubsan_value.cpp index 5a93a0d7fc2d..dc61e5b939d9 100644 --- a/compiler-rt/lib/ubsan/ubsan_value.cpp +++ b/compiler-rt/lib/ubsan/ubsan_value.cpp @@ -18,9 +18,7 @@ #include "sanitizer_common/sanitizer_libc.h" #include "sanitizer_common/sanitizer_mutex.h" -// TODO(dliew): Prefer '__APPLE__' here over 'SANITIZER_APPLE', as the latter is -// unclear. rdar://58124919 tracks using a more obviously portable guard. -#if defined(__APPLE__) +#if SANITIZER_APPLE #include <dlfcn.h> #endif @@ -29,7 +27,7 @@ using namespace __ubsan; typedef const char *(*ObjCGetClassNameTy)(void *); const char *__ubsan::getObjCClassName(ValueHandle Pointer) { -#if defined(__APPLE__) +#if SANITIZER_APPLE // We need to query the ObjC runtime for some information, but do not want // to introduce a static dependency from the ubsan runtime onto ObjC. Try to // grab a handle to the ObjC runtime used by the process. diff --git a/libcxx/include/__algorithm/equal_range.h b/libcxx/include/__algorithm/equal_range.h index f30f55be64fc..42d009ebbc0f 100644 --- a/libcxx/include/__algorithm/equal_range.h +++ b/libcxx/include/__algorithm/equal_range.h @@ -17,9 +17,13 @@ #include <__algorithm/upper_bound.h> #include <__config> #include <__functional/identity.h> +#include <__functional/invoke.h> #include <__iterator/advance.h> #include <__iterator/distance.h> #include <__iterator/iterator_traits.h> +#include <__type_traits/is_callable.h> +#include <__type_traits/is_copy_constructible.h> +#include <__utility/move.h> #include <__utility/pair.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -28,59 +32,50 @@ _LIBCPP_BEGIN_NAMESPACE_STD -template <class _Compare, class _ForwardIterator, class _Tp> -_LIBCPP_CONSTEXPR_AFTER_CXX17 pair<_ForwardIterator, _ForwardIterator> -__equal_range(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, _Compare __comp) -{ - typedef typename iterator_traits<_ForwardIterator>::difference_type difference_type; - difference_type __len = _VSTD::distance(__first, __last); - while (__len != 0) - { - difference_type __l2 = _VSTD::__half_positive(__len); - _ForwardIterator __m = __first; - _VSTD::advance(__m, __l2); - if (__comp(*__m, __value)) - { - __first = ++__m; - __len -= __l2 + 1; - } - else if (__comp(__value, *__m)) - { - __last = __m; - __len = __l2; - } - else - { - auto __proj = std::__identity(); - _ForwardIterator __mp1 = __m; - return pair<_ForwardIterator, _ForwardIterator> - ( - _VSTD::__lower_bound_impl<_ClassicAlgPolicy>(__first, __m, __value, __comp, __proj), - _VSTD::__upper_bound<_Compare>(++__mp1, __last, __value, __comp) - ); - } +template <class _AlgPolicy, class _Compare, class _Iter, class _Sent, class _Tp, class _Proj> +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 pair<_Iter, _Iter> +__equal_range(_Iter 
__first, _Sent __last, const _Tp& __value, _Compare&& __comp, _Proj&& __proj) { + auto __len = _IterOps<_AlgPolicy>::distance(__first, __last); + _Iter __end = _IterOps<_AlgPolicy>::next(__first, __last); + while (__len != 0) { + auto __half_len = std::__half_positive(__len); + _Iter __mid = _IterOps<_AlgPolicy>::next(__first, __half_len); + if (std::__invoke(__comp, std::__invoke(__proj, *__mid), __value)) { + __first = ++__mid; + __len -= __half_len + 1; + } else if (std::__invoke(__comp, __value, std::__invoke(__proj, *__mid))) { + __end = __mid; + __len = __half_len; + } else { + _Iter __mp1 = __mid; + return pair<_Iter, _Iter>( + std::__lower_bound_impl<_AlgPolicy>(__first, __mid, __value, __comp, __proj), + std::__upper_bound<_AlgPolicy>(++__mp1, __end, __value, __comp, __proj)); } - return pair<_ForwardIterator, _ForwardIterator>(__first, __first); + } + return pair<_Iter, _Iter>(__first, __first); } template <class _ForwardIterator, class _Tp, class _Compare> -_LIBCPP_NODISCARD_EXT inline -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -pair<_ForwardIterator, _ForwardIterator> -equal_range(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, _Compare __comp) -{ - typedef typename __comp_ref_type<_Compare>::type _Comp_ref; - return _VSTD::__equal_range<_Comp_ref>(__first, __last, __value, __comp); +_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 pair<_ForwardIterator, _ForwardIterator> +equal_range(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, _Compare __comp) { + static_assert(__is_callable<_Compare, decltype(*__first), const _Tp&>::value, + "The comparator has to be callable"); + static_assert(is_copy_constructible<_ForwardIterator>::value, + "Iterator has to be copy constructible"); + typedef typename __comp_ref_type<_Compare>::type _Comp_ref; + return std::__equal_range<_ClassicAlgPolicy>( + std::move(__first), std::move(__last), __value, static_cast<_Comp_ref>(__comp), std::__identity()); } template <class _ForwardIterator, class _Tp> -_LIBCPP_NODISCARD_EXT inline -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -pair<_ForwardIterator, _ForwardIterator> -equal_range(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) -{ - return _VSTD::equal_range(__first, __last, __value, - __less<typename iterator_traits<_ForwardIterator>::value_type, _Tp>()); +_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 pair<_ForwardIterator, _ForwardIterator> +equal_range(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) { + return std::equal_range( + std::move(__first), + std::move(__last), + __value, + __less<typename iterator_traits<_ForwardIterator>::value_type, _Tp>()); } _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__algorithm/includes.h b/libcxx/include/__algorithm/includes.h index 4c87e8d22116..102d3db39a2d 100644 --- a/libcxx/include/__algorithm/includes.h +++ b/libcxx/include/__algorithm/includes.h @@ -13,6 +13,7 @@ #include <__algorithm/comp_ref_type.h> #include <__config> #include <__iterator/iterator_traits.h> +#include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -20,41 +21,40 @@ _LIBCPP_BEGIN_NAMESPACE_STD -template <class _Compare, class _InputIterator1, class _InputIterator2> -_LIBCPP_CONSTEXPR_AFTER_CXX17 bool -__includes(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _InputIterator2 __last2, - _Compare __comp) -{ - for (; __first2 != __last2; 
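// Illustrative sketch, not part of the imported diff. The rewritten
// __equal_range above narrows a window until *__mid compares equivalent to the
// value, then finishes with __lower_bound_impl on the left half and
// __upper_bound on the right half. At the public interface that is the
// familiar pair returned by std::equal_range:
#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  std::vector<int> v = {1, 2, 2, 2, 3, 5};          // must be sorted (or partitioned w.r.t. 2)
  auto p = std::equal_range(v.begin(), v.end(), 2);
  assert(p.first - v.begin() == 1);                 // first element not less than 2
  assert(p.second - v.begin() == 4);                // first element greater than 2
}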
++__first1) - { - if (__first1 == __last1 || __comp(*__first2, *__first1)) - return false; - if (!__comp(*__first1, *__first2)) - ++__first2; - } - return true; +template <class _Iter1, class _Sent1, class _Iter2, class _Sent2, class _Comp> +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 bool +__includes(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Sent2 __last2, _Comp&& __comp) { + for (; __first2 != __last2; ++__first1) { + if (__first1 == __last1 || __comp(*__first2, *__first1)) + return false; + if (!__comp(*__first1, *__first2)) + ++__first2; + } + return true; } template <class _InputIterator1, class _InputIterator2, class _Compare> -_LIBCPP_NODISCARD_EXT inline -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -bool -includes(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _InputIterator2 __last2, - _Compare __comp) -{ - typedef typename __comp_ref_type<_Compare>::type _Comp_ref; - return _VSTD::__includes<_Comp_ref>(__first1, __last1, __first2, __last2, __comp); +_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 bool includes( + _InputIterator1 __first1, + _InputIterator1 __last1, + _InputIterator2 __first2, + _InputIterator2 __last2, + _Compare __comp) { + typedef typename __comp_ref_type<_Compare>::type _Comp_ref; + return std::__includes( + std::move(__first1), std::move(__last1), std::move(__first2), std::move(__last2), static_cast<_Comp_ref>(__comp)); } template <class _InputIterator1, class _InputIterator2> -_LIBCPP_NODISCARD_EXT inline -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -bool -includes(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _InputIterator2 __last2) -{ - return _VSTD::includes(__first1, __last1, __first2, __last2, - __less<typename iterator_traits<_InputIterator1>::value_type, - typename iterator_traits<_InputIterator2>::value_type>()); +_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 bool +includes(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _InputIterator2 __last2) { + return std::includes( + std::move(__first1), + std::move(__last1), + std::move(__first2), + std::move(__last2), + __less<typename iterator_traits<_InputIterator1>::value_type, + typename iterator_traits<_InputIterator2>::value_type>()); } _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__algorithm/inplace_merge.h b/libcxx/include/__algorithm/inplace_merge.h index 58919ddbae76..f4364969b8f9 100644 --- a/libcxx/include/__algorithm/inplace_merge.h +++ b/libcxx/include/__algorithm/inplace_merge.h @@ -11,17 +11,18 @@ #include <__algorithm/comp.h> #include <__algorithm/comp_ref_type.h> +#include <__algorithm/iterator_operations.h> #include <__algorithm/lower_bound.h> #include <__algorithm/min.h> #include <__algorithm/move.h> #include <__algorithm/rotate.h> #include <__algorithm/upper_bound.h> #include <__config> +#include <__functional/identity.h> #include <__iterator/advance.h> #include <__iterator/distance.h> #include <__iterator/iterator_traits.h> #include <__iterator/reverse_iterator.h> -#include <__utility/swap.h> #include <memory> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -53,7 +54,7 @@ public: bool operator()(const _T1& __x, const _T2& __y) {return __p_(__y, __x);} }; -template <class _Compare, class _InputIterator1, class _InputIterator2, +template <class _AlgPolicy, class _Compare, class _InputIterator1, class _InputIterator2, class _OutputIterator> void 
__half_inplace_merge(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _InputIterator2 __last2, @@ -63,25 +64,26 @@ void __half_inplace_merge(_InputIterator1 __first1, _InputIterator1 __last1, { if (__first2 == __last2) { + // TODO(alg-policy): pass `_AlgPolicy` once it's supported by `move`. _VSTD::move(__first1, __last1, __result); return; } if (__comp(*__first2, *__first1)) { - *__result = _VSTD::move(*__first2); + *__result = _IterOps<_AlgPolicy>::__iter_move(__first2); ++__first2; } else { - *__result = _VSTD::move(*__first1); + *__result = _IterOps<_AlgPolicy>::__iter_move(__first1); ++__first1; } } // __first2 through __last2 are already in the right spot. } -template <class _Compare, class _BidirectionalIterator> +template <class _AlgPolicy, class _Compare, class _BidirectionalIterator> void __buffered_inplace_merge(_BidirectionalIterator __first, _BidirectionalIterator __middle, _BidirectionalIterator __last, _Compare __comp, typename iterator_traits<_BidirectionalIterator>::difference_type __len1, @@ -95,30 +97,32 @@ __buffered_inplace_merge(_BidirectionalIterator __first, _BidirectionalIterator { value_type* __p = __buff; for (_BidirectionalIterator __i = __first; __i != __middle; __d.template __incr<value_type>(), (void) ++__i, (void) ++__p) - ::new ((void*)__p) value_type(_VSTD::move(*__i)); - _VSTD::__half_inplace_merge<_Compare>(__buff, __p, __middle, __last, __first, __comp); + ::new ((void*)__p) value_type(_IterOps<_AlgPolicy>::__iter_move(__i)); + std::__half_inplace_merge<_AlgPolicy, _Compare>(__buff, __p, __middle, __last, __first, __comp); } else { value_type* __p = __buff; for (_BidirectionalIterator __i = __middle; __i != __last; __d.template __incr<value_type>(), (void) ++__i, (void) ++__p) - ::new ((void*)__p) value_type(_VSTD::move(*__i)); + ::new ((void*)__p) value_type(_IterOps<_AlgPolicy>::__iter_move(__i)); typedef reverse_iterator<_BidirectionalIterator> _RBi; typedef reverse_iterator<value_type*> _Rv; typedef __invert<_Compare> _Inverted; - _VSTD::__half_inplace_merge<_Inverted>(_Rv(__p), _Rv(__buff), + std::__half_inplace_merge<_AlgPolicy, _Inverted>(_Rv(__p), _Rv(__buff), _RBi(__middle), _RBi(__first), _RBi(__last), _Inverted(__comp)); } } -template <class _Compare, class _BidirectionalIterator> +template <class _AlgPolicy, class _Compare, class _BidirectionalIterator> void __inplace_merge(_BidirectionalIterator __first, _BidirectionalIterator __middle, _BidirectionalIterator __last, _Compare __comp, typename iterator_traits<_BidirectionalIterator>::difference_type __len1, typename iterator_traits<_BidirectionalIterator>::difference_type __len2, typename iterator_traits<_BidirectionalIterator>::value_type* __buff, ptrdiff_t __buff_size) { + using _Ops = _IterOps<_AlgPolicy>; + typedef typename iterator_traits<_BidirectionalIterator>::difference_type difference_type; while (true) { @@ -126,7 +130,7 @@ __inplace_merge(_BidirectionalIterator __first, _BidirectionalIterator __middle, if (__len2 == 0) return; if (__len1 <= __buff_size || __len2 <= __buff_size) - return _VSTD::__buffered_inplace_merge<_Compare> + return std::__buffered_inplace_merge<_AlgPolicy, _Compare> (__first, __middle, __last, __comp, __len1, __len2, __buff); // shrink [__first, __middle) as much as possible (with no moves), returning if it shrinks to 0 for (; true; ++__first, (void) --__len1) @@ -153,35 +157,38 @@ __inplace_merge(_BidirectionalIterator __first, _BidirectionalIterator __middle, { // __len >= 1, __len2 >= 2 __len21 = __len2 / 2; __m2 = __middle; - 
_VSTD::advance(__m2, __len21); - __m1 = _VSTD::__upper_bound<_Compare>(__first, __middle, *__m2, __comp); - __len11 = _VSTD::distance(__first, __m1); + _Ops::advance(__m2, __len21); + // TODO: replace _ClassicAlgPolicy and __identity with _AlgPolicy and projection + __m1 = std::__upper_bound<_ClassicAlgPolicy>(__first, __middle, *__m2, __comp, std::__identity()); + __len11 = _Ops::distance(__first, __m1); } else { if (__len1 == 1) { // __len1 >= __len2 && __len2 > 0, therefore __len2 == 1 // It is known *__first > *__middle - swap(*__first, *__middle); + _Ops::iter_swap(__first, __middle); return; } // __len1 >= 2, __len2 >= 1 __len11 = __len1 / 2; __m1 = __first; - _VSTD::advance(__m1, __len11); + _Ops::advance(__m1, __len11); __m2 = std::lower_bound(__middle, __last, *__m1, __comp); - __len21 = _VSTD::distance(__middle, __m2); + __len21 = _Ops::distance(__middle, __m2); } difference_type __len12 = __len1 - __len11; // distance(__m1, __middle) difference_type __len22 = __len2 - __len21; // distance(__m2, __last) // [__first, __m1) [__m1, __middle) [__middle, __m2) [__m2, __last) // swap middle two partitions + // TODO(alg-policy): pass `_AlgPolicy` once it's supported by `rotate`. __middle = _VSTD::rotate(__m1, __middle, __m2); // __len12 and __len21 now have swapped meanings // merge smaller range with recursive call and larger with tail recursion elimination if (__len11 + __len21 < __len12 + __len22) { - _VSTD::__inplace_merge<_Compare>(__first, __m1, __middle, __comp, __len11, __len21, __buff, __buff_size); + std::__inplace_merge<_AlgPolicy, _Compare>( + __first, __m1, __middle, __comp, __len11, __len21, __buff, __buff_size); // _VSTD::__inplace_merge<_Compare>(__middle, __m2, __last, __comp, __len12, __len22, __buff, __buff_size); __first = __middle; __middle = __m2; @@ -190,7 +197,8 @@ __inplace_merge(_BidirectionalIterator __first, _BidirectionalIterator __middle, } else { - _VSTD::__inplace_merge<_Compare>(__middle, __m2, __last, __comp, __len12, __len22, __buff, __buff_size); + std::__inplace_merge<_AlgPolicy, _Compare>( + __middle, __m2, __last, __comp, __len12, __len22, __buff, __buff_size); // _VSTD::__inplace_merge<_Compare>(__first, __m1, __middle, __comp, __len11, __len21, __buff, __buff_size); __last = __middle; __middle = __m1; @@ -217,7 +225,7 @@ _LIBCPP_SUPPRESS_DEPRECATED_PUSH _LIBCPP_SUPPRESS_DEPRECATED_POP unique_ptr<value_type, __return_temporary_buffer> __h(__buf.first); typedef typename __comp_ref_type<_Compare>::type _Comp_ref; - return _VSTD::__inplace_merge<_Comp_ref>(__first, __middle, __last, __comp, __len1, __len2, + return _VSTD::__inplace_merge<_ClassicAlgPolicy, _Comp_ref>(__first, __middle, __last, __comp, __len1, __len2, __buf.first, __buf.second); } diff --git a/libcxx/include/__algorithm/iterator_operations.h b/libcxx/include/__algorithm/iterator_operations.h index eb627e1ace7a..8307d71214e5 100644 --- a/libcxx/include/__algorithm/iterator_operations.h +++ b/libcxx/include/__algorithm/iterator_operations.h @@ -41,6 +41,7 @@ struct _IterOps<_RangeAlgPolicy> { static constexpr auto next = ranges::next; static constexpr auto __advance_to = ranges::advance; }; + #endif struct _ClassicAlgPolicy {}; @@ -65,11 +66,24 @@ struct _IterOps<_ClassicAlgPolicy> { // iter_move template <class _Iter> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 - // Declaring the return type is necessary for the C++03 mode (which doesn't support placeholder return types). 
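// Illustrative sketch, not part of the imported diff. The replacement just
// below splits _IterOps<_ClassicAlgPolicy>::__iter_move into two overloads
// because C++03 has no `decltype(auto)`: when iterator_traits<I>::reference is
// a true reference the element can be cast to an rvalue reference, but when it
// is a proxy class (std::vector<bool>::iterator is the classic case) the proxy
// must be returned by value, since moving out of a dereference that is already
// a prvalue would hand back a dangling reference. Stand-alone equivalent under
// a hypothetical name (iter_move_sketch):
#include <iterator>
#include <type_traits>
#include <utility>
#include <vector>

template <class It, class Ref = typename std::iterator_traits<It>::reference>
typename std::enable_if<std::is_reference<Ref>::value,
                        typename std::remove_reference<Ref>::type&&>::type
iter_move_sketch(It it) {
  return std::move(*it);  // real reference: safe to move from the element
}

template <class It, class Ref = typename std::iterator_traits<It>::reference>
typename std::enable_if<!std::is_reference<Ref>::value, Ref>::type
iter_move_sketch(It it) {
  return *it;             // proxy reference: return the proxy by value
}

int main() {
  std::vector<int> v(1, 42);
  int moved = iter_move_sketch(v.begin());    // picks the int&& overload
  std::vector<bool> b(1, true);
  bool flag = iter_move_sketch(b.begin());    // returns vector<bool>::reference by value
  (void)moved; (void)flag;
}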
- static typename iterator_traits<__uncvref_t<_Iter> >::value_type&& __iter_move(_Iter&& __i) { + // Declaring the return type is necessary for C++03, so we basically mirror what `decltype(auto)` would deduce. + static __enable_if_t< + is_reference<typename iterator_traits<__uncvref_t<_Iter> >::reference>::value, + typename remove_reference< typename iterator_traits<__uncvref_t<_Iter> >::reference >::type&&> + __iter_move(_Iter&& __i) { return std::move(*std::forward<_Iter>(__i)); } + template <class _Iter> + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 + // Declaring the return type is necessary for C++03, so we basically mirror what `decltype(auto)` would deduce. + static __enable_if_t< + !is_reference<typename iterator_traits<__uncvref_t<_Iter> >::reference>::value, + typename iterator_traits<__uncvref_t<_Iter> >::reference> + __iter_move(_Iter&& __i) { + return *std::forward<_Iter>(__i); + } + // iter_swap template <class _Iter1, class _Iter2> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 @@ -85,7 +99,15 @@ struct _IterOps<_ClassicAlgPolicy> { } template <class _Iter> - _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR_AFTER_CXX11 void __advance_to(_Iter& __first, _Iter __last) { + _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR_AFTER_CXX11 + __uncvref_t<_Iter> next(_Iter&& __it, + typename iterator_traits<__uncvref_t<_Iter> >::difference_type __n = 1){ + return std::next(std::forward<_Iter>(__it), __n); + } + + template <class _Iter> + _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR_AFTER_CXX11 + void __advance_to(_Iter& __first, _Iter __last) { __first = __last; } }; diff --git a/libcxx/include/__algorithm/make_heap.h b/libcxx/include/__algorithm/make_heap.h index bc39d82bf916..bf9dd96756af 100644 --- a/libcxx/include/__algorithm/make_heap.h +++ b/libcxx/include/__algorithm/make_heap.h @@ -11,6 +11,7 @@ #include <__algorithm/comp.h> #include <__algorithm/comp_ref_type.h> +#include <__algorithm/iterator_operations.h> #include <__algorithm/sift_down.h> #include <__config> #include <__iterator/iterator_traits.h> @@ -22,7 +23,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -template <class _Compare, class _RandomAccessIterator> +template <class _AlgPolicy, class _Compare, class _RandomAccessIterator> inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 void __make_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare& __comp) { using _CompRef = typename __comp_ref_type<_Compare>::type; @@ -33,7 +34,7 @@ void __make_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _C if (__n > 1) { // start from the first parent, there is no need to consider children for (difference_type __start = (__n - 2) / 2; __start >= 0; --__start) { - std::__sift_down<_CompRef>(__first, __comp_ref, __n, __first + __start); + std::__sift_down<_AlgPolicy, _CompRef>(__first, __comp_ref, __n, __first + __start); } } } @@ -41,7 +42,7 @@ void __make_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _C template <class _RandomAccessIterator, class _Compare> inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 void make_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { - std::__make_heap(std::move(__first), std::move(__last), __comp); + std::__make_heap<_ClassicAlgPolicy>(std::move(__first), std::move(__last), __comp); } template <class _RandomAccessIterator> diff --git a/libcxx/include/__algorithm/make_projected.h b/libcxx/include/__algorithm/make_projected.h index 6d8ebfd3d90e..64fc3dfb6a12 100644 --- 
a/libcxx/include/__algorithm/make_projected.h +++ b/libcxx/include/__algorithm/make_projected.h @@ -27,6 +27,21 @@ _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { +template <class _Pred, class _Proj> +_LIBCPP_HIDE_FROM_ABI constexpr static +decltype(auto) __make_projected_pred(_Pred& __pred, _Proj& __proj) { + if constexpr (same_as<decay_t<_Proj>, identity> && !is_member_pointer_v<decay_t<_Pred>>) { + // Avoid creating the lambda and just use the pristine predicate -- for certain algorithms, this would enable + // optimizations that rely on the type of the predicate. + return __pred; + + } else { + return [&](auto&& __x) { + return std::invoke(__pred, std::invoke(__proj, std::forward<decltype(__x)>(__x))); + }; + } +} + template <class _Comp, class _Proj> _LIBCPP_HIDE_FROM_ABI constexpr static decltype(auto) __make_projected_comp(_Comp& __comp, _Proj& __proj) { diff --git a/libcxx/include/__algorithm/min_element.h b/libcxx/include/__algorithm/min_element.h index 129833d42bda..17b242c341e6 100644 --- a/libcxx/include/__algorithm/min_element.h +++ b/libcxx/include/__algorithm/min_element.h @@ -12,7 +12,11 @@ #include <__algorithm/comp.h> #include <__algorithm/comp_ref_type.h> #include <__config> +#include <__functional/identity.h> +#include <__functional/invoke.h> #include <__iterator/iterator_traits.h> +#include <__type_traits/is_callable.h> +#include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -20,28 +24,38 @@ _LIBCPP_BEGIN_NAMESPACE_STD -template <class _Compare, class _ForwardIterator> -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 _ForwardIterator -__min_element(_ForwardIterator __first, _ForwardIterator __last, _Compare __comp) -{ - static_assert(__is_cpp17_forward_iterator<_ForwardIterator>::value, - "std::min_element requires a ForwardIterator"); - if (__first != __last) - { - _ForwardIterator __i = __first; - while (++__i != __last) - if (__comp(*__i, *__first)) - __first = __i; - } +template <class _Comp, class _Iter, class _Sent, class _Proj> +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 +_Iter __min_element(_Iter __first, _Sent __last, _Comp __comp, _Proj& __proj) { + if (__first == __last) return __first; + + _Iter __i = __first; + while (++__i != __last) + if (std::__invoke(__comp, std::__invoke(__proj, *__i), std::__invoke(__proj, *__first))) + __first = __i; + + return __first; +} + +template <class _Comp, class _Iter, class _Sent> +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 +_Iter __min_element(_Iter __first, _Sent __last, _Comp __comp) { + auto __proj = __identity(); + return std::__min_element<_Comp>(std::move(__first), std::move(__last), __comp, __proj); } template <class _ForwardIterator, class _Compare> _LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 _ForwardIterator min_element(_ForwardIterator __first, _ForwardIterator __last, _Compare __comp) { - typedef typename __comp_ref_type<_Compare>::type _Comp_ref; - return _VSTD::__min_element<_Comp_ref>(__first, __last, __comp); + static_assert(__is_cpp17_forward_iterator<_ForwardIterator>::value, + "std::min_element requires a ForwardIterator"); + static_assert(__is_callable<_Compare, decltype(*__first), decltype(*__first)>::value, + "The comparator has to be callable"); + + typedef typename __comp_ref_type<_Compare>::type _Comp_ref; + return std::__min_element<_Comp_ref>(std::move(__first), std::move(__last), __comp); } template <class _ForwardIterator> diff --git 
a/libcxx/include/__algorithm/nth_element.h b/libcxx/include/__algorithm/nth_element.h index c7cdef5be817..688398dee814 100644 --- a/libcxx/include/__algorithm/nth_element.h +++ b/libcxx/include/__algorithm/nth_element.h @@ -11,13 +11,13 @@ #include <__algorithm/comp.h> #include <__algorithm/comp_ref_type.h> +#include <__algorithm/iterator_operations.h> #include <__algorithm/sort.h> #include <__config> #include <__debug> #include <__debug_utils/randomize_range.h> #include <__iterator/iterator_traits.h> #include <__utility/move.h> -#include <__utility/swap.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -41,10 +41,12 @@ __nth_element_find_guard(_RandomAccessIterator& __i, _RandomAccessIterator& __j, } } -template <class _Compare, class _RandomAccessIterator> +template <class _AlgPolicy, class _Compare, class _RandomAccessIterator> _LIBCPP_CONSTEXPR_AFTER_CXX11 void __nth_element(_RandomAccessIterator __first, _RandomAccessIterator __nth, _RandomAccessIterator __last, _Compare __comp) { + using _Ops = _IterOps<_AlgPolicy>; + // _Compare is known to be a reference type typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type; const difference_type __limit = 7; @@ -60,24 +62,24 @@ __nth_element(_RandomAccessIterator __first, _RandomAccessIterator __nth, _Rando return; case 2: if (__comp(*--__last, *__first)) - swap(*__first, *__last); + _Ops::iter_swap(__first, __last); return; case 3: { _RandomAccessIterator __m = __first; - _VSTD::__sort3<_Compare>(__first, ++__m, --__last, __comp); + std::__sort3<_AlgPolicy, _Compare>(__first, ++__m, --__last, __comp); return; } } if (__len <= __limit) { - _VSTD::__selection_sort<_Compare>(__first, __last, __comp); + std::__selection_sort<_AlgPolicy, _Compare>(__first, __last, __comp); return; } // __len > __limit >= 3 _RandomAccessIterator __m = __first + __len/2; _RandomAccessIterator __lm1 = __last; - unsigned __n_swaps = _VSTD::__sort3<_Compare>(__first, __m, --__lm1, __comp); + unsigned __n_swaps = std::__sort3<_AlgPolicy, _Compare>(__first, __m, --__lm1, __comp); // *__m is median // partition [__first, __m) < *__m and *__m <= [__m, __last) // (this inhibits tossing elements equivalent to __m around unnecessarily) @@ -90,7 +92,7 @@ __nth_element(_RandomAccessIterator __first, _RandomAccessIterator __nth, _Rando { // *__first == *__m, *__first doesn't go in first part if (_VSTD::__nth_element_find_guard<_Compare>(__i, __j, __m, __comp)) { - swap(*__i, *__j); + _Ops::iter_swap(__i, __j); ++__n_swaps; } else { // *__first == *__m, *__m <= all other elements @@ -102,7 +104,7 @@ __nth_element(_RandomAccessIterator __first, _RandomAccessIterator __nth, _Rando if (__i == __j) { return; // [__first, __last) all equivalent elements } else if (__comp(*__first, *__i)) { - swap(*__i, *__j); + _Ops::iter_swap(__i, __j); ++__n_swaps; ++__i; break; @@ -121,7 +123,7 @@ __nth_element(_RandomAccessIterator __first, _RandomAccessIterator __nth, _Rando ; if (__i >= __j) break; - swap(*__i, *__j); + _Ops::iter_swap(__i, __j); ++__n_swaps; ++__i; } @@ -152,7 +154,7 @@ __nth_element(_RandomAccessIterator __first, _RandomAccessIterator __nth, _Rando ; if (__i >= __j) break; - swap(*__i, *__j); + _Ops::iter_swap(__i, __j); ++__n_swaps; // It is known that __m != __j // If __m just moved, follow it @@ -164,7 +166,7 @@ __nth_element(_RandomAccessIterator __first, _RandomAccessIterator __nth, _Rando // [__first, __i) < *__m and *__m <= [__i, __last) if (__i != __m && __comp(*__m, *__i)) { - swap(*__i, *__m); + 
_Ops::iter_swap(__i, __m); ++__n_swaps; } // [__first, __i) < *__i and *__i <= [__i+1, __last) @@ -220,21 +222,21 @@ __nth_element(_RandomAccessIterator __first, _RandomAccessIterator __nth, _Rando } } -template <class _RandomAccessIterator, class _Compare> +template <class _AlgPolicy, class _RandomAccessIterator, class _Compare> inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 void __nth_element_impl(_RandomAccessIterator __first, _RandomAccessIterator __nth, _RandomAccessIterator __last, _Compare& __comp) { if (__nth == __last) return; - std::__debug_randomize_range(__first, __last); + std::__debug_randomize_range<_AlgPolicy>(__first, __last); using _Comp_ref = typename __comp_ref_type<_Compare>::type; - std::__nth_element<_Comp_ref>(__first, __nth, __last, __comp); + std::__nth_element<_AlgPolicy, _Comp_ref>(__first, __nth, __last, __comp); - std::__debug_randomize_range(__first, __nth); + std::__debug_randomize_range<_AlgPolicy>(__first, __nth); if (__nth != __last) { - std::__debug_randomize_range(++__nth, __last); + std::__debug_randomize_range<_AlgPolicy>(++__nth, __last); } } @@ -242,7 +244,7 @@ template <class _RandomAccessIterator, class _Compare> inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 void nth_element(_RandomAccessIterator __first, _RandomAccessIterator __nth, _RandomAccessIterator __last, _Compare __comp) { - std::__nth_element_impl(std::move(__first), std::move(__nth), std::move(__last), __comp); + std::__nth_element_impl<_ClassicAlgPolicy>(std::move(__first), std::move(__nth), std::move(__last), __comp); } template <class _RandomAccessIterator> diff --git a/libcxx/include/__algorithm/partial_sort.h b/libcxx/include/__algorithm/partial_sort.h index e008c0c99679..24016e5cf5a5 100644 --- a/libcxx/include/__algorithm/partial_sort.h +++ b/libcxx/include/__algorithm/partial_sort.h @@ -11,6 +11,7 @@ #include <__algorithm/comp.h> #include <__algorithm/comp_ref_type.h> +#include <__algorithm/iterator_operations.h> #include <__algorithm/make_heap.h> #include <__algorithm/sift_down.h> #include <__algorithm/sort_heap.h> @@ -18,7 +19,8 @@ #include <__debug> #include <__debug_utils/randomize_range.h> #include <__iterator/iterator_traits.h> -#include <__utility/swap.h> +#include <__utility/move.h> +#include <type_traits> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -26,24 +28,47 @@ _LIBCPP_BEGIN_NAMESPACE_STD -template <class _Compare, class _RandomAccessIterator> -_LIBCPP_CONSTEXPR_AFTER_CXX17 void -__partial_sort(_RandomAccessIterator __first, _RandomAccessIterator __middle, _RandomAccessIterator __last, - _Compare __comp) -{ - if (__first == __middle) - return; - _VSTD::__make_heap<_Compare>(__first, __middle, __comp); - typename iterator_traits<_RandomAccessIterator>::difference_type __len = __middle - __first; - for (_RandomAccessIterator __i = __middle; __i != __last; ++__i) - { - if (__comp(*__i, *__first)) - { - swap(*__i, *__first); - _VSTD::__sift_down<_Compare>(__first, __comp, __len, __first); - } - } - _VSTD::__sort_heap<_Compare>(__first, __middle, __comp); +template <class _AlgPolicy, class _Compare, class _RandomAccessIterator, class _Sentinel> +_LIBCPP_CONSTEXPR_AFTER_CXX17 +_RandomAccessIterator __partial_sort_impl( + _RandomAccessIterator __first, _RandomAccessIterator __middle, _Sentinel __last, _Compare __comp) { + if (__first == __middle) { + return _IterOps<_AlgPolicy>::next(__middle, __last); + } + + std::__make_heap<_AlgPolicy, _Compare>(__first, __middle, __comp); + + typename 
iterator_traits<_RandomAccessIterator>::difference_type __len = __middle - __first; + _RandomAccessIterator __i = __middle; + for (; __i != __last; ++__i) + { + if (__comp(*__i, *__first)) + { + _IterOps<_AlgPolicy>::iter_swap(__i, __first); + std::__sift_down<_AlgPolicy, _Compare>(__first, __comp, __len, __first); + } + + } + std::__sort_heap<_AlgPolicy, _Compare>(std::move(__first), std::move(__middle), __comp); + + return __i; +} + +template <class _AlgPolicy, class _Compare, class _RandomAccessIterator, class _Sentinel> +_LIBCPP_CONSTEXPR_AFTER_CXX17 +_RandomAccessIterator __partial_sort(_RandomAccessIterator __first, _RandomAccessIterator __middle, _Sentinel __last, + _Compare& __comp) { + if (__first == __middle) + return _IterOps<_AlgPolicy>::next(__middle, __last); + + std::__debug_randomize_range<_AlgPolicy>(__first, __last); + + using _Comp_ref = typename __comp_ref_type<_Compare>::type; + auto __last_iter = std::__partial_sort_impl<_AlgPolicy, _Comp_ref>(__first, __middle, __last, __comp); + + std::__debug_randomize_range<_AlgPolicy>(__middle, __last); + + return __last_iter; } template <class _RandomAccessIterator, class _Compare> @@ -52,10 +77,10 @@ void partial_sort(_RandomAccessIterator __first, _RandomAccessIterator __middle, _RandomAccessIterator __last, _Compare __comp) { - std::__debug_randomize_range(__first, __last); - typedef typename __comp_ref_type<_Compare>::type _Comp_ref; - _VSTD::__partial_sort<_Comp_ref>(__first, __middle, __last, __comp); - std::__debug_randomize_range(__middle, __last); + static_assert(std::is_copy_constructible<_RandomAccessIterator>::value, "Iterators must be copy constructible."); + static_assert(std::is_copy_assignable<_RandomAccessIterator>::value, "Iterators must be copy assignable."); + + (void)std::__partial_sort<_ClassicAlgPolicy>(std::move(__first), std::move(__middle), std::move(__last), __comp); } template <class _RandomAccessIterator> diff --git a/libcxx/include/__algorithm/partial_sort_copy.h b/libcxx/include/__algorithm/partial_sort_copy.h index 7ed1e538e9b8..3556764e652d 100644 --- a/libcxx/include/__algorithm/partial_sort_copy.h +++ b/libcxx/include/__algorithm/partial_sort_copy.h @@ -11,6 +11,7 @@ #include <__algorithm/comp.h> #include <__algorithm/comp_ref_type.h> +#include <__algorithm/iterator_operations.h> #include <__algorithm/make_heap.h> #include <__algorithm/sift_down.h> #include <__algorithm/sort_heap.h> @@ -23,7 +24,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -template <class _Compare, class _InputIterator, class _RandomAccessIterator> +template <class _AlgPolicy, class _Compare, class _InputIterator, class _RandomAccessIterator> _LIBCPP_CONSTEXPR_AFTER_CXX17 _RandomAccessIterator __partial_sort_copy(_InputIterator __first, _InputIterator __last, _RandomAccessIterator __result_first, _RandomAccessIterator __result_last, _Compare __comp) @@ -33,15 +34,15 @@ __partial_sort_copy(_InputIterator __first, _InputIterator __last, { for (; __first != __last && __r != __result_last; ++__first, (void) ++__r) *__r = *__first; - _VSTD::__make_heap<_Compare>(__result_first, __r, __comp); + std::__make_heap<_AlgPolicy, _Compare>(__result_first, __r, __comp); typename iterator_traits<_RandomAccessIterator>::difference_type __len = __r - __result_first; for (; __first != __last; ++__first) if (__comp(*__first, *__result_first)) { *__result_first = *__first; - _VSTD::__sift_down<_Compare>(__result_first, __comp, __len, __result_first); + std::__sift_down<_AlgPolicy, _Compare>(__result_first, __comp, __len, __result_first); } - 
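// Illustrative sketch, not part of the imported diff. __partial_sort_impl
// above keeps the smallest `middle - first` elements in a max-heap over
// [first, middle): each later element that compares less than the heap top is
// swapped in and sifted down, and the heap is finally sorted with __sort_heap
// (the helper now also returns the iterator to the real end, which the ranges
// overloads need). The same shape, using the standard heap algorithms, under a
// hypothetical name (partial_sort_sketch):
#include <algorithm>
#include <vector>

template <class It>
void partial_sort_sketch(It first, It middle, It last) {
  std::make_heap(first, middle);            // max-heap of the first k elements
  for (It i = middle; i != last; ++i) {
    if (*i < *first) {                      // smaller than the current k-th smallest
      std::pop_heap(first, middle);         // move the heap top to middle - 1
      std::iter_swap(middle - 1, i);        // replace it with *i
      std::push_heap(first, middle);        // restore the heap invariant
    }
  }
  std::sort_heap(first, middle);            // the k smallest, in ascending order
}

int main() {
  std::vector<int> v = {9, 4, 7, 1, 8, 2, 6};
  partial_sort_sketch(v.begin(), v.begin() + 3, v.end());
  // v now begins 1, 2, 4; the order of the remaining elements is unspecified.
}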
_VSTD::__sort_heap<_Compare>(__result_first, __r, __comp); + std::__sort_heap<_AlgPolicy, _Compare>(__result_first, __r, __comp); } return __r; } @@ -53,7 +54,8 @@ partial_sort_copy(_InputIterator __first, _InputIterator __last, _RandomAccessIterator __result_first, _RandomAccessIterator __result_last, _Compare __comp) { typedef typename __comp_ref_type<_Compare>::type _Comp_ref; - return _VSTD::__partial_sort_copy<_Comp_ref>(__first, __last, __result_first, __result_last, __comp); + return std::__partial_sort_copy<_ClassicAlgPolicy, _Comp_ref>( + __first, __last, __result_first, __result_last, __comp); } template <class _InputIterator, class _RandomAccessIterator> diff --git a/libcxx/include/__algorithm/partition.h b/libcxx/include/__algorithm/partition.h index 73d94831ed87..60b4e290ebeb 100644 --- a/libcxx/include/__algorithm/partition.h +++ b/libcxx/include/__algorithm/partition.h @@ -9,9 +9,12 @@ #ifndef _LIBCPP___ALGORITHM_PARTITION_H #define _LIBCPP___ALGORITHM_PARTITION_H +#include <__algorithm/iterator_operations.h> #include <__config> #include <__iterator/iterator_traits.h> -#include <__utility/swap.h> +#include <__utility/move.h> +#include <__utility/pair.h> +#include <type_traits> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -19,40 +22,45 @@ _LIBCPP_BEGIN_NAMESPACE_STD -template <class _Predicate, class _ForwardIterator> -_LIBCPP_CONSTEXPR_AFTER_CXX17 _ForwardIterator -__partition(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pred, forward_iterator_tag) +template <class _Predicate, class _AlgPolicy, class _ForwardIterator, class _Sentinel> +_LIBCPP_CONSTEXPR_AFTER_CXX17 pair<_ForwardIterator, _ForwardIterator> +__partition_impl(_ForwardIterator __first, _Sentinel __last, _Predicate __pred, forward_iterator_tag) { while (true) { if (__first == __last) - return __first; + return std::make_pair(std::move(__first), std::move(__first)); if (!__pred(*__first)) break; ++__first; } - for (_ForwardIterator __p = __first; ++__p != __last;) + + _ForwardIterator __p = __first; + while (++__p != __last) { if (__pred(*__p)) { - swap(*__first, *__p); + _IterOps<_AlgPolicy>::iter_swap(__first, __p); ++__first; } } - return __first; + return std::make_pair(std::move(__first), std::move(__p)); } -template <class _Predicate, class _BidirectionalIterator> -_LIBCPP_CONSTEXPR_AFTER_CXX17 _BidirectionalIterator -__partition(_BidirectionalIterator __first, _BidirectionalIterator __last, _Predicate __pred, +template <class _Predicate, class _AlgPolicy, class _BidirectionalIterator, class _Sentinel> +_LIBCPP_CONSTEXPR_AFTER_CXX17 pair<_BidirectionalIterator, _BidirectionalIterator> +__partition_impl(_BidirectionalIterator __first, _Sentinel __sentinel, _Predicate __pred, bidirectional_iterator_tag) { + _BidirectionalIterator __original_last = _IterOps<_AlgPolicy>::next(__first, __sentinel); + _BidirectionalIterator __last = __original_last; + while (true) { while (true) { if (__first == __last) - return __first; + return std::make_pair(std::move(__first), std::move(__original_last)); if (!__pred(*__first)) break; ++__first; @@ -60,20 +68,29 @@ __partition(_BidirectionalIterator __first, _BidirectionalIterator __last, _Pred do { if (__first == --__last) - return __first; + return std::make_pair(std::move(__first), std::move(__original_last)); } while (!__pred(*__last)); - swap(*__first, *__last); + _IterOps<_AlgPolicy>::iter_swap(__first, __last); ++__first; } } +template <class _AlgPolicy, class _ForwardIterator, class _Sentinel, class _Predicate, 
class _IterCategory> +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 +pair<_ForwardIterator, _ForwardIterator> __partition( + _ForwardIterator __first, _Sentinel __last, _Predicate&& __pred, _IterCategory __iter_category) { + return std::__partition_impl<__uncvref_t<_Predicate>&, _AlgPolicy>( + std::move(__first), std::move(__last), __pred, __iter_category); +} + template <class _ForwardIterator, class _Predicate> inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _ForwardIterator partition(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) { - return _VSTD::__partition<_Predicate&>( - __first, __last, __pred, typename iterator_traits<_ForwardIterator>::iterator_category()); + using _IterCategory = typename iterator_traits<_ForwardIterator>::iterator_category; + auto __result = std::__partition<_ClassicAlgPolicy>(std::move(__first), std::move(__last), __pred, _IterCategory()); + return __result.first; } _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__algorithm/pop_heap.h b/libcxx/include/__algorithm/pop_heap.h index cadda81f6c88..870af50c133e 100644 --- a/libcxx/include/__algorithm/pop_heap.h +++ b/libcxx/include/__algorithm/pop_heap.h @@ -11,12 +11,14 @@ #include <__algorithm/comp.h> #include <__algorithm/comp_ref_type.h> +#include <__algorithm/iterator_operations.h> #include <__algorithm/push_heap.h> #include <__algorithm/sift_down.h> #include <__assert> #include <__config> #include <__iterator/iterator_traits.h> #include <__utility/move.h> +#include <type_traits> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -24,7 +26,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -template <class _Compare, class _RandomAccessIterator> +template <class _AlgPolicy, class _Compare, class _RandomAccessIterator> inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 void __pop_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare& __comp, typename iterator_traits<_RandomAccessIterator>::difference_type __len) { @@ -35,17 +37,17 @@ void __pop_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Co using value_type = typename iterator_traits<_RandomAccessIterator>::value_type; if (__len > 1) { - value_type __top = std::move(*__first); // create a hole at __first - _RandomAccessIterator __hole = std::__floyd_sift_down<_CompRef>(__first, __comp_ref, __len); + value_type __top = _IterOps<_AlgPolicy>::__iter_move(__first); // create a hole at __first + _RandomAccessIterator __hole = std::__floyd_sift_down<_AlgPolicy, _CompRef>(__first, __comp_ref, __len); --__last; if (__hole == __last) { *__hole = std::move(__top); } else { - *__hole = std::move(*__last); + *__hole = _IterOps<_AlgPolicy>::__iter_move(__last); ++__hole; *__last = std::move(__top); - std::__sift_up<_CompRef>(__first, __hole, __comp_ref, __hole - __first); + std::__sift_up<_AlgPolicy, _CompRef>(__first, __hole, __comp_ref, __hole - __first); } } } @@ -53,8 +55,11 @@ void __pop_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Co template <class _RandomAccessIterator, class _Compare> inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 void pop_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { + static_assert(std::is_copy_constructible<_RandomAccessIterator>::value, "Iterators must be copy constructible."); + static_assert(std::is_copy_assignable<_RandomAccessIterator>::value, "Iterators must be copy assignable."); + typename iterator_traits<_RandomAccessIterator>::difference_type 
__len = __last - __first; - std::__pop_heap(std::move(__first), std::move(__last), __comp, __len); + std::__pop_heap<_ClassicAlgPolicy>(std::move(__first), std::move(__last), __comp, __len); } template <class _RandomAccessIterator> diff --git a/libcxx/include/__algorithm/push_heap.h b/libcxx/include/__algorithm/push_heap.h index 1e3eec373d4f..716670b76788 100644 --- a/libcxx/include/__algorithm/push_heap.h +++ b/libcxx/include/__algorithm/push_heap.h @@ -11,9 +11,11 @@ #include <__algorithm/comp.h> #include <__algorithm/comp_ref_type.h> +#include <__algorithm/iterator_operations.h> #include <__config> #include <__iterator/iterator_traits.h> #include <__utility/move.h> +#include <type_traits> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -21,7 +23,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -template <class _Compare, class _RandomAccessIterator> +template <class _AlgPolicy, class _Compare, class _RandomAccessIterator> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 void __sift_up(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, typename iterator_traits<_RandomAccessIterator>::difference_type __len) { @@ -32,9 +34,9 @@ void __sift_up(_RandomAccessIterator __first, _RandomAccessIterator __last, _Com _RandomAccessIterator __ptr = __first + __len; if (__comp(*__ptr, *--__last)) { - value_type __t(std::move(*__last)); + value_type __t(_IterOps<_AlgPolicy>::__iter_move(__last)); do { - *__last = std::move(*__ptr); + *__last = _IterOps<_AlgPolicy>::__iter_move(__ptr); __last = __ptr; if (__len == 0) break; @@ -47,18 +49,21 @@ void __sift_up(_RandomAccessIterator __first, _RandomAccessIterator __last, _Com } } -template <class _RandomAccessIterator, class _Compare> +template <class _AlgPolicy, class _RandomAccessIterator, class _Compare> inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 void __push_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare& __comp) { using _CompRef = typename __comp_ref_type<_Compare>::type; typename iterator_traits<_RandomAccessIterator>::difference_type __len = __last - __first; - std::__sift_up<_CompRef>(std::move(__first), std::move(__last), __comp, __len); + std::__sift_up<_AlgPolicy, _CompRef>(std::move(__first), std::move(__last), __comp, __len); } template <class _RandomAccessIterator, class _Compare> inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 void push_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { - std::__push_heap(std::move(__first), std::move(__last), __comp); + static_assert(std::is_copy_constructible<_RandomAccessIterator>::value, "Iterators must be copy constructible."); + static_assert(std::is_copy_assignable<_RandomAccessIterator>::value, "Iterators must be copy assignable."); + + std::__push_heap<_ClassicAlgPolicy>(std::move(__first), std::move(__last), __comp); } template <class _RandomAccessIterator> diff --git a/libcxx/include/__algorithm/ranges_equal_range.h b/libcxx/include/__algorithm/ranges_equal_range.h index 28d721530bda..dd4b377df1a1 100644 --- a/libcxx/include/__algorithm/ranges_equal_range.h +++ b/libcxx/include/__algorithm/ranges_equal_range.h @@ -10,7 +10,7 @@ #define _LIBCPP___ALGORITHM_RANGES_EQUAL_RANGE_H #include <__algorithm/equal_range.h> -#include <__algorithm/make_projected.h> +#include <__algorithm/iterator_operations.h> #include <__config> #include <__functional/identity.h> #include <__functional/invoke.h> @@ -37,27 +37,30 @@ namespace ranges { namespace __equal_range { struct __fn { - - 
template <forward_iterator _Iter, sentinel_for<_Iter> _Sent, class _Tp, class _Proj = identity, - indirect_strict_weak_order<const _Tp*, projected<_Iter, _Proj>> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr - subrange<_Iter> operator()(_Iter __first, _Sent __last, const _Tp& __value, _Comp __comp = {}, - _Proj __proj = {}) const { - // TODO: implement - (void)__first; (void)__last; (void)__value; (void)__comp; (void)__proj; - return {}; + template < + forward_iterator _Iter, + sentinel_for<_Iter> _Sent, + class _Tp, + class _Proj = identity, + indirect_strict_weak_order<const _Tp*, projected<_Iter, _Proj>> _Comp = ranges::less> + _LIBCPP_HIDE_FROM_ABI constexpr subrange<_Iter> + operator()(_Iter __first, _Sent __last, const _Tp& __value, _Comp __comp = {}, _Proj __proj = {}) const { + auto __ret = std::__equal_range<_RangeAlgPolicy>( + std::move(__first), std::move(__last), __value, __comp, __proj); + return {std::move(__ret.first), std::move(__ret.second)}; } - template <forward_range _Range, class _Tp, class _Proj = identity, - indirect_strict_weak_order<const _Tp*, projected<iterator_t<_Range>, _Proj>> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr - borrowed_subrange_t<_Range> operator()(_Range&& __range, const _Tp& __value, _Comp __comp = {}, - _Proj __proj = {}) const { - // TODO: implement - (void)__range; (void)__value; (void)__comp; (void)__proj; - return {}; + template < + forward_range _Range, + class _Tp, + class _Proj = identity, + indirect_strict_weak_order<const _Tp*, projected<iterator_t<_Range>, _Proj>> _Comp = ranges::less> + _LIBCPP_HIDE_FROM_ABI constexpr borrowed_subrange_t<_Range> + operator()(_Range&& __range, const _Tp& __value, _Comp __comp = {}, _Proj __proj = {}) const { + auto __ret = std::__equal_range<_RangeAlgPolicy>( + ranges::begin(__range), ranges::end(__range), __value, __comp, __proj); + return {std::move(__ret.first), std::move(__ret.second)}; } - }; } // namespace __equal_range diff --git a/libcxx/include/__algorithm/ranges_fill.h b/libcxx/include/__algorithm/ranges_fill.h index 846e31885141..7ce4a76ba9e9 100644 --- a/libcxx/include/__algorithm/ranges_fill.h +++ b/libcxx/include/__algorithm/ranges_fill.h @@ -30,7 +30,7 @@ struct __fn { template <class _Type, output_iterator<const _Type&> _Iter, sentinel_for<_Iter> _Sent> _LIBCPP_HIDE_FROM_ABI constexpr _Iter operator()(_Iter __first, _Sent __last, const _Type& __value) const { - if constexpr(random_access_iterator<_Iter>) { + if constexpr(random_access_iterator<_Iter> && sized_sentinel_for<_Sent, _Iter>) { return ranges::fill_n(__first, __last - __first, __value); } else { for (; __first != __last; ++__first) diff --git a/libcxx/include/__algorithm/ranges_find_end.h b/libcxx/include/__algorithm/ranges_find_end.h index fec709e79f5a..270b00649848 100644 --- a/libcxx/include/__algorithm/ranges_find_end.h +++ b/libcxx/include/__algorithm/ranges_find_end.h @@ -11,6 +11,7 @@ #include <__algorithm/find_end.h> #include <__algorithm/iterator_operations.h> +#include <__algorithm/ranges_iterator_concept.h> #include <__config> #include <__functional/identity.h> #include <__functional/ranges_operations.h> @@ -29,23 +30,6 @@ _LIBCPP_BEGIN_NAMESPACE_STD -template <class _Iter> -consteval auto __get_iterator_concept() { - if constexpr (contiguous_iterator<_Iter>) - return contiguous_iterator_tag(); - else if constexpr (random_access_iterator<_Iter>) - return random_access_iterator_tag(); - else if constexpr (bidirectional_iterator<_Iter>) - return bidirectional_iterator_tag(); - else if constexpr 
(forward_iterator<_Iter>) - return forward_iterator_tag(); - else if constexpr (input_iterator<_Iter>) - return input_iterator_tag(); -} - -template <class _Iter> -using __iterator_concept = decltype(__get_iterator_concept<_Iter>()); - namespace ranges { namespace __find_end { struct __fn { diff --git a/libcxx/include/__algorithm/ranges_for_each_n.h b/libcxx/include/__algorithm/ranges_for_each_n.h index ddf8b047cdb2..013afbd19389 100644 --- a/libcxx/include/__algorithm/ranges_for_each_n.h +++ b/libcxx/include/__algorithm/ranges_for_each_n.h @@ -18,7 +18,6 @@ #include <__iterator/iterator_traits.h> #include <__iterator/projected.h> #include <__ranges/concepts.h> -#include <__ranges/dangling.h> #include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__algorithm/ranges_generate_n.h b/libcxx/include/__algorithm/ranges_generate_n.h index bcf50e025ecc..7bde5fb4e579 100644 --- a/libcxx/include/__algorithm/ranges_generate_n.h +++ b/libcxx/include/__algorithm/ranges_generate_n.h @@ -23,7 +23,6 @@ #include <__iterator/projected.h> #include <__ranges/access.h> #include <__ranges/concepts.h> -#include <__ranges/dangling.h> #include <__utility/forward.h> #include <__utility/move.h> diff --git a/libcxx/include/__algorithm/ranges_includes.h b/libcxx/include/__algorithm/ranges_includes.h index 19c17870ed6f..ba054e6fd89d 100644 --- a/libcxx/include/__algorithm/ranges_includes.h +++ b/libcxx/include/__algorithm/ranges_includes.h @@ -9,8 +9,8 @@ #ifndef _LIBCPP___ALGORITHM_RANGES_INCLUDES_H #define _LIBCPP___ALGORITHM_RANGES_INCLUDES_H -#include <__algorithm/make_projected.h> #include <__algorithm/includes.h> +#include <__algorithm/make_projected.h> #include <__config> #include <__functional/identity.h> #include <__functional/invoke.h> @@ -20,7 +20,6 @@ #include <__iterator/projected.h> #include <__ranges/access.h> #include <__ranges/concepts.h> -#include <__ranges/dangling.h> #include <__utility/forward.h> #include <__utility/move.h> @@ -36,29 +35,46 @@ namespace ranges { namespace __includes { struct __fn { - - template <input_iterator _Iter1, sentinel_for<_Iter1> _Sent1, input_iterator _Iter2, sentinel_for<_Iter2> _Sent2, - class _Proj1 = identity, class _Proj2 = identity, - indirect_strict_weak_order<projected<_Iter1, _Proj1>, projected<_Iter2, _Proj2>> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr - bool operator()(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Sent2 __last2, _Comp __comp = {}, - _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const { - // TODO: implement - (void)__first1; (void)__last1; (void)__first2; (void)__last2; (void)__comp; (void)__proj1; (void)__proj2; - return {}; + template < + input_iterator _Iter1, + sentinel_for<_Iter1> _Sent1, + input_iterator _Iter2, + sentinel_for<_Iter2> _Sent2, + class _Proj1 = identity, + class _Proj2 = identity, + indirect_strict_weak_order<projected<_Iter1, _Proj1>, projected<_Iter2, _Proj2>> _Comp = ranges::less> + _LIBCPP_HIDE_FROM_ABI constexpr bool operator()( + _Iter1 __first1, + _Sent1 __last1, + _Iter2 __first2, + _Sent2 __last2, + _Comp __comp = {}, + _Proj1 __proj1 = {}, + _Proj2 __proj2 = {}) const { + return std::__includes( + std::move(__first1), + std::move(__last1), + std::move(__first2), + std::move(__last2), + ranges::__make_projected_comp(__comp, __proj1, __proj2)); } - template <input_range _Range1, input_range _Range2, class _Proj1 = identity, class _Proj2 = identity, - indirect_strict_weak_order<projected<iterator_t<_Range1>, _Proj1>, - projected<iterator_t<_Range2>, 
_Proj2>> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr - bool operator()(_Range1&& __range1, _Range2&& __range2, _Comp __comp = {}, - _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const { - // TODO: implement - (void)__range1; (void)__range2; (void)__comp; (void)__proj1; (void)__proj2; - return {}; + template < + input_range _Range1, + input_range _Range2, + class _Proj1 = identity, + class _Proj2 = identity, + indirect_strict_weak_order<projected<iterator_t<_Range1>, _Proj1>, projected<iterator_t<_Range2>, _Proj2>> + _Comp = ranges::less> + _LIBCPP_HIDE_FROM_ABI constexpr bool operator()( + _Range1&& __range1, _Range2&& __range2, _Comp __comp = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const { + return std::__includes( + ranges::begin(__range1), + ranges::end(__range1), + ranges::begin(__range2), + ranges::end(__range2), + ranges::__make_projected_comp(__comp, __proj1, __proj2)); } - }; } // namespace __includes diff --git a/libcxx/include/__algorithm/ranges_is_heap.h b/libcxx/include/__algorithm/ranges_is_heap.h index 0f10fa4dcec9..00105189fed7 100644 --- a/libcxx/include/__algorithm/ranges_is_heap.h +++ b/libcxx/include/__algorithm/ranges_is_heap.h @@ -20,7 +20,6 @@ #include <__iterator/projected.h> #include <__ranges/access.h> #include <__ranges/concepts.h> -#include <__ranges/dangling.h> #include <__utility/forward.h> #include <__utility/move.h> diff --git a/libcxx/include/__algorithm/ranges_iterator_concept.h b/libcxx/include/__algorithm/ranges_iterator_concept.h new file mode 100644 index 000000000000..3323119317ae --- /dev/null +++ b/libcxx/include/__algorithm/ranges_iterator_concept.h @@ -0,0 +1,51 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_ITERATOR_CONCEPT_H +#define _LIBCPP___ALGORITHM_RANGES_ITERATOR_CONCEPT_H + +#include <__config> +#include <__iterator/concepts.h> +#include <__iterator/iterator_traits.h> +#include <type_traits> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { + +template <class _IterMaybeQualified> +consteval auto __get_iterator_concept() { + using _Iter = __uncvref_t<_IterMaybeQualified>; + + if constexpr (contiguous_iterator<_Iter>) + return contiguous_iterator_tag(); + else if constexpr (random_access_iterator<_Iter>) + return random_access_iterator_tag(); + else if constexpr (bidirectional_iterator<_Iter>) + return bidirectional_iterator_tag(); + else if constexpr (forward_iterator<_Iter>) + return forward_iterator_tag(); + else if constexpr (input_iterator<_Iter>) + return input_iterator_tag(); +} + +template <class _Iter> +using __iterator_concept = decltype(__get_iterator_concept<_Iter>()); + +} // namespace ranges +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +#endif // _LIBCPP___ALGORITHM_RANGES_ITERATOR_CONCEPT_H diff --git a/libcxx/include/__algorithm/ranges_make_heap.h b/libcxx/include/__algorithm/ranges_make_heap.h index fd488dc11a4b..8eabdd12cd2f 100644 --- a/libcxx/include/__algorithm/ranges_make_heap.h +++ b/libcxx/include/__algorithm/ranges_make_heap.h @@ -9,6 +9,7 @@ #ifndef _LIBCPP___ALGORITHM_RANGES_MAKE_HEAP_H #define _LIBCPP___ALGORITHM_RANGES_MAKE_HEAP_H +#include <__algorithm/iterator_operations.h> #include <__algorithm/make_heap.h> #include <__algorithm/make_projected.h> #include <__concepts/same_as.h> @@ -45,7 +46,7 @@ struct __fn { auto __last_iter = ranges::next(__first, __last); auto&& __projected_comp = ranges::__make_projected_comp(__comp, __proj); - std::__make_heap(std::move(__first), __last_iter, __projected_comp); + std::__make_heap<_RangeAlgPolicy>(std::move(__first), __last_iter, __projected_comp); return __last_iter; } diff --git a/libcxx/include/__algorithm/ranges_min_element.h b/libcxx/include/__algorithm/ranges_min_element.h index ae82dceb9ad8..26f95fe3a6d2 100644 --- a/libcxx/include/__algorithm/ranges_min_element.h +++ b/libcxx/include/__algorithm/ranges_min_element.h @@ -30,6 +30,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { +// TODO(ranges): `ranges::min_element` can now simply delegate to `std::__min_element`. 
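// Illustrative sketch, not part of the imported diff. The new
// __algorithm/ranges_iterator_concept.h above only centralises "the strongest
// iterator category this type models", so range algorithms can tag-dispatch:
// ranges::partition further down passes __iterator_concept<_Iter>() to pick
// the forward or bidirectional partition loop. A stand-alone C++20 equivalent
// under a hypothetical name (iterator_concept_tag):
#include <iterator>
#include <list>
#include <type_traits>

template <class I>
consteval auto iterator_concept_tag() {
  if constexpr (std::contiguous_iterator<I>)
    return std::contiguous_iterator_tag();
  else if constexpr (std::random_access_iterator<I>)
    return std::random_access_iterator_tag();
  else if constexpr (std::bidirectional_iterator<I>)
    return std::bidirectional_iterator_tag();
  else if constexpr (std::forward_iterator<I>)
    return std::forward_iterator_tag();
  else if constexpr (std::input_iterator<I>)
    return std::input_iterator_tag();
}

static_assert(std::is_same_v<decltype(iterator_concept_tag<int*>()),
                             std::contiguous_iterator_tag>);
static_assert(std::is_same_v<decltype(iterator_concept_tag<std::list<int>::iterator>()),
                             std::bidirectional_iterator_tag>);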
template <class _Ip, class _Sp, class _Proj, class _Comp> _LIBCPP_HIDE_FROM_ABI static constexpr _Ip __min_element_impl(_Ip __first, _Sp __last, _Comp& __comp, _Proj& __proj) { diff --git a/libcxx/include/__algorithm/ranges_nth_element.h b/libcxx/include/__algorithm/ranges_nth_element.h index 2a929eacb89d..b15eb816b918 100644 --- a/libcxx/include/__algorithm/ranges_nth_element.h +++ b/libcxx/include/__algorithm/ranges_nth_element.h @@ -9,6 +9,7 @@ #ifndef _LIBCPP___ALGORITHM_RANGES_NTH_ELEMENT_H #define _LIBCPP___ALGORITHM_RANGES_NTH_ELEMENT_H +#include <__algorithm/iterator_operations.h> #include <__algorithm/make_projected.h> #include <__algorithm/nth_element.h> #include <__config> @@ -44,7 +45,7 @@ struct __fn { auto __last_iter = ranges::next(__first, __last); auto&& __projected_comp = ranges::__make_projected_comp(__comp, __proj); - std::__nth_element_impl(std::move(__first), std::move(__nth), __last_iter, __projected_comp); + std::__nth_element_impl<_RangeAlgPolicy>(std::move(__first), std::move(__nth), __last_iter, __projected_comp); return __last_iter; } diff --git a/libcxx/include/__algorithm/ranges_partial_sort.h b/libcxx/include/__algorithm/ranges_partial_sort.h new file mode 100644 index 000000000000..5e82bc6fcc32 --- /dev/null +++ b/libcxx/include/__algorithm/ranges_partial_sort.h @@ -0,0 +1,77 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_PARTIAL_SORT_H +#define _LIBCPP___ALGORITHM_RANGES_PARTIAL_SORT_H + +#include <__algorithm/iterator_operations.h> +#include <__algorithm/make_projected.h> +#include <__algorithm/partial_sort.h> +#include <__concepts/same_as.h> +#include <__config> +#include <__functional/identity.h> +#include <__functional/invoke.h> +#include <__functional/ranges_operations.h> +#include <__iterator/concepts.h> +#include <__iterator/iterator_traits.h> +#include <__iterator/next.h> +#include <__iterator/projected.h> +#include <__iterator/sortable.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/dangling.h> +#include <__utility/forward.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { +namespace __partial_sort { + +struct __fn { + template <class _Iter, class _Sent, class _Comp, class _Proj> + _LIBCPP_HIDE_FROM_ABI constexpr static + _Iter __partial_sort_fn_impl(_Iter __first, _Iter __middle, _Sent __last, _Comp& __comp, _Proj& __proj) { + auto&& __projected_comp = ranges::__make_projected_comp(__comp, __proj); + return std::__partial_sort<_RangeAlgPolicy>(std::move(__first), std::move(__middle), __last, __projected_comp); + } + + template <random_access_iterator _Iter, sentinel_for<_Iter> _Sent, class _Comp = ranges::less, class _Proj = identity> + requires sortable<_Iter, _Comp, _Proj> + _LIBCPP_HIDE_FROM_ABI constexpr + _Iter operator()(_Iter __first, _Iter __middle, _Sent __last, _Comp __comp = {}, _Proj __proj = {}) const { + return __partial_sort_fn_impl(std::move(__first), std::move(__middle), std::move(__last), __comp, __proj); + } + + template 
<random_access_range _Range, class _Comp = ranges::less, class _Proj = identity> + requires sortable<iterator_t<_Range>, _Comp, _Proj> + _LIBCPP_HIDE_FROM_ABI constexpr + borrowed_iterator_t<_Range> operator()(_Range&& __r, iterator_t<_Range> __middle, _Comp __comp = {}, + _Proj __proj = {}) const { + return __partial_sort_fn_impl(ranges::begin(__r), std::move(__middle), ranges::end(__r), __comp, __proj); + } +}; + +} // namespace __partial_sort + +inline namespace __cpo { + inline constexpr auto partial_sort = __partial_sort::__fn{}; +} // namespace __cpo +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +#endif // _LIBCPP___ALGORITHM_RANGES_PARTIAL_SORT_H diff --git a/libcxx/include/__algorithm/ranges_partition.h b/libcxx/include/__algorithm/ranges_partition.h index c145e7bdb4a2..60bee699d90e 100644 --- a/libcxx/include/__algorithm/ranges_partition.h +++ b/libcxx/include/__algorithm/ranges_partition.h @@ -9,8 +9,10 @@ #ifndef _LIBCPP___ALGORITHM_RANGES_PARTITION_H #define _LIBCPP___ALGORITHM_RANGES_PARTITION_H +#include <__algorithm/iterator_operations.h> #include <__algorithm/make_projected.h> #include <__algorithm/partition.h> +#include <__algorithm/ranges_iterator_concept.h> #include <__config> #include <__functional/identity.h> #include <__functional/invoke.h> @@ -21,10 +23,10 @@ #include <__iterator/projected.h> #include <__ranges/access.h> #include <__ranges/concepts.h> -#include <__ranges/dangling.h> #include <__ranges/subrange.h> #include <__utility/forward.h> #include <__utility/move.h> +#include <type_traits> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -39,13 +41,21 @@ namespace __partition { struct __fn { + template <class _Iter, class _Sent, class _Proj, class _Pred> + _LIBCPP_HIDE_FROM_ABI static constexpr + subrange<__uncvref_t<_Iter>> __partition_fn_impl(_Iter&& __first, _Sent&& __last, _Pred&& __pred, _Proj&& __proj) { + auto&& __projected_pred = ranges::__make_projected_pred(__pred, __proj); + auto __result = std::__partition<_RangeAlgPolicy>( + std::move(__first), std::move(__last), __projected_pred, __iterator_concept<_Iter>()); + + return {std::move(__result.first), std::move(__result.second)}; + } + template <permutable _Iter, sentinel_for<_Iter> _Sent, class _Proj = identity, indirect_unary_predicate<projected<_Iter, _Proj>> _Pred> _LIBCPP_HIDE_FROM_ABI constexpr subrange<_Iter> operator()(_Iter __first, _Sent __last, _Pred __pred, _Proj __proj = {}) const { - // TODO: implement - (void)__first; (void)__last; (void)__pred; (void)__proj; - return {}; + return __partition_fn_impl(__first, __last, __pred, __proj); } template <forward_range _Range, class _Proj = identity, @@ -53,9 +63,7 @@ struct __fn { requires permutable<iterator_t<_Range>> _LIBCPP_HIDE_FROM_ABI constexpr borrowed_subrange_t<_Range> operator()(_Range&& __range, _Pred __pred, _Proj __proj = {}) const { - // TODO: implement - (void)__range; (void)__pred; (void)__proj; - return {}; + return __partition_fn_impl(ranges::begin(__range), ranges::end(__range), __pred, __proj); } }; diff --git a/libcxx/include/__algorithm/ranges_partition_copy.h b/libcxx/include/__algorithm/ranges_partition_copy.h index f55089b94ea5..7201a8cbfe45 100644 --- a/libcxx/include/__algorithm/ranges_partition_copy.h +++ b/libcxx/include/__algorithm/ranges_partition_copy.h @@ -10,20 +10,17 @@ #define _LIBCPP___ALGORITHM_RANGES_PARTITION_COPY_H #include <__algorithm/in_out_out_result.h> -#include 
<__algorithm/make_projected.h> -#include <__algorithm/partition_copy.h> #include <__config> #include <__functional/identity.h> #include <__functional/invoke.h> -#include <__functional/ranges_operations.h> #include <__iterator/concepts.h> #include <__iterator/iterator_traits.h> #include <__iterator/projected.h> #include <__ranges/access.h> #include <__ranges/concepts.h> #include <__ranges/dangling.h> -#include <__utility/forward.h> #include <__utility/move.h> +#include <type_traits> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -42,6 +39,27 @@ namespace __partition_copy { struct __fn { + // TODO(ranges): delegate to the classic algorithm. + template <class _InIter, class _Sent, class _OutIter1, class _OutIter2, class _Proj, class _Pred> + _LIBCPP_HIDE_FROM_ABI constexpr + static partition_copy_result< + __uncvref_t<_InIter>, __uncvref_t<_OutIter1>, __uncvref_t<_OutIter2> + > __partition_copy_fn_impl( _InIter&& __first, _Sent&& __last, _OutIter1&& __out_true, _OutIter2&& __out_false, + _Pred& __pred, _Proj& __proj) { + for (; __first != __last; ++__first) { + if (std::invoke(__pred, std::invoke(__proj, *__first))) { + *__out_true = *__first; + ++__out_true; + + } else { + *__out_false = *__first; + ++__out_false; + } + } + + return {std::move(__first), std::move(__out_true), std::move(__out_false)}; + } + template <input_iterator _InIter, sentinel_for<_InIter> _Sent, weakly_incrementable _OutIter1, weakly_incrementable _OutIter2, class _Proj = identity, indirect_unary_predicate<projected<_InIter, _Proj>> _Pred> @@ -50,9 +68,8 @@ struct __fn { partition_copy_result<_InIter, _OutIter1, _OutIter2> operator()(_InIter __first, _Sent __last, _OutIter1 __out_true, _OutIter2 __out_false, _Pred __pred, _Proj __proj = {}) const { - // TODO: implement - (void)__first; (void)__last; (void)__out_true; (void)__out_false; (void)__pred; (void)__proj; - return {}; + return __partition_copy_fn_impl( + std::move(__first), std::move(__last), std::move(__out_true), std::move(__out_false), __pred, __proj); } template <input_range _Range, weakly_incrementable _OutIter1, weakly_incrementable _OutIter2, @@ -61,9 +78,8 @@ struct __fn { _LIBCPP_HIDE_FROM_ABI constexpr partition_copy_result<borrowed_iterator_t<_Range>, _OutIter1, _OutIter2> operator()(_Range&& __range, _OutIter1 __out_true, _OutIter2 __out_false, _Pred __pred, _Proj __proj = {}) const { - // TODO: implement - (void)__range; (void)__out_true; (void)__out_false; (void)__pred; (void)__proj; - return {}; + return __partition_copy_fn_impl( + ranges::begin(__range), ranges::end(__range), std::move(__out_true), std::move(__out_false), __pred, __proj); } }; diff --git a/libcxx/include/__algorithm/ranges_partition_point.h b/libcxx/include/__algorithm/ranges_partition_point.h index 336b29f63284..6614a0bb50fd 100644 --- a/libcxx/include/__algorithm/ranges_partition_point.h +++ b/libcxx/include/__algorithm/ranges_partition_point.h @@ -9,19 +9,18 @@ #ifndef _LIBCPP___ALGORITHM_RANGES_PARTITION_POINT_H #define _LIBCPP___ALGORITHM_RANGES_PARTITION_POINT_H -#include <__algorithm/make_projected.h> -#include <__algorithm/partition_point.h> +#include <__algorithm/half_positive.h> #include <__config> #include <__functional/identity.h> #include <__functional/invoke.h> -#include <__functional/ranges_operations.h> #include <__iterator/concepts.h> +#include <__iterator/distance.h> #include <__iterator/iterator_traits.h> +#include <__iterator/next.h> #include <__iterator/projected.h> #include <__ranges/access.h> #include <__ranges/concepts.h> 
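The `__partition_copy_fn_impl` loop above is the whole algorithm for both new `ranges::partition_copy` overloads: one pass over the input, writing each element to one of the two output iterators depending on the projected predicate, and returning all three resulting iterators. A usage sketch of the public entry point (illustrative only, not part of the patch):

#include <algorithm>
#include <iterator>
#include <vector>

int main() {
  std::vector<int> in = {1, 2, 3, 4, 5, 6};
  std::vector<int> evens, odds;

  // The result carries the end of the consumed input plus the end of each output sequence.
  auto [in_end, evens_end, odds_end] = std::ranges::partition_copy(
      in, std::back_inserter(evens), std::back_inserter(odds),
      [](int x) { return x % 2 == 0; });

  // After the call: evens == {2, 4, 6}, odds == {1, 3, 5}, in_end == in.end().
}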
#include <__ranges/dangling.h> -#include <__utility/forward.h> #include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -37,22 +36,40 @@ namespace __partition_point { struct __fn { + // TODO(ranges): delegate to the classic algorithm. + template <class _Iter, class _Sent, class _Proj, class _Pred> + _LIBCPP_HIDE_FROM_ABI constexpr + static _Iter __partition_point_fn_impl(_Iter&& __first, _Sent&& __last, _Pred& __pred, _Proj& __proj) { + auto __len = ranges::distance(__first, __last); + + while (__len != 0) { + auto __half_len = std::__half_positive(__len); + auto __mid = ranges::next(__first, __half_len); + + if (std::invoke(__pred, std::invoke(__proj, *__mid))) { + __first = ++__mid; + __len -= __half_len + 1; + + } else { + __len = __half_len; + } + } + + return __first; + } + template <forward_iterator _Iter, sentinel_for<_Iter> _Sent, class _Proj = identity, indirect_unary_predicate<projected<_Iter, _Proj>> _Pred> _LIBCPP_HIDE_FROM_ABI constexpr _Iter operator()(_Iter __first, _Sent __last, _Pred __pred, _Proj __proj = {}) const { - // TODO: implement - (void)__first; (void)__last; (void)__pred; (void)__proj; - return {}; + return __partition_point_fn_impl(std::move(__first), std::move(__last), __pred, __proj); } template <forward_range _Range, class _Proj = identity, indirect_unary_predicate<projected<iterator_t<_Range>, _Proj>> _Pred> _LIBCPP_HIDE_FROM_ABI constexpr borrowed_iterator_t<_Range> operator()(_Range&& __range, _Pred __pred, _Proj __proj = {}) const { - // TODO: implement - (void)__range; (void)__pred; (void)__proj; - return {}; + return __partition_point_fn_impl(ranges::begin(__range), ranges::end(__range), __pred, __proj); } }; diff --git a/libcxx/include/__algorithm/ranges_pop_heap.h b/libcxx/include/__algorithm/ranges_pop_heap.h index d0b8314e5b0a..92df6119d34a 100644 --- a/libcxx/include/__algorithm/ranges_pop_heap.h +++ b/libcxx/include/__algorithm/ranges_pop_heap.h @@ -9,6 +9,7 @@ #ifndef _LIBCPP___ALGORITHM_RANGES_POP_HEAP_H #define _LIBCPP___ALGORITHM_RANGES_POP_HEAP_H +#include <__algorithm/iterator_operations.h> #include <__algorithm/make_projected.h> #include <__algorithm/pop_heap.h> #include <__concepts/same_as.h> @@ -46,7 +47,7 @@ struct __fn { auto __len = __last_iter - __first; auto&& __projected_comp = ranges::__make_projected_comp(__comp, __proj); - std::__pop_heap(std::move(__first), __last_iter, __projected_comp, __len); + std::__pop_heap<_RangeAlgPolicy>(std::move(__first), __last_iter, __projected_comp, __len); return __last_iter; } diff --git a/libcxx/include/__algorithm/ranges_push_heap.h b/libcxx/include/__algorithm/ranges_push_heap.h index e46ad19cfed7..4c41b00128de 100644 --- a/libcxx/include/__algorithm/ranges_push_heap.h +++ b/libcxx/include/__algorithm/ranges_push_heap.h @@ -9,6 +9,7 @@ #ifndef _LIBCPP___ALGORITHM_RANGES_PUSH_HEAP_H #define _LIBCPP___ALGORITHM_RANGES_PUSH_HEAP_H +#include <__algorithm/iterator_operations.h> #include <__algorithm/make_projected.h> #include <__algorithm/push_heap.h> #include <__concepts/same_as.h> @@ -45,7 +46,7 @@ struct __fn { auto __last_iter = ranges::next(__first, __last); auto&& __projected_comp = ranges::__make_projected_comp(__comp, __proj); - std::__push_heap(std::move(__first), __last_iter, __projected_comp); + std::__push_heap<_RangeAlgPolicy>(std::move(__first), __last_iter, __projected_comp); return __last_iter; } diff --git a/libcxx/include/__algorithm/ranges_set_union.h b/libcxx/include/__algorithm/ranges_set_union.h index 39537503b98f..3826e55688f7 100644 --- 
a/libcxx/include/__algorithm/ranges_set_union.h +++ b/libcxx/include/__algorithm/ranges_set_union.h @@ -42,34 +42,68 @@ using set_union_result = in_in_out_result<_InIter1, _InIter2, _OutIter>; namespace __set_union { struct __fn { - - template <input_iterator _InIter1, sentinel_for<_InIter1> _Sent1, - input_iterator _InIter2, sentinel_for<_InIter2> _Sent2, - weakly_incrementable _OutIter, class _Comp = ranges::less, - class _Proj1 = identity, class _Proj2 = identity> - requires mergeable<_InIter1, _InIter2, _OutIter, _Comp, _Proj1, _Proj2> - _LIBCPP_HIDE_FROM_ABI constexpr - set_union_result<_InIter1, _InIter2, _OutIter> - operator()(_InIter1 __first1, _Sent1 __last1, _InIter2 __first2, _Sent2 __last2, _OutIter __result, _Comp __comp = {}, - _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const { - // TODO: implement - (void)__first1; (void)__last1; (void)__first2; (void)__last2; (void)__result; (void)__comp; (void)__proj1; - (void)__proj2; - return {}; + template < + input_iterator _InIter1, + sentinel_for<_InIter1> _Sent1, + input_iterator _InIter2, + sentinel_for<_InIter2> _Sent2, + weakly_incrementable _OutIter, + class _Comp = ranges::less, + class _Proj1 = identity, + class _Proj2 = identity> + requires mergeable<_InIter1, _InIter2, _OutIter, _Comp, _Proj1, _Proj2> + _LIBCPP_HIDE_FROM_ABI constexpr set_union_result<_InIter1, _InIter2, _OutIter> operator()( + _InIter1 __first1, + _Sent1 __last1, + _InIter2 __first2, + _Sent2 __last2, + _OutIter __result, + _Comp __comp = {}, + _Proj1 __proj1 = {}, + _Proj2 __proj2 = {}) const { + auto __ret = std::__set_union( + std::move(__first1), + std::move(__last1), + std::move(__first2), + std::move(__last2), + std::move(__result), + ranges::__make_projected_comp(__comp, __proj1, __proj2)); + return {std::move(__ret.__in1_), std::move(__ret.__in2_), std::move(__ret.__out_)}; } - template <input_range _Range1, input_range _Range2, weakly_incrementable _OutIter, - class _Comp = ranges::less, class _Proj1 = identity, class _Proj2 = identity> - requires mergeable<iterator_t<_Range1>, iterator_t<_Range2>, _OutIter, _Comp, _Proj1, _Proj2> - _LIBCPP_HIDE_FROM_ABI constexpr - set_union_result<borrowed_iterator_t<_Range1>, borrowed_iterator_t<_Range2>, _OutIter> - operator()(_Range1&& __range1, _Range2&& __range2, _OutIter __result, _Comp __comp = {}, - _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const { - // TODO: implement - (void)__range1; (void)__range2; (void)__result; (void)__comp; (void)__proj1; (void)__proj2; - return {}; + template < + input_range _Range1, + input_range _Range2, + weakly_incrementable _OutIter, + class _Comp = ranges::less, + class _Proj1 = identity, + class _Proj2 = identity> + requires mergeable< + iterator_t<_Range1>, + iterator_t<_Range2>, + _OutIter, + _Comp, + _Proj1, + _Proj2> + _LIBCPP_HIDE_FROM_ABI constexpr set_union_result<borrowed_iterator_t<_Range1>, + borrowed_iterator_t<_Range2>, + _OutIter> + operator()( + _Range1&& __range1, + _Range2&& __range2, + _OutIter __result, + _Comp __comp = {}, + _Proj1 __proj1 = {}, + _Proj2 __proj2 = {}) const { + auto __ret = std::__set_union( + ranges::begin(__range1), + ranges::end(__range1), + ranges::begin(__range2), + ranges::end(__range2), + std::move(__result), + ranges::__make_projected_comp(__comp, __proj1, __proj2)); + return {std::move(__ret.__in1_), std::move(__ret.__in2_), std::move(__ret.__out_)}; } - }; } // namespace __set_union diff --git a/libcxx/include/__algorithm/ranges_shuffle.h b/libcxx/include/__algorithm/ranges_shuffle.h index bf9c28b4ce26..b101a8582eac 100644 --- 
a/libcxx/include/__algorithm/ranges_shuffle.h +++ b/libcxx/include/__algorithm/ranges_shuffle.h @@ -9,23 +9,22 @@ #ifndef _LIBCPP___ALGORITHM_RANGES_SHUFFLE_H #define _LIBCPP___ALGORITHM_RANGES_SHUFFLE_H -#include <__algorithm/make_projected.h> +#include <__algorithm/iterator_operations.h> #include <__algorithm/shuffle.h> #include <__config> -#include <__functional/identity.h> #include <__functional/invoke.h> #include <__functional/ranges_operations.h> #include <__iterator/concepts.h> #include <__iterator/iterator_traits.h> +#include <__iterator/next.h> #include <__iterator/permutable.h> -#include <__iterator/projected.h> #include <__random/uniform_random_bit_generator.h> #include <__ranges/access.h> #include <__ranges/concepts.h> #include <__ranges/dangling.h> -#include <__type_traits/remove_reference.h> #include <__utility/forward.h> #include <__utility/move.h> +#include <type_traits> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -33,29 +32,57 @@ #if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { namespace __shuffle { struct __fn { + // `std::shuffle` is more constrained than `std::ranges::shuffle`. `std::ranges::shuffle` only requires the given + // generator to satisfy the `std::uniform_random_bit_generator` concept. `std::shuffle` requires the given + // generator to meet the uniform random bit generator requirements; these requirements include satisfying + // `std::uniform_random_bit_generator` and add a requirement for the generator to provide a nested `result_type` + // typedef (see `[rand.req.urng]`). + // + // To reuse the implementation from `std::shuffle`, make the given generator meet the classic requirements by wrapping + // it into an adaptor type that forwards all of its interface and adds the required typedef. + template <class _Gen> + class _ClassicGenAdaptor { + private: + // The generator is not required to be copyable or movable, so it has to be stored as a reference. 
+ _Gen& __gen; + + public: + using result_type = invoke_result_t<_Gen&>; + + _LIBCPP_HIDE_FROM_ABI + static constexpr auto min() { return __uncvref_t<_Gen>::min(); } + _LIBCPP_HIDE_FROM_ABI + static constexpr auto max() { return __uncvref_t<_Gen>::max(); } + + _LIBCPP_HIDE_FROM_ABI + constexpr explicit _ClassicGenAdaptor(_Gen& __g) : __gen(__g) {} + + _LIBCPP_HIDE_FROM_ABI + constexpr auto operator()() const { return __gen(); } + }; template <random_access_iterator _Iter, sentinel_for<_Iter> _Sent, class _Gen> requires permutable<_Iter> && uniform_random_bit_generator<remove_reference_t<_Gen>> _LIBCPP_HIDE_FROM_ABI _Iter operator()(_Iter __first, _Sent __last, _Gen&& __gen) const { - // TODO: implement - (void)__first; (void)__last; (void)__gen; - return {}; + _ClassicGenAdaptor<_Gen> __adapted_gen(__gen); + return std::__shuffle<_RangeAlgPolicy>(std::move(__first), std::move(__last), __adapted_gen); } template<random_access_range _Range, class _Gen> requires permutable<iterator_t<_Range>> && uniform_random_bit_generator<remove_reference_t<_Gen>> _LIBCPP_HIDE_FROM_ABI borrowed_iterator_t<_Range> operator()(_Range&& __range, _Gen&& __gen) const { - // TODO: implement - (void)__range; (void)__gen; - return {}; + return (*this)(ranges::begin(__range), ranges::end(__range), std::forward<_Gen>(__gen)); } }; @@ -69,6 +96,8 @@ inline namespace __cpo { _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) #endif // _LIBCPP___ALGORITHM_RANGES_SHUFFLE_H diff --git a/libcxx/include/__algorithm/ranges_sort.h b/libcxx/include/__algorithm/ranges_sort.h index 8297940df237..ef14db64295d 100644 --- a/libcxx/include/__algorithm/ranges_sort.h +++ b/libcxx/include/__algorithm/ranges_sort.h @@ -9,6 +9,7 @@ #ifndef _LIBCPP___ALGORITHM_RANGES_SORT_H #define _LIBCPP___ALGORITHM_RANGES_SORT_H +#include <__algorithm/iterator_operations.h> #include <__algorithm/make_projected.h> #include <__algorithm/sort.h> #include <__config> @@ -44,7 +45,7 @@ struct __fn { auto __last_iter = ranges::next(__first, __last); auto&& __projected_comp = ranges::__make_projected_comp(__comp, __proj); - std::__sort_impl(std::move(__first), __last_iter, __projected_comp); + std::__sort_impl<_RangeAlgPolicy>(std::move(__first), __last_iter, __projected_comp); return __last_iter; } diff --git a/libcxx/include/__algorithm/ranges_sort_heap.h b/libcxx/include/__algorithm/ranges_sort_heap.h index c753e20c44a6..eb6a30dcd3d0 100644 --- a/libcxx/include/__algorithm/ranges_sort_heap.h +++ b/libcxx/include/__algorithm/ranges_sort_heap.h @@ -9,6 +9,7 @@ #ifndef _LIBCPP___ALGORITHM_RANGES_SORT_HEAP_H #define _LIBCPP___ALGORITHM_RANGES_SORT_HEAP_H +#include <__algorithm/iterator_operations.h> #include <__algorithm/make_projected.h> #include <__algorithm/sort_heap.h> #include <__concepts/same_as.h> @@ -45,7 +46,7 @@ struct __fn { auto __last_iter = ranges::next(__first, __last); auto&& __projected_comp = ranges::__make_projected_comp(__comp, __proj); - std::__sort_heap(std::move(__first), __last_iter, __projected_comp); + std::__sort_heap<_RangeAlgPolicy>(std::move(__first), __last_iter, __projected_comp); return __last_iter; } diff --git a/libcxx/include/__algorithm/ranges_stable_partition.h b/libcxx/include/__algorithm/ranges_stable_partition.h index 178c953ebdae..27957db8829f 100644 --- a/libcxx/include/__algorithm/ranges_stable_partition.h +++ b/libcxx/include/__algorithm/ranges_stable_partition.h @@ -9,7 +9,9 @@ #ifndef _LIBCPP___ALGORITHM_RANGES_STABLE_PARTITION_H #define 
_LIBCPP___ALGORITHM_RANGES_STABLE_PARTITION_H +#include <__algorithm/iterator_operations.h> #include <__algorithm/make_projected.h> +#include <__algorithm/ranges_iterator_concept.h> #include <__algorithm/stable_partition.h> #include <__config> #include <__functional/identity.h> @@ -17,6 +19,7 @@ #include <__functional/ranges_operations.h> #include <__iterator/concepts.h> #include <__iterator/iterator_traits.h> +#include <__iterator/next.h> #include <__iterator/permutable.h> #include <__iterator/projected.h> #include <__ranges/access.h> @@ -25,6 +28,7 @@ #include <__ranges/subrange.h> #include <__utility/forward.h> #include <__utility/move.h> +#include <type_traits> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -39,14 +43,25 @@ namespace __stable_partition { struct __fn { + template <class _Iter, class _Sent, class _Proj, class _Pred> + _LIBCPP_HIDE_FROM_ABI static + subrange<__uncvref_t<_Iter>> __stable_partition_fn_impl( + _Iter&& __first, _Sent&& __last, _Pred&& __pred, _Proj&& __proj) { + auto __last_iter = ranges::next(__first, __last); + + auto&& __projected_pred = ranges::__make_projected_pred(__pred, __proj); + auto __result = std::__stable_partition<_RangeAlgPolicy>( + std::move(__first), __last_iter, __projected_pred, __iterator_concept<_Iter>()); + + return {std::move(__result), std::move(__last_iter)}; + } + template <bidirectional_iterator _Iter, sentinel_for<_Iter> _Sent, class _Proj = identity, indirect_unary_predicate<projected<_Iter, _Proj>> _Pred> requires permutable<_Iter> _LIBCPP_HIDE_FROM_ABI subrange<_Iter> operator()(_Iter __first, _Sent __last, _Pred __pred, _Proj __proj = {}) const { - // TODO: implement - (void)__first; (void)__last; (void)__pred; (void)__proj; - return {}; + return __stable_partition_fn_impl(__first, __last, __pred, __proj); } template <bidirectional_range _Range, class _Proj = identity, @@ -54,9 +69,7 @@ struct __fn { requires permutable<iterator_t<_Range>> _LIBCPP_HIDE_FROM_ABI borrowed_subrange_t<_Range> operator()(_Range&& __range, _Pred __pred, _Proj __proj = {}) const { - // TODO: implement - (void)__range; (void)__pred; (void)__proj; - return {}; + return __stable_partition_fn_impl(ranges::begin(__range), ranges::end(__range), __pred, __proj); } }; diff --git a/libcxx/include/__algorithm/ranges_stable_sort.h b/libcxx/include/__algorithm/ranges_stable_sort.h index 20e840426434..de48416a41be 100644 --- a/libcxx/include/__algorithm/ranges_stable_sort.h +++ b/libcxx/include/__algorithm/ranges_stable_sort.h @@ -9,6 +9,7 @@ #ifndef _LIBCPP___ALGORITHM_RANGES_STABLE_SORT_H #define _LIBCPP___ALGORITHM_RANGES_STABLE_SORT_H +#include <__algorithm/iterator_operations.h> #include <__algorithm/make_projected.h> #include <__algorithm/stable_sort.h> #include <__config> @@ -44,7 +45,7 @@ struct __fn { auto __last_iter = ranges::next(__first, __last); auto&& __projected_comp = ranges::__make_projected_comp(__comp, __proj); - std::__stable_sort_impl(std::move(__first), __last_iter, __projected_comp); + std::__stable_sort_impl<_RangeAlgPolicy>(std::move(__first), __last_iter, __projected_comp); return __last_iter; } diff --git a/libcxx/include/__algorithm/rotate.h b/libcxx/include/__algorithm/rotate.h index c9ea5bad4c5a..fcf8444a65a0 100644 --- a/libcxx/include/__algorithm/rotate.h +++ b/libcxx/include/__algorithm/rotate.h @@ -9,6 +9,7 @@ #ifndef _LIBCPP___ALGORITHM_ROTATE_H #define _LIBCPP___ALGORITHM_ROTATE_H +#include <__algorithm/iterator_operations.h> #include <__algorithm/move.h> #include <__algorithm/move_backward.h> 
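As with `ranges::partition`, the new `__stable_partition_fn_impl` above packages the partition point and the end of the range into a `subrange`, so the caller gets back the group of elements for which the predicate is false, with relative order preserved within both groups. A usage sketch of the public overload (illustrative only, not part of the patch):

#include <algorithm>
#include <vector>

int main() {
  std::vector<int> v = {1, 2, 3, 4, 5, 6};

  // Stably move the even elements to the front.
  auto tail = std::ranges::stable_partition(v, [](int x) { return x % 2 == 0; });

  // After the call: v == {2, 4, 6, 1, 3, 5}; `tail` spans the odd elements {1, 3, 5}.
}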
#include <__algorithm/swap_ranges.h> @@ -26,37 +27,40 @@ _LIBCPP_BEGIN_NAMESPACE_STD -template <class _ForwardIterator> +template <class _AlgPolicy, class _ForwardIterator> _LIBCPP_CONSTEXPR_AFTER_CXX11 _ForwardIterator __rotate_left(_ForwardIterator __first, _ForwardIterator __last) { typedef typename iterator_traits<_ForwardIterator>::value_type value_type; - value_type __tmp = _VSTD::move(*__first); + value_type __tmp = _IterOps<_AlgPolicy>::__iter_move(__first); + // TODO(ranges): pass `_AlgPolicy` to `move`. _ForwardIterator __lm1 = _VSTD::move(_VSTD::next(__first), __last, __first); *__lm1 = _VSTD::move(__tmp); return __lm1; } -template <class _BidirectionalIterator> +template <class _AlgPolicy, class _BidirectionalIterator> _LIBCPP_CONSTEXPR_AFTER_CXX11 _BidirectionalIterator __rotate_right(_BidirectionalIterator __first, _BidirectionalIterator __last) { typedef typename iterator_traits<_BidirectionalIterator>::value_type value_type; + // TODO(ranges): pass `_AlgPolicy` to `prev`. _BidirectionalIterator __lm1 = _VSTD::prev(__last); - value_type __tmp = _VSTD::move(*__lm1); + value_type __tmp = _IterOps<_AlgPolicy>::__iter_move(__lm1); + // TODO(ranges): pass `_AlgPolicy` to `move_backward`. _BidirectionalIterator __fp1 = _VSTD::move_backward(__first, __lm1, __last); *__first = _VSTD::move(__tmp); return __fp1; } -template <class _ForwardIterator> +template <class _AlgPolicy, class _ForwardIterator> _LIBCPP_CONSTEXPR_AFTER_CXX14 _ForwardIterator __rotate_forward(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last) { _ForwardIterator __i = __middle; while (true) { - swap(*__first, *__i); + _IterOps<_AlgPolicy>::iter_swap(__first, __i); ++__first; if (++__i == __last) break; @@ -69,7 +73,7 @@ __rotate_forward(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIt __i = __middle; while (true) { - swap(*__first, *__i); + _IterOps<_AlgPolicy>::iter_swap(__first, __i); ++__first; if (++__i == __last) { @@ -98,7 +102,7 @@ __algo_gcd(_Integral __x, _Integral __y) return __x; } -template<typename _RandomAccessIterator> +template <class _AlgPolicy, typename _RandomAccessIterator> _LIBCPP_CONSTEXPR_AFTER_CXX14 _RandomAccessIterator __rotate_gcd(_RandomAccessIterator __first, _RandomAccessIterator __middle, _RandomAccessIterator __last) { @@ -109,18 +113,19 @@ __rotate_gcd(_RandomAccessIterator __first, _RandomAccessIterator __middle, _Ran const difference_type __m2 = __last - __middle; if (__m1 == __m2) { + // TODO(ranges): pass `_AlgPolicy` to `swap_ranges`. 
_VSTD::swap_ranges(__first, __middle, __middle); return __middle; } const difference_type __g = _VSTD::__algo_gcd(__m1, __m2); for (_RandomAccessIterator __p = __first + __g; __p != __first;) { - value_type __t(_VSTD::move(*--__p)); + value_type __t(_IterOps<_AlgPolicy>::__iter_move(--__p)); _RandomAccessIterator __p1 = __p; _RandomAccessIterator __p2 = __p1 + __m1; do { - *__p1 = _VSTD::move(*__p2); + *__p1 = _IterOps<_AlgPolicy>::__iter_move(__p2); __p1 = __p2; const difference_type __d = __last - __p2; if (__m1 < __d) @@ -133,54 +138,66 @@ __rotate_gcd(_RandomAccessIterator __first, _RandomAccessIterator __middle, _Ran return __first + __m2; } -template <class _ForwardIterator> +template <class _AlgPolicy, class _ForwardIterator> inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11 _ForwardIterator -__rotate(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last, +__rotate_impl(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last, _VSTD::forward_iterator_tag) { typedef typename iterator_traits<_ForwardIterator>::value_type value_type; if (is_trivially_move_assignable<value_type>::value) { - if (_VSTD::next(__first) == __middle) - return _VSTD::__rotate_left(__first, __last); + if (_IterOps<_AlgPolicy>::next(__first) == __middle) + return std::__rotate_left<_AlgPolicy>(__first, __last); } - return _VSTD::__rotate_forward(__first, __middle, __last); + return std::__rotate_forward<_AlgPolicy>(__first, __middle, __last); } -template <class _BidirectionalIterator> +template <class _AlgPolicy, class _BidirectionalIterator> inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11 _BidirectionalIterator -__rotate(_BidirectionalIterator __first, _BidirectionalIterator __middle, _BidirectionalIterator __last, +__rotate_impl(_BidirectionalIterator __first, _BidirectionalIterator __middle, _BidirectionalIterator __last, bidirectional_iterator_tag) { typedef typename iterator_traits<_BidirectionalIterator>::value_type value_type; if (is_trivially_move_assignable<value_type>::value) { - if (_VSTD::next(__first) == __middle) - return _VSTD::__rotate_left(__first, __last); - if (_VSTD::next(__middle) == __last) - return _VSTD::__rotate_right(__first, __last); + if (_IterOps<_AlgPolicy>::next(__first) == __middle) + return std::__rotate_left<_AlgPolicy>(__first, __last); + if (_IterOps<_AlgPolicy>::next(__middle) == __last) + return std::__rotate_right<_AlgPolicy>(__first, __last); } - return _VSTD::__rotate_forward(__first, __middle, __last); + return std::__rotate_forward<_AlgPolicy>(__first, __middle, __last); } -template <class _RandomAccessIterator> +template <class _AlgPolicy, class _RandomAccessIterator> inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11 _RandomAccessIterator -__rotate(_RandomAccessIterator __first, _RandomAccessIterator __middle, _RandomAccessIterator __last, +__rotate_impl(_RandomAccessIterator __first, _RandomAccessIterator __middle, _RandomAccessIterator __last, random_access_iterator_tag) { typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type; if (is_trivially_move_assignable<value_type>::value) { - if (_VSTD::next(__first) == __middle) - return _VSTD::__rotate_left(__first, __last); - if (_VSTD::next(__middle) == __last) - return _VSTD::__rotate_right(__first, __last); - return _VSTD::__rotate_gcd(__first, __middle, __last); + if (_IterOps<_AlgPolicy>::next(__first) == __middle) + return std::__rotate_left<_AlgPolicy>(__first, __last); + if 
(_IterOps<_AlgPolicy>::next(__middle) == __last) + return std::__rotate_right<_AlgPolicy>(__first, __last); + return std::__rotate_gcd<_AlgPolicy>(__first, __middle, __last); } - return _VSTD::__rotate_forward(__first, __middle, __last); + return std::__rotate_forward<_AlgPolicy>(__first, __middle, __last); +} + +template <class _AlgPolicy, class _RandomAccessIterator, class _IterCategory> +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 +_RandomAccessIterator __rotate(_RandomAccessIterator __first, _RandomAccessIterator __middle, + _RandomAccessIterator __last, _IterCategory __iter_category) { + if (__first == __middle) + return __last; + if (__middle == __last) + return __first; + + return std::__rotate_impl<_AlgPolicy>(std::move(__first), std::move(__middle), std::move(__last), __iter_category); } template <class _ForwardIterator> @@ -188,12 +205,8 @@ inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _ForwardIterator rotate(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last) { - if (__first == __middle) - return __last; - if (__middle == __last) - return __first; - return _VSTD::__rotate(__first, __middle, __last, - typename iterator_traits<_ForwardIterator>::iterator_category()); + return std::__rotate<_ClassicAlgPolicy>(__first, __middle, __last, + typename iterator_traits<_ForwardIterator>::iterator_category()); } _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__algorithm/search_n.h b/libcxx/include/__algorithm/search_n.h index 2a0547565ee9..ccb8e845f5b1 100644 --- a/libcxx/include/__algorithm/search_n.h +++ b/libcxx/include/__algorithm/search_n.h @@ -163,7 +163,7 @@ _ForwardIterator search_n(_ForwardIterator __first, _ForwardIterator __last, _Size __count, const _Tp& __value, _BinaryPredicate __pred) { - static_assert(__is_callable<_BinaryPredicate, decltype(*__first), decltype(*__last)>::value, + static_assert(__is_callable<_BinaryPredicate, decltype(*__first), const _Tp&>::value, "BinaryPredicate has to be callable"); auto __proj = __identity(); return std::__search_n_impl(__first, __last, std::__convert_to_integral(__count), __value, __pred, __proj).first; diff --git a/libcxx/include/__algorithm/set_union.h b/libcxx/include/__algorithm/set_union.h index 0ec6b09380ed..3bd437980161 100644 --- a/libcxx/include/__algorithm/set_union.h +++ b/libcxx/include/__algorithm/set_union.h @@ -14,6 +14,7 @@ #include <__algorithm/copy.h> #include <__config> #include <__iterator/iterator_traits.h> +#include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -21,50 +22,77 @@ _LIBCPP_BEGIN_NAMESPACE_STD -template <class _Compare, class _InputIterator1, class _InputIterator2, class _OutputIterator> -_LIBCPP_CONSTEXPR_AFTER_CXX17 _OutputIterator -__set_union(_InputIterator1 __first1, _InputIterator1 __last1, - _InputIterator2 __first2, _InputIterator2 __last2, _OutputIterator __result, _Compare __comp) -{ - for (; __first1 != __last1; ++__result) - { - if (__first2 == __last2) - return _VSTD::copy(__first1, __last1, __result); - if (__comp(*__first2, *__first1)) - { - *__result = *__first2; - ++__first2; - } - else - { - if (!__comp(*__first1, *__first2)) - ++__first2; - *__result = *__first1; - ++__first1; - } +template <class _InIter1, class _InIter2, class _OutIter> +struct __set_union_result { + _InIter1 __in1_; + _InIter2 __in2_; + _OutIter __out_; + + // need a constructor as C++03 aggregate init is hard + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 + __set_union_result(_InIter1&& 
__in_iter1, _InIter2&& __in_iter2, _OutIter&& __out_iter) + : __in1_(std::move(__in_iter1)), __in2_(std::move(__in_iter2)), __out_(std::move(__out_iter)) {} +}; + +template <class _Compare, class _InIter1, class _Sent1, class _InIter2, class _Sent2, class _OutIter> +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 __set_union_result<_InIter1, _InIter2, _OutIter> __set_union( + _InIter1 __first1, _Sent1 __last1, _InIter2 __first2, _Sent2 __last2, _OutIter __result, _Compare&& __comp) { + for (; __first1 != __last1; ++__result) { + if (__first2 == __last2) { + auto __ret1 = std::__copy_impl(std::move(__first1), std::move(__last1), std::move(__result)); + return __set_union_result<_InIter1, _InIter2, _OutIter>( + std::move(__ret1.first), std::move(__first2), std::move((__ret1.second))); + } + if (__comp(*__first2, *__first1)) { + *__result = *__first2; + ++__first2; + } else { + if (!__comp(*__first1, *__first2)) { + ++__first2; + } + *__result = *__first1; + ++__first1; } - return _VSTD::copy(__first2, __last2, __result); + } + auto __ret2 = std::__copy_impl(std::move(__first2), std::move(__last2), std::move(__result)); + return __set_union_result<_InIter1, _InIter2, _OutIter>( + std::move(__first1), std::move(__ret2.first), std::move((__ret2.second))); } template <class _InputIterator1, class _InputIterator2, class _OutputIterator, class _Compare> -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -_OutputIterator -set_union(_InputIterator1 __first1, _InputIterator1 __last1, - _InputIterator2 __first2, _InputIterator2 __last2, _OutputIterator __result, _Compare __comp) -{ - typedef typename __comp_ref_type<_Compare>::type _Comp_ref; - return _VSTD::__set_union<_Comp_ref>(__first1, __last1, __first2, __last2, __result, __comp); +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 _OutputIterator set_union( + _InputIterator1 __first1, + _InputIterator1 __last1, + _InputIterator2 __first2, + _InputIterator2 __last2, + _OutputIterator __result, + _Compare __comp) { + typedef typename __comp_ref_type<_Compare>::type _Comp_ref; + return std::__set_union<_Comp_ref>( + std::move(__first1), + std::move(__last1), + std::move(__first2), + std::move(__last2), + std::move(__result), + __comp) + .__out_; } template <class _InputIterator1, class _InputIterator2, class _OutputIterator> -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -_OutputIterator -set_union(_InputIterator1 __first1, _InputIterator1 __last1, - _InputIterator2 __first2, _InputIterator2 __last2, _OutputIterator __result) -{ - return _VSTD::set_union(__first1, __last1, __first2, __last2, __result, - __less<typename iterator_traits<_InputIterator1>::value_type, - typename iterator_traits<_InputIterator2>::value_type>()); +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 _OutputIterator set_union( + _InputIterator1 __first1, + _InputIterator1 __last1, + _InputIterator2 __first2, + _InputIterator2 __last2, + _OutputIterator __result) { + return std::set_union( + std::move(__first1), + std::move(__last1), + std::move(__first2), + std::move(__last2), + std::move(__result), + __less<typename iterator_traits<_InputIterator1>::value_type, + typename iterator_traits<_InputIterator2>::value_type>()); } _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__algorithm/shuffle.h b/libcxx/include/__algorithm/shuffle.h index 6c6ff5675dad..e32c6a7608ba 100644 --- a/libcxx/include/__algorithm/shuffle.h +++ b/libcxx/include/__algorithm/shuffle.h @@ -9,11 +9,13 @@ #ifndef _LIBCPP___ALGORITHM_SHUFFLE_H #define 
_LIBCPP___ALGORITHM_SHUFFLE_H +#include <__algorithm/iterator_operations.h> #include <__config> #include <__debug> #include <__iterator/iterator_traits.h> #include <__random/uniform_int_distribution.h> -#include <__utility/swap.h> +#include <__utility/forward.h> +#include <__utility/move.h> #include <cstddef> #include <cstdint> @@ -134,13 +136,15 @@ random_shuffle(_RandomAccessIterator __first, _RandomAccessIterator __last, } #endif -template<class _RandomAccessIterator, class _UniformRandomNumberGenerator> - void shuffle(_RandomAccessIterator __first, _RandomAccessIterator __last, - _UniformRandomNumberGenerator&& __g) -{ +template <class _AlgPolicy, class _RandomAccessIterator, class _Sentinel, class _UniformRandomNumberGenerator> +_RandomAccessIterator __shuffle( + _RandomAccessIterator __first, _Sentinel __last_sentinel, _UniformRandomNumberGenerator&& __g) { typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type; typedef uniform_int_distribution<ptrdiff_t> _Dp; typedef typename _Dp::param_type _Pp; + + auto __original_last = _IterOps<_AlgPolicy>::next(__first, __last_sentinel); + auto __last = __original_last; difference_type __d = __last - __first; if (__d > 1) { @@ -149,9 +153,18 @@ template<class _RandomAccessIterator, class _UniformRandomNumberGenerator> { difference_type __i = __uid(__g, _Pp(0, __d)); if (__i != difference_type(0)) - swap(*__first, *(__first + __i)); + _IterOps<_AlgPolicy>::iter_swap(__first, __first + __i); } } + + return __original_last; +} + +template <class _RandomAccessIterator, class _UniformRandomNumberGenerator> +void shuffle(_RandomAccessIterator __first, _RandomAccessIterator __last, + _UniformRandomNumberGenerator&& __g) { + (void)std::__shuffle<_ClassicAlgPolicy>( + std::move(__first), std::move(__last), std::forward<_UniformRandomNumberGenerator>(__g)); } _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__algorithm/sift_down.h b/libcxx/include/__algorithm/sift_down.h index 0351a1c578b0..be2eb29dd53a 100644 --- a/libcxx/include/__algorithm/sift_down.h +++ b/libcxx/include/__algorithm/sift_down.h @@ -9,6 +9,7 @@ #ifndef _LIBCPP___ALGORITHM_SIFT_DOWN_H #define _LIBCPP___ALGORITHM_SIFT_DOWN_H +#include <__algorithm/iterator_operations.h> #include <__assert> #include <__config> #include <__iterator/iterator_traits.h> @@ -20,12 +21,14 @@ _LIBCPP_BEGIN_NAMESPACE_STD -template <class _Compare, class _RandomAccessIterator> +template <class _AlgPolicy, class _Compare, class _RandomAccessIterator> _LIBCPP_CONSTEXPR_AFTER_CXX11 void __sift_down(_RandomAccessIterator __first, _Compare __comp, typename iterator_traits<_RandomAccessIterator>::difference_type __len, _RandomAccessIterator __start) { + using _Ops = _IterOps<_AlgPolicy>; + typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type; typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type; // left-child of __start is at 2 * __start + 1 @@ -49,11 +52,11 @@ __sift_down(_RandomAccessIterator __first, _Compare __comp, // we are, __start is larger than its largest child return; - value_type __top(_VSTD::move(*__start)); + value_type __top(_Ops::__iter_move(__start)); do { // we are not in heap-order, swap the parent with its largest child - *__start = _VSTD::move(*__child_i); + *__start = _Ops::__iter_move(__child_i); __start = __child_i; if ((__len - 2) / 2 < __child) @@ -74,7 +77,7 @@ __sift_down(_RandomAccessIterator __first, _Compare __comp, *__start = _VSTD::move(__top); } -template <class _Compare, class 
_RandomAccessIterator> +template <class _AlgPolicy, class _Compare, class _RandomAccessIterator> _LIBCPP_CONSTEXPR_AFTER_CXX11 _RandomAccessIterator __floyd_sift_down(_RandomAccessIterator __first, _Compare __comp, typename iterator_traits<_RandomAccessIterator>::difference_type __len) @@ -97,7 +100,7 @@ __floyd_sift_down(_RandomAccessIterator __first, _Compare __comp, } // swap __hole with its largest child - *__hole = std::move(*__child_i); + *__hole = _IterOps<_AlgPolicy>::__iter_move(__child_i); __hole = __child_i; // if __hole is now a leaf, we're done diff --git a/libcxx/include/__algorithm/sort.h b/libcxx/include/__algorithm/sort.h index 76a18215731b..1ca2f1b81712 100644 --- a/libcxx/include/__algorithm/sort.h +++ b/libcxx/include/__algorithm/sort.h @@ -11,6 +11,7 @@ #include <__algorithm/comp.h> #include <__algorithm/comp_ref_type.h> +#include <__algorithm/iterator_operations.h> #include <__algorithm/min_element.h> #include <__algorithm/partial_sort.h> #include <__algorithm/unwrap_iter.h> @@ -21,7 +22,6 @@ #include <__functional/operations.h> #include <__functional/ranges_operations.h> #include <__iterator/iterator_traits.h> -#include <__utility/swap.h> #include <climits> #include <memory> @@ -31,37 +31,85 @@ _LIBCPP_BEGIN_NAMESPACE_STD +// Wraps an algorithm policy tag and a comparator in a single struct, used to pass the policy tag around without +// changing the number of template arguments (to keep the ABI stable). This is only used for the "range" policy tag. +// +// To create an object of this type, use `_WrapAlgPolicy<T, C>::type` -- see the specialization below for the rationale. +template <class _PolicyT, class _CompT, class = void> +struct _WrapAlgPolicy { + using type = _WrapAlgPolicy; + + using _AlgPolicy = _PolicyT; + using _Comp = _CompT; + _Comp& __comp; + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 + _WrapAlgPolicy(_Comp& __c) : __comp(__c) {} +}; + +// Specialization for the "classic" policy tag that avoids creating a struct and simply defines an alias for the +// comparator. When unwrapping, a pristine comparator is always considered to have the "classic" tag attached. Passing +// the pristine comparator where possible allows using template instantiations from the dylib. +template <class _PolicyT, class _CompT> +struct _WrapAlgPolicy<_PolicyT, _CompT, __enable_if_t<std::is_same<_PolicyT, _ClassicAlgPolicy>::value> > { + using type = _CompT; +}; + +// Unwraps a pristine functor (e.g. `std::less`) as if it were wrapped using `_WrapAlgPolicy`. The policy tag is always +// set to "classic". +template <class _CompT> +struct _UnwrapAlgPolicy { + using _AlgPolicy = _ClassicAlgPolicy; + using _Comp = _CompT; + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 static + _Comp __get_comp(_Comp __comp) { return __comp; } +}; + +// Unwraps a `_WrapAlgPolicy` struct. +template <class... 
_Ts> +struct _UnwrapAlgPolicy<_WrapAlgPolicy<_Ts...> > { + using _Wrapped = _WrapAlgPolicy<_Ts...>; + using _AlgPolicy = typename _Wrapped::_AlgPolicy; + using _Comp = typename _Wrapped::_Comp; + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 static + _Comp __get_comp(_Wrapped& __w) { return __w.__comp; } +}; + // stable, 2-3 compares, 0-2 swaps -template <class _Compare, class _ForwardIterator> +template <class _AlgPolicy, class _Compare, class _ForwardIterator> _LIBCPP_CONSTEXPR_AFTER_CXX11 unsigned __sort3(_ForwardIterator __x, _ForwardIterator __y, _ForwardIterator __z, _Compare __c) { + using _Ops = _IterOps<_AlgPolicy>; + unsigned __r = 0; if (!__c(*__y, *__x)) // if x <= y { if (!__c(*__z, *__y)) // if y <= z return __r; // x <= y && y <= z // x <= y && y > z - swap(*__y, *__z); // x <= z && y < z + _Ops::iter_swap(__y, __z); // x <= z && y < z __r = 1; if (__c(*__y, *__x)) // if x > y { - swap(*__x, *__y); // x < y && y <= z + _Ops::iter_swap(__x, __y); // x < y && y <= z __r = 2; } return __r; // x <= y && y < z } if (__c(*__z, *__y)) // x > y, if y > z { - swap(*__x, *__z); // x < y && y < z + _Ops::iter_swap(__x, __z); // x < y && y < z __r = 1; return __r; } - swap(*__x, *__y); // x > y && y <= z + _Ops::iter_swap(__x, __y); // x > y && y <= z __r = 1; // x < y && x <= z if (__c(*__z, *__y)) // if y > z { - swap(*__y, *__z); // x <= y && y < z + _Ops::iter_swap(__y, __z); // x <= y && y < z __r = 2; } return __r; @@ -69,18 +117,20 @@ _LIBCPP_CONSTEXPR_AFTER_CXX11 unsigned __sort3(_ForwardIterator __x, _ForwardIte // stable, 3-6 compares, 0-5 swaps -template <class _Compare, class _ForwardIterator> +template <class _AlgPolicy, class _Compare, class _ForwardIterator> unsigned __sort4(_ForwardIterator __x1, _ForwardIterator __x2, _ForwardIterator __x3, _ForwardIterator __x4, _Compare __c) { - unsigned __r = _VSTD::__sort3<_Compare>(__x1, __x2, __x3, __c); + using _Ops = _IterOps<_AlgPolicy>; + + unsigned __r = std::__sort3<_AlgPolicy, _Compare>(__x1, __x2, __x3, __c); if (__c(*__x4, *__x3)) { - swap(*__x3, *__x4); + _Ops::iter_swap(__x3, __x4); ++__r; if (__c(*__x3, *__x2)) { - swap(*__x2, *__x3); + _Ops::iter_swap(__x2, __x3); ++__r; if (__c(*__x2, *__x1)) { - swap(*__x1, *__x2); + _Ops::iter_swap(__x1, __x2); ++__r; } } @@ -90,21 +140,28 @@ unsigned __sort4(_ForwardIterator __x1, _ForwardIterator __x2, _ForwardIterator // stable, 4-10 compares, 0-9 swaps -template <class _Compare, class _ForwardIterator> +template <class _WrappedComp, class _ForwardIterator> _LIBCPP_HIDDEN unsigned __sort5(_ForwardIterator __x1, _ForwardIterator __x2, _ForwardIterator __x3, - _ForwardIterator __x4, _ForwardIterator __x5, _Compare __c) { - unsigned __r = _VSTD::__sort4<_Compare>(__x1, __x2, __x3, __x4, __c); + _ForwardIterator __x4, _ForwardIterator __x5, _WrappedComp __wrapped_comp) { + using _Unwrap = _UnwrapAlgPolicy<_WrappedComp>; + using _AlgPolicy = typename _Unwrap::_AlgPolicy; + using _Ops = _IterOps<_AlgPolicy>; + + using _Compare = typename _Unwrap::_Comp; + _Compare __c = _Unwrap::__get_comp(__wrapped_comp); + + unsigned __r = std::__sort4<_AlgPolicy, _Compare>(__x1, __x2, __x3, __x4, __c); if (__c(*__x5, *__x4)) { - swap(*__x4, *__x5); + _Ops::iter_swap(__x4, __x5); ++__r; if (__c(*__x4, *__x3)) { - swap(*__x3, *__x4); + _Ops::iter_swap(__x3, __x4); ++__r; if (__c(*__x3, *__x2)) { - swap(*__x2, *__x3); + _Ops::iter_swap(__x2, __x3); ++__r; if (__c(*__x2, *__x1)) { - swap(*__x1, *__x2); + _Ops::iter_swap(__x1, __x2); ++__r; } } @@ -113,6 +170,16 @@ _LIBCPP_HIDDEN unsigned 
__sort5(_ForwardIterator __x1, _ForwardIterator __x2, _F return __r; } +template <class _AlgPolicy, class _Compare, class _ForwardIterator> +_LIBCPP_HIDDEN unsigned __sort5_wrap_policy( + _ForwardIterator __x1, _ForwardIterator __x2, _ForwardIterator __x3, _ForwardIterator __x4, _ForwardIterator __x5, + _Compare __c) { + using _WrappedComp = typename _WrapAlgPolicy<_AlgPolicy, _Compare>::type; + _WrappedComp __wrapped_comp(__c); + return std::__sort5<_WrappedComp>( + std::move(__x1), std::move(__x2), std::move(__x3), std::move(__x4), std::move(__x5), __wrapped_comp); +} + // The comparator being simple is a prerequisite for using the branchless optimization. template <class _Tp> struct __is_simple_comparator : false_type {}; @@ -137,6 +204,7 @@ using __use_branchless_sort = // Ensures that __c(*__x, *__y) is true by swapping *__x and *__y if necessary. template <class _Compare, class _RandomAccessIterator> inline _LIBCPP_HIDE_FROM_ABI void __cond_swap(_RandomAccessIterator __x, _RandomAccessIterator __y, _Compare __c) { + // Note: this function behaves correctly even with proxy iterators (because it relies on `value_type`). using value_type = typename iterator_traits<_RandomAccessIterator>::value_type; bool __r = __c(*__x, *__y); value_type __tmp = __r ? *__x : *__y; @@ -149,6 +217,7 @@ inline _LIBCPP_HIDE_FROM_ABI void __cond_swap(_RandomAccessIterator __x, _Random template <class _Compare, class _RandomAccessIterator> inline _LIBCPP_HIDE_FROM_ABI void __partially_sorted_swap(_RandomAccessIterator __x, _RandomAccessIterator __y, _RandomAccessIterator __z, _Compare __c) { + // Note: this function behaves correctly even with proxy iterators (because it relies on `value_type`). using value_type = typename iterator_traits<_RandomAccessIterator>::value_type; bool __r = __c(*__z, *__x); value_type __tmp = __r ? *__z : *__x; @@ -158,7 +227,7 @@ inline _LIBCPP_HIDE_FROM_ABI void __partially_sorted_swap(_RandomAccessIterator *__y = __r ? 
*__y : __tmp; } -template <class _Compare, class _RandomAccessIterator> +template <class, class _Compare, class _RandomAccessIterator> inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void> __sort3_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3, _Compare __c) { @@ -166,14 +235,14 @@ __sort3_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _VSTD::__partially_sorted_swap<_Compare>(__x1, __x2, __x3, __c); } -template <class _Compare, class _RandomAccessIterator> +template <class _AlgPolicy, class _Compare, class _RandomAccessIterator> inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<!__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void> __sort3_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3, _Compare __c) { - _VSTD::__sort3<_Compare>(__x1, __x2, __x3, __c); + std::__sort3<_AlgPolicy, _Compare>(__x1, __x2, __x3, __c); } -template <class _Compare, class _RandomAccessIterator> +template <class, class _Compare, class _RandomAccessIterator> inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void> __sort4_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3, _RandomAccessIterator __x4, _Compare __c) { @@ -184,14 +253,14 @@ __sort4_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _VSTD::__cond_swap<_Compare>(__x2, __x3, __c); } -template <class _Compare, class _RandomAccessIterator> +template <class _AlgPolicy, class _Compare, class _RandomAccessIterator> inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<!__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void> __sort4_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3, _RandomAccessIterator __x4, _Compare __c) { - _VSTD::__sort4<_Compare>(__x1, __x2, __x3, __x4, __c); + std::__sort4<_AlgPolicy, _Compare>(__x1, __x2, __x3, __x4, __c); } -template <class _Compare, class _RandomAccessIterator> +template <class, class _Compare, class _RandomAccessIterator> inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void> __sort5_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3, _RandomAccessIterator __x4, _RandomAccessIterator __x5, _Compare __c) { @@ -203,53 +272,57 @@ __sort5_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _VSTD::__partially_sorted_swap<_Compare>(__x2, __x3, __x4, __c); } -template <class _Compare, class _RandomAccessIterator> +template <class _AlgPolicy, class _Compare, class _RandomAccessIterator> inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<!__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void> __sort5_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3, _RandomAccessIterator __x4, _RandomAccessIterator __x5, _Compare __c) { - _VSTD::__sort5<_Compare>(__x1, __x2, __x3, __x4, __x5, __c); + std::__sort5_wrap_policy<_AlgPolicy, _Compare>(__x1, __x2, __x3, __x4, __x5, __c); } // Assumes size > 0 -template <class _Compare, class _BidirectionalIterator> +template <class _AlgPolicy, class _Compare, class _BidirectionalIterator> _LIBCPP_CONSTEXPR_AFTER_CXX11 void __selection_sort(_BidirectionalIterator __first, _BidirectionalIterator __last, _Compare __comp) { _BidirectionalIterator __lm1 = 
__last; for (--__lm1; __first != __lm1; ++__first) { - _BidirectionalIterator __i = _VSTD::min_element(__first, __last, __comp); + _BidirectionalIterator __i = std::__min_element<_Compare>(__first, __last, __comp); if (__i != __first) - swap(*__first, *__i); + _IterOps<_AlgPolicy>::iter_swap(__first, __i); } } -template <class _Compare, class _BidirectionalIterator> +template <class _AlgPolicy, class _Compare, class _BidirectionalIterator> void __insertion_sort(_BidirectionalIterator __first, _BidirectionalIterator __last, _Compare __comp) { + using _Ops = _IterOps<_AlgPolicy>; + typedef typename iterator_traits<_BidirectionalIterator>::value_type value_type; if (__first != __last) { _BidirectionalIterator __i = __first; for (++__i; __i != __last; ++__i) { _BidirectionalIterator __j = __i; - value_type __t(_VSTD::move(*__j)); + value_type __t(_Ops::__iter_move(__j)); for (_BidirectionalIterator __k = __i; __k != __first && __comp(__t, *--__k); --__j) - *__j = _VSTD::move(*__k); + *__j = _Ops::__iter_move(__k); *__j = _VSTD::move(__t); } } } -template <class _Compare, class _RandomAccessIterator> +template <class _AlgPolicy, class _Compare, class _RandomAccessIterator> void __insertion_sort_3(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { + using _Ops = _IterOps<_AlgPolicy>; + typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type; typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type; _RandomAccessIterator __j = __first + difference_type(2); - _VSTD::__sort3_maybe_branchless<_Compare>(__first, __first + difference_type(1), __j, __comp); + std::__sort3_maybe_branchless<_AlgPolicy, _Compare>(__first, __first + difference_type(1), __j, __comp); for (_RandomAccessIterator __i = __j + difference_type(1); __i != __last; ++__i) { if (__comp(*__i, *__j)) { - value_type __t(_VSTD::move(*__i)); + value_type __t(_Ops::__iter_move(__i)); _RandomAccessIterator __k = __j; __j = __i; do { - *__j = _VSTD::move(*__k); + *__j = _Ops::__iter_move(__k); __j = __k; } while (__j != __first && __comp(__t, *--__k)); *__j = _VSTD::move(__t); @@ -258,8 +331,16 @@ void __insertion_sort_3(_RandomAccessIterator __first, _RandomAccessIterator __l } } -template <class _Compare, class _RandomAccessIterator> -bool __insertion_sort_incomplete(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { +template <class _WrappedComp, class _RandomAccessIterator> +bool __insertion_sort_incomplete( + _RandomAccessIterator __first, _RandomAccessIterator __last, _WrappedComp __wrapped_comp) { + using _Unwrap = _UnwrapAlgPolicy<_WrappedComp>; + using _AlgPolicy = typename _Unwrap::_AlgPolicy; + using _Ops = _IterOps<_AlgPolicy>; + + using _Compare = typename _Unwrap::_Comp; + _Compare __comp = _Unwrap::__get_comp(__wrapped_comp); + typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type; switch (__last - __first) { case 0: @@ -267,32 +348,33 @@ bool __insertion_sort_incomplete(_RandomAccessIterator __first, _RandomAccessIte return true; case 2: if (__comp(*--__last, *__first)) - swap(*__first, *__last); + _IterOps<_AlgPolicy>::iter_swap(__first, __last); return true; case 3: - _VSTD::__sort3_maybe_branchless<_Compare>(__first, __first + difference_type(1), --__last, __comp); + std::__sort3_maybe_branchless<_AlgPolicy, _Compare>(__first, __first + difference_type(1), --__last, __comp); return true; case 4: - _VSTD::__sort4_maybe_branchless<_Compare>(__first, __first + difference_type(1), 
__first + difference_type(2), - --__last, __comp); + std::__sort4_maybe_branchless<_AlgPolicy, _Compare>( + __first, __first + difference_type(1), __first + difference_type(2), --__last, __comp); return true; case 5: - _VSTD::__sort5_maybe_branchless<_Compare>(__first, __first + difference_type(1), __first + difference_type(2), - __first + difference_type(3), --__last, __comp); + std::__sort5_maybe_branchless<_AlgPolicy, _Compare>( + __first, __first + difference_type(1), __first + difference_type(2), __first + difference_type(3), + --__last, __comp); return true; } typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type; _RandomAccessIterator __j = __first + difference_type(2); - _VSTD::__sort3_maybe_branchless<_Compare>(__first, __first + difference_type(1), __j, __comp); + std::__sort3_maybe_branchless<_AlgPolicy, _Compare>(__first, __first + difference_type(1), __j, __comp); const unsigned __limit = 8; unsigned __count = 0; for (_RandomAccessIterator __i = __j + difference_type(1); __i != __last; ++__i) { if (__comp(*__i, *__j)) { - value_type __t(_VSTD::move(*__i)); + value_type __t(_Ops::__iter_move(__i)); _RandomAccessIterator __k = __j; __j = __i; do { - *__j = _VSTD::move(*__k); + *__j = _Ops::__iter_move(__k); __j = __k; } while (__j != __first && __comp(__t, *--__k)); *__j = _VSTD::move(__t); @@ -304,27 +386,29 @@ bool __insertion_sort_incomplete(_RandomAccessIterator __first, _RandomAccessIte return true; } -template <class _Compare, class _BidirectionalIterator> +template <class _AlgPolicy, class _Compare, class _BidirectionalIterator> void __insertion_sort_move(_BidirectionalIterator __first1, _BidirectionalIterator __last1, typename iterator_traits<_BidirectionalIterator>::value_type* __first2, _Compare __comp) { + using _Ops = _IterOps<_AlgPolicy>; + typedef typename iterator_traits<_BidirectionalIterator>::value_type value_type; if (__first1 != __last1) { __destruct_n __d(0); unique_ptr<value_type, __destruct_n&> __h(__first2, __d); value_type* __last2 = __first2; - ::new ((void*)__last2) value_type(_VSTD::move(*__first1)); + ::new ((void*)__last2) value_type(_Ops::__iter_move(__first1)); __d.template __incr<value_type>(); for (++__last2; ++__first1 != __last1; ++__last2) { value_type* __j2 = __last2; value_type* __i2 = __j2; if (__comp(*__first1, *--__i2)) { - ::new ((void*)__j2) value_type(_VSTD::move(*__i2)); + ::new ((void*)__j2) value_type(std::move(*__i2)); __d.template __incr<value_type>(); for (--__j2; __i2 != __first2 && __comp(*__first1, *--__i2); --__j2) - *__j2 = _VSTD::move(*__i2); - *__j2 = _VSTD::move(*__first1); + *__j2 = std::move(*__i2); + *__j2 = _Ops::__iter_move(__first1); } else { - ::new ((void*)__j2) value_type(_VSTD::move(*__first1)); + ::new ((void*)__j2) value_type(_Ops::__iter_move(__first1)); __d.template __incr<value_type>(); } } @@ -332,9 +416,11 @@ void __insertion_sort_move(_BidirectionalIterator __first1, _BidirectionalIterat } } -template <class _Compare, class _RandomAccessIterator> +template <class _AlgPolicy, class _Compare, class _RandomAccessIterator> void __introsort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, typename iterator_traits<_RandomAccessIterator>::difference_type __depth) { + using _Ops = _IterOps<_AlgPolicy>; + typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type; typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type; const difference_type __limit = @@ -348,28 +434,29 @@ void 
__introsort(_RandomAccessIterator __first, _RandomAccessIterator __last, _C return; case 2: if (__comp(*--__last, *__first)) - swap(*__first, *__last); + _IterOps<_AlgPolicy>::iter_swap(__first, __last); return; case 3: - _VSTD::__sort3_maybe_branchless<_Compare>(__first, __first + difference_type(1), --__last, __comp); + std::__sort3_maybe_branchless<_AlgPolicy, _Compare>(__first, __first + difference_type(1), --__last, __comp); return; case 4: - _VSTD::__sort4_maybe_branchless<_Compare>(__first, __first + difference_type(1), __first + difference_type(2), - --__last, __comp); + std::__sort4_maybe_branchless<_AlgPolicy, _Compare>( + __first, __first + difference_type(1), __first + difference_type(2), --__last, __comp); return; case 5: - _VSTD::__sort5_maybe_branchless<_Compare>(__first, __first + difference_type(1), __first + difference_type(2), - __first + difference_type(3), --__last, __comp); + std::__sort5_maybe_branchless<_AlgPolicy, _Compare>( + __first, __first + difference_type(1), __first + difference_type(2), __first + difference_type(3), + --__last, __comp); return; } if (__len <= __limit) { - _VSTD::__insertion_sort_3<_Compare>(__first, __last, __comp); + std::__insertion_sort_3<_AlgPolicy, _Compare>(__first, __last, __comp); return; } // __len > 5 if (__depth == 0) { // Fallback to heap sort as Introsort suggests. - _VSTD::__partial_sort<_Compare>(__first, __last, __last, __comp); + std::__partial_sort<_AlgPolicy, _Compare>(__first, __last, __last, __comp); return; } --__depth; @@ -383,11 +470,12 @@ void __introsort(_RandomAccessIterator __first, _RandomAccessIterator __last, _C __delta = __len / 2; __m += __delta; __delta /= 2; - __n_swaps = _VSTD::__sort5<_Compare>(__first, __first + __delta, __m, __m + __delta, __lm1, __comp); + __n_swaps = std::__sort5_wrap_policy<_AlgPolicy, _Compare>( + __first, __first + __delta, __m, __m + __delta, __lm1, __comp); } else { __delta = __len / 2; __m += __delta; - __n_swaps = _VSTD::__sort3<_Compare>(__first, __m, __lm1, __comp); + __n_swaps = std::__sort3<_AlgPolicy, _Compare>(__first, __m, __lm1, __comp); } } // *__m is median @@ -414,7 +502,7 @@ void __introsort(_RandomAccessIterator __first, _RandomAccessIterator __last, _C if (__i == __j) return; // [__first, __last) all equivalent elements if (__comp(*__first, *__i)) { - swap(*__i, *__j); + _Ops::iter_swap(__i, __j); ++__n_swaps; ++__i; break; @@ -432,7 +520,7 @@ void __introsort(_RandomAccessIterator __first, _RandomAccessIterator __last, _C ; if (__i >= __j) break; - swap(*__i, *__j); + _Ops::iter_swap(__i, __j); ++__n_swaps; ++__i; } @@ -443,7 +531,7 @@ void __introsort(_RandomAccessIterator __first, _RandomAccessIterator __last, _C goto __restart; } if (__comp(*__j, *__m)) { - swap(*__i, *__j); + _Ops::iter_swap(__i, __j); ++__n_swaps; break; // found guard for downward moving __j, now use unguarded partition } @@ -465,7 +553,7 @@ void __introsort(_RandomAccessIterator __first, _RandomAccessIterator __last, _C ; if (__i > __j) break; - swap(*__i, *__j); + _Ops::iter_swap(__i, __j); ++__n_swaps; // It is known that __m != __j // If __m just moved, follow it @@ -476,14 +564,16 @@ void __introsort(_RandomAccessIterator __first, _RandomAccessIterator __last, _C } // [__first, __i) < *__m and *__m <= [__i, __last) if (__i != __m && __comp(*__m, *__i)) { - swap(*__i, *__m); + _Ops::iter_swap(__i, __m); ++__n_swaps; } // [__first, __i) < *__i and *__i <= [__i+1, __last) // If we were given a perfect partition, see if insertion sort is quick... 
if (__n_swaps == 0) { - bool __fs = _VSTD::__insertion_sort_incomplete<_Compare>(__first, __i, __comp); - if (_VSTD::__insertion_sort_incomplete<_Compare>(__i + difference_type(1), __last, __comp)) { + using _WrappedComp = typename _WrapAlgPolicy<_AlgPolicy, _Compare>::type; + _WrappedComp __wrapped_comp(__comp); + bool __fs = std::__insertion_sort_incomplete<_WrappedComp>(__first, __i, __wrapped_comp); + if (std::__insertion_sort_incomplete<_WrappedComp>(__i + difference_type(1), __last, __wrapped_comp)) { if (__fs) return; __last = __i; @@ -497,10 +587,10 @@ void __introsort(_RandomAccessIterator __first, _RandomAccessIterator __last, _C } // sort smaller range with recursive call and larger with tail recursion elimination if (__i - __first < __last - __i) { - _VSTD::__introsort<_Compare>(__first, __i, __comp, __depth); + std::__introsort<_AlgPolicy, _Compare>(__first, __i, __comp, __depth); __first = ++__i; } else { - _VSTD::__introsort<_Compare>(__i + difference_type(1), __last, __comp, __depth); + std::__introsort<_AlgPolicy, _Compare>(__i + difference_type(1), __last, __comp, __depth); __last = __i; } } @@ -525,17 +615,22 @@ inline _LIBCPP_HIDE_FROM_ABI _Number __log2i(_Number __n) { return __log2; } -template <class _Compare, class _RandomAccessIterator> -void __sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { +template <class _WrappedComp, class _RandomAccessIterator> +void __sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _WrappedComp __wrapped_comp) { typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type; difference_type __depth_limit = 2 * __log2i(__last - __first); - _VSTD::__introsort<_Compare>(__first, __last, __comp, __depth_limit); + + using _Unwrap = _UnwrapAlgPolicy<_WrappedComp>; + using _AlgPolicy = typename _Unwrap::_AlgPolicy; + using _Compare = typename _Unwrap::_Comp; + _Compare __comp = _Unwrap::__get_comp(__wrapped_comp); + std::__introsort<_AlgPolicy, _Compare>(__first, __last, __comp, __depth_limit); } template <class _Compare, class _Tp> inline _LIBCPP_INLINE_VISIBILITY void __sort(_Tp** __first, _Tp** __last, __less<_Tp*>&) { __less<uintptr_t> __comp; - _VSTD::__sort<__less<uintptr_t>&, uintptr_t*>((uintptr_t*)__first, (uintptr_t*)__last, __comp); + std::__sort<__less<uintptr_t>&, uintptr_t*>((uintptr_t*)__first, (uintptr_t*)__last, __comp); } extern template _LIBCPP_FUNC_VIS void __sort<__less<char>&, char*>(char*, char*, __less<char>&); @@ -576,22 +671,27 @@ extern template _LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less<long do extern template _LIBCPP_FUNC_VIS unsigned __sort5<__less<long double>&, long double*>(long double*, long double*, long double*, long double*, long double*, __less<long double>&); -template <class _RandomAccessIterator, class _Comp> +template <class _AlgPolicy, class _RandomAccessIterator, class _Comp> inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 void __sort_impl(_RandomAccessIterator __first, _RandomAccessIterator __last, _Comp& __comp) { - std::__debug_randomize_range(__first, __last); + std::__debug_randomize_range<_AlgPolicy>(__first, __last); + using _Comp_ref = typename __comp_ref_type<_Comp>::type; if (__libcpp_is_constant_evaluated()) { - std::__partial_sort<_Comp_ref>(__first, __last, __last, _Comp_ref(__comp)); + std::__partial_sort<_AlgPolicy>(__first, __last, __last, __comp); + } else { - std::__sort<_Comp_ref>(std::__unwrap_iter(__first), std::__unwrap_iter(__last), _Comp_ref(__comp)); + using _WrappedComp = 
typename _WrapAlgPolicy<_AlgPolicy, _Comp_ref>::type; + _Comp_ref __comp_ref(__comp); + _WrappedComp __wrapped_comp(__comp_ref); + std::__sort<_WrappedComp>(std::__unwrap_iter(__first), std::__unwrap_iter(__last), __wrapped_comp); } } template <class _RandomAccessIterator, class _Comp> inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 void sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Comp __comp) { - std::__sort_impl(std::move(__first), std::move(__last), __comp); + std::__sort_impl<_ClassicAlgPolicy>(std::move(__first), std::move(__last), __comp); } template <class _RandomAccessIterator> diff --git a/libcxx/include/__algorithm/sort_heap.h b/libcxx/include/__algorithm/sort_heap.h index 261adedd0eaf..b9f0b2c9690d 100644 --- a/libcxx/include/__algorithm/sort_heap.h +++ b/libcxx/include/__algorithm/sort_heap.h @@ -11,11 +11,12 @@ #include <__algorithm/comp.h> #include <__algorithm/comp_ref_type.h> +#include <__algorithm/iterator_operations.h> #include <__algorithm/pop_heap.h> #include <__config> #include <__iterator/iterator_traits.h> #include <__utility/move.h> -#include <type_traits> // swap +#include <type_traits> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -23,7 +24,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -template <class _Compare, class _RandomAccessIterator> +template <class _AlgPolicy, class _Compare, class _RandomAccessIterator> inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 void __sort_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare& __comp) { using _CompRef = typename __comp_ref_type<_Compare>::type; @@ -31,13 +32,16 @@ void __sort_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _C using difference_type = typename iterator_traits<_RandomAccessIterator>::difference_type; for (difference_type __n = __last - __first; __n > 1; --__last, (void) --__n) - std::__pop_heap<_CompRef>(__first, __last, __comp_ref, __n); + std::__pop_heap<_AlgPolicy, _CompRef>(__first, __last, __comp_ref, __n); } template <class _RandomAccessIterator, class _Compare> inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 void sort_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { - std::__sort_heap(std::move(__first), std::move(__last), __comp); + static_assert(std::is_copy_constructible<_RandomAccessIterator>::value, "Iterators must be copy constructible."); + static_assert(std::is_copy_assignable<_RandomAccessIterator>::value, "Iterators must be copy assignable."); + + std::__sort_heap<_ClassicAlgPolicy>(std::move(__first), std::move(__last), __comp); } template <class _RandomAccessIterator> diff --git a/libcxx/include/__algorithm/stable_partition.h b/libcxx/include/__algorithm/stable_partition.h index 969ac7a6173e..e5ad48b2ed51 100644 --- a/libcxx/include/__algorithm/stable_partition.h +++ b/libcxx/include/__algorithm/stable_partition.h @@ -9,13 +9,14 @@ #ifndef _LIBCPP___ALGORITHM_STABLE_PARTITION_H #define _LIBCPP___ALGORITHM_STABLE_PARTITION_H +#include <__algorithm/iterator_operations.h> #include <__algorithm/rotate.h> #include <__config> #include <__iterator/advance.h> #include <__iterator/distance.h> #include <__iterator/iterator_traits.h> -#include <__utility/swap.h> #include <memory> +#include <type_traits> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -23,11 +24,13 @@ _LIBCPP_BEGIN_NAMESPACE_STD -template <class _Predicate, class _ForwardIterator, class _Distance, class _Pair> +template <class _AlgPolicy, class 
_Predicate, class _ForwardIterator, class _Distance, class _Pair> _ForwardIterator -__stable_partition(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pred, +__stable_partition_impl(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pred, _Distance __len, _Pair __p, forward_iterator_tag __fit) { + using _Ops = _IterOps<_AlgPolicy>; + // *__first is known to be false // __len >= 1 if (__len == 1) @@ -37,7 +40,7 @@ __stable_partition(_ForwardIterator __first, _ForwardIterator __last, _Predicate _ForwardIterator __m = __first; if (__pred(*++__m)) { - swap(*__first, *__m); + _Ops::iter_swap(__first, __m); return __m; } return __first; @@ -50,7 +53,7 @@ __stable_partition(_ForwardIterator __first, _ForwardIterator __last, _Predicate // Move the falses into the temporary buffer, and the trues to the front of the line // Update __first to always point to the end of the trues value_type* __t = __p.first; - ::new ((void*)__t) value_type(_VSTD::move(*__first)); + ::new ((void*)__t) value_type(_Ops::__iter_move(__first)); __d.template __incr<value_type>(); ++__t; _ForwardIterator __i = __first; @@ -58,12 +61,12 @@ __stable_partition(_ForwardIterator __first, _ForwardIterator __last, _Predicate { if (__pred(*__i)) { - *__first = _VSTD::move(*__i); + *__first = _Ops::__iter_move(__i); ++__first; } else { - ::new ((void*)__t) value_type(_VSTD::move(*__i)); + ::new ((void*)__t) value_type(_Ops::__iter_move(__i)); __d.template __incr<value_type>(); ++__t; } @@ -72,7 +75,7 @@ __stable_partition(_ForwardIterator __first, _ForwardIterator __last, _Predicate // Move falses back into range, but don't mess up __first which points to first false __i = __first; for (value_type* __t2 = __p.first; __t2 < __t; ++__t2, (void) ++__i) - *__i = _VSTD::move(*__t2); + *__i = _Ops::__iter_move(__t2); // __h destructs moved-from values out of the temp buffer, but doesn't deallocate buffer return __first; } @@ -80,11 +83,12 @@ __stable_partition(_ForwardIterator __first, _ForwardIterator __last, _Predicate // __len >= 3 _ForwardIterator __m = __first; _Distance __len2 = __len / 2; // __len2 >= 2 - _VSTD::advance(__m, __len2); + _Ops::advance(__m, __len2); // recurse on [__first, __m), *__first know to be false // F????????????????? // f m l - _ForwardIterator __first_false = _VSTD::__stable_partition<_Predicate&>(__first, __m, __pred, __len2, __p, __fit); + _ForwardIterator __first_false = std::__stable_partition_impl<_AlgPolicy, _Predicate&>( + __first, __m, __pred, __len2, __p, __fit); // TTTFFFFF?????????? // f ff m l // recurse on [__m, __last], except increase __m until *(__m) is false, *__last know to be true @@ -99,18 +103,19 @@ __stable_partition(_ForwardIterator __first, _ForwardIterator __last, _Predicate } // TTTFFFFFTTTF?????? 
// f ff m m1 l - __second_false = _VSTD::__stable_partition<_Predicate&>(__m1, __last, __pred, __len_half, __p, __fit); + __second_false = std::__stable_partition_impl<_AlgPolicy, _Predicate&>( + __m1, __last, __pred, __len_half, __p, __fit); __second_half_done: // TTTFFFFFTTTTTFFFFF // f ff m sf l - return _VSTD::rotate(__first_false, __m, __second_false); + return std::__rotate<_AlgPolicy>(__first_false, __m, __second_false, __fit); // TTTTTTTTFFFFFFFFFF // | } -template <class _Predicate, class _ForwardIterator> +template <class _AlgPolicy, class _Predicate, class _ForwardIterator> _ForwardIterator -__stable_partition(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pred, +__stable_partition_impl(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pred, forward_iterator_tag) { const unsigned __alloc_limit = 3; // might want to make this a function of trivial assignment @@ -127,7 +132,7 @@ __stable_partition(_ForwardIterator __first, _ForwardIterator __last, _Predicate // *__first is known to be false typedef typename iterator_traits<_ForwardIterator>::difference_type difference_type; typedef typename iterator_traits<_ForwardIterator>::value_type value_type; - difference_type __len = _VSTD::distance(__first, __last); + difference_type __len = _IterOps<_AlgPolicy>::distance(__first, __last); pair<value_type*, ptrdiff_t> __p(0, 0); unique_ptr<value_type, __return_temporary_buffer> __h; if (__len >= __alloc_limit) @@ -138,20 +143,23 @@ _LIBCPP_SUPPRESS_DEPRECATED_PUSH _LIBCPP_SUPPRESS_DEPRECATED_POP __h.reset(__p.first); } - return _VSTD::__stable_partition<_Predicate&>(__first, __last, __pred, __len, __p, forward_iterator_tag()); + return std::__stable_partition_impl<_AlgPolicy, _Predicate&>( + std::move(__first), std::move(__last), __pred, __len, __p, forward_iterator_tag()); } -template <class _Predicate, class _BidirectionalIterator, class _Distance, class _Pair> +template <class _AlgPolicy, class _Predicate, class _BidirectionalIterator, class _Distance, class _Pair> _BidirectionalIterator -__stable_partition(_BidirectionalIterator __first, _BidirectionalIterator __last, _Predicate __pred, +__stable_partition_impl(_BidirectionalIterator __first, _BidirectionalIterator __last, _Predicate __pred, _Distance __len, _Pair __p, bidirectional_iterator_tag __bit) { + using _Ops = _IterOps<_AlgPolicy>; + // *__first is known to be false // *__last is known to be true // __len >= 2 if (__len == 2) { - swap(*__first, *__last); + _Ops::iter_swap(__first, __last); return __last; } if (__len == 3) @@ -159,12 +167,12 @@ __stable_partition(_BidirectionalIterator __first, _BidirectionalIterator __last _BidirectionalIterator __m = __first; if (__pred(*++__m)) { - swap(*__first, *__m); - swap(*__m, *__last); + _Ops::iter_swap(__first, __m); + _Ops::iter_swap(__m, __last); return __last; } - swap(*__m, *__last); - swap(*__first, *__m); + _Ops::iter_swap(__m, __last); + _Ops::iter_swap(__first, __m); return __m; } if (__len <= __p.second) @@ -175,7 +183,7 @@ __stable_partition(_BidirectionalIterator __first, _BidirectionalIterator __last // Move the falses into the temporary buffer, and the trues to the front of the line // Update __first to always point to the end of the trues value_type* __t = __p.first; - ::new ((void*)__t) value_type(_VSTD::move(*__first)); + ::new ((void*)__t) value_type(_Ops::__iter_move(__first)); __d.template __incr<value_type>(); ++__t; _BidirectionalIterator __i = __first; @@ -183,23 +191,23 @@ __stable_partition(_BidirectionalIterator __first, 
_BidirectionalIterator __last { if (__pred(*__i)) { - *__first = _VSTD::move(*__i); + *__first = _Ops::__iter_move(__i); ++__first; } else { - ::new ((void*)__t) value_type(_VSTD::move(*__i)); + ::new ((void*)__t) value_type(_Ops::__iter_move(__i)); __d.template __incr<value_type>(); ++__t; } } // move *__last, known to be true - *__first = _VSTD::move(*__i); + *__first = _Ops::__iter_move(__i); __i = ++__first; // All trues now at start of range, all falses in buffer // Move falses back into range, but don't mess up __first which points to first false for (value_type* __t2 = __p.first; __t2 < __t; ++__t2, (void) ++__i) - *__i = _VSTD::move(*__t2); + *__i = _Ops::__iter_move(__t2); // __h destructs moved-from values out of the temp buffer, but doesn't deallocate buffer return __first; } @@ -207,7 +215,7 @@ __stable_partition(_BidirectionalIterator __first, _BidirectionalIterator __last // __len >= 4 _BidirectionalIterator __m = __first; _Distance __len2 = __len / 2; // __len2 >= 2 - _VSTD::advance(__m, __len2); + _Ops::advance(__m, __len2); // recurse on [__first, __m-1], except reduce __m-1 until *(__m-1) is true, *__first know to be false // F????????????????T // f m l @@ -222,7 +230,8 @@ __stable_partition(_BidirectionalIterator __first, _BidirectionalIterator __last } // F???TFFF?????????T // f m1 m l - __first_false = _VSTD::__stable_partition<_Predicate&>(__first, __m1, __pred, __len_half, __p, __bit); + __first_false = std::__stable_partition_impl<_AlgPolicy, _Predicate&>( + __first, __m1, __pred, __len_half, __p, __bit); __first_half_done: // TTTFFFFF?????????T // f ff m l @@ -239,18 +248,19 @@ __first_half_done: } // TTTFFFFFTTTF?????T // f ff m m1 l - __second_false = _VSTD::__stable_partition<_Predicate&>(__m1, __last, __pred, __len_half, __p, __bit); + __second_false = std::__stable_partition_impl<_AlgPolicy, _Predicate&>( + __m1, __last, __pred, __len_half, __p, __bit); __second_half_done: // TTTFFFFFTTTTTFFFFF // f ff m sf l - return _VSTD::rotate(__first_false, __m, __second_false); + return std::__rotate<_AlgPolicy>(__first_false, __m, __second_false, __bit); // TTTTTTTTFFFFFFFFFF // | } -template <class _Predicate, class _BidirectionalIterator> +template <class _AlgPolicy, class _Predicate, class _BidirectionalIterator> _BidirectionalIterator -__stable_partition(_BidirectionalIterator __first, _BidirectionalIterator __last, _Predicate __pred, +__stable_partition_impl(_BidirectionalIterator __first, _BidirectionalIterator __last, _Predicate __pred, bidirectional_iterator_tag) { typedef typename iterator_traits<_BidirectionalIterator>::difference_type difference_type; @@ -276,7 +286,7 @@ __stable_partition(_BidirectionalIterator __first, _BidirectionalIterator __last // *__first is known to be false // *__last is known to be true // __len >= 2 - difference_type __len = _VSTD::distance(__first, __last) + 1; + difference_type __len = _IterOps<_AlgPolicy>::distance(__first, __last) + 1; pair<value_type*, ptrdiff_t> __p(0, 0); unique_ptr<value_type, __return_temporary_buffer> __h; if (__len >= __alloc_limit) @@ -287,7 +297,16 @@ _LIBCPP_SUPPRESS_DEPRECATED_PUSH _LIBCPP_SUPPRESS_DEPRECATED_POP __h.reset(__p.first); } - return _VSTD::__stable_partition<_Predicate&>(__first, __last, __pred, __len, __p, bidirectional_iterator_tag()); + return std::__stable_partition_impl<_AlgPolicy, _Predicate&>( + std::move(__first), std::move(__last), __pred, __len, __p, bidirectional_iterator_tag()); +} + +template <class _AlgPolicy, class _Predicate, class _ForwardIterator, class _IterCategory> 
+_LIBCPP_HIDE_FROM_ABI +_ForwardIterator __stable_partition( + _ForwardIterator __first, _ForwardIterator __last, _Predicate&& __pred, _IterCategory __iter_category) { + return std::__stable_partition_impl<_AlgPolicy, __uncvref_t<_Predicate>&>( + std::move(__first), std::move(__last), __pred, __iter_category); } template <class _ForwardIterator, class _Predicate> @@ -295,7 +314,9 @@ inline _LIBCPP_INLINE_VISIBILITY _ForwardIterator stable_partition(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) { - return _VSTD::__stable_partition<_Predicate&>(__first, __last, __pred, typename iterator_traits<_ForwardIterator>::iterator_category()); + using _IterCategory = typename iterator_traits<_ForwardIterator>::iterator_category; + return std::__stable_partition<_ClassicAlgPolicy, _Predicate&>( + std::move(__first), std::move(__last), __pred, _IterCategory()); } _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__algorithm/stable_sort.h b/libcxx/include/__algorithm/stable_sort.h index e3479aad62e6..6122758bdefe 100644 --- a/libcxx/include/__algorithm/stable_sort.h +++ b/libcxx/include/__algorithm/stable_sort.h @@ -12,11 +12,11 @@ #include <__algorithm/comp.h> #include <__algorithm/comp_ref_type.h> #include <__algorithm/inplace_merge.h> +#include <__algorithm/iterator_operations.h> #include <__algorithm/sort.h> #include <__config> #include <__iterator/iterator_traits.h> #include <__utility/move.h> -#include <__utility/swap.h> #include <memory> #include <type_traits> @@ -26,12 +26,14 @@ _LIBCPP_BEGIN_NAMESPACE_STD -template <class _Compare, class _InputIterator1, class _InputIterator2> +template <class _AlgPolicy, class _Compare, class _InputIterator1, class _InputIterator2> void __merge_move_construct(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _InputIterator2 __last2, typename iterator_traits<_InputIterator1>::value_type* __result, _Compare __comp) { + using _Ops = _IterOps<_AlgPolicy>; + typedef typename iterator_traits<_InputIterator1>::value_type value_type; __destruct_n __d(0); unique_ptr<value_type, __destruct_n&> __h(__result, __d); @@ -40,111 +42,115 @@ __merge_move_construct(_InputIterator1 __first1, _InputIterator1 __last1, if (__first1 == __last1) { for (; __first2 != __last2; ++__first2, (void) ++__result, __d.template __incr<value_type>()) - ::new ((void*)__result) value_type(_VSTD::move(*__first2)); + ::new ((void*)__result) value_type(_Ops::__iter_move(__first2)); __h.release(); return; } if (__first2 == __last2) { for (; __first1 != __last1; ++__first1, (void) ++__result, __d.template __incr<value_type>()) - ::new ((void*)__result) value_type(_VSTD::move(*__first1)); + ::new ((void*)__result) value_type(_Ops::__iter_move(__first1)); __h.release(); return; } if (__comp(*__first2, *__first1)) { - ::new ((void*)__result) value_type(_VSTD::move(*__first2)); + ::new ((void*)__result) value_type(_Ops::__iter_move(__first2)); __d.template __incr<value_type>(); ++__first2; } else { - ::new ((void*)__result) value_type(_VSTD::move(*__first1)); + ::new ((void*)__result) value_type(_Ops::__iter_move(__first1)); __d.template __incr<value_type>(); ++__first1; } } } -template <class _Compare, class _InputIterator1, class _InputIterator2, class _OutputIterator> +template <class _AlgPolicy, class _Compare, class _InputIterator1, class _InputIterator2, class _OutputIterator> void __merge_move_assign(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _InputIterator2 __last2, _OutputIterator __result, _Compare __comp) { 
+ using _Ops = _IterOps<_AlgPolicy>; + for (; __first1 != __last1; ++__result) { if (__first2 == __last2) { for (; __first1 != __last1; ++__first1, (void) ++__result) - *__result = _VSTD::move(*__first1); + *__result = _Ops::__iter_move(__first1); return; } if (__comp(*__first2, *__first1)) { - *__result = _VSTD::move(*__first2); + *__result = _Ops::__iter_move(__first2); ++__first2; } else { - *__result = _VSTD::move(*__first1); + *__result = _Ops::__iter_move(__first1); ++__first1; } } for (; __first2 != __last2; ++__first2, (void) ++__result) - *__result = _VSTD::move(*__first2); + *__result = _Ops::__iter_move(__first2); } -template <class _Compare, class _RandomAccessIterator> +template <class _AlgPolicy, class _Compare, class _RandomAccessIterator> void __stable_sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, typename iterator_traits<_RandomAccessIterator>::difference_type __len, typename iterator_traits<_RandomAccessIterator>::value_type* __buff, ptrdiff_t __buff_size); -template <class _Compare, class _RandomAccessIterator> +template <class _AlgPolicy, class _Compare, class _RandomAccessIterator> void __stable_sort_move(_RandomAccessIterator __first1, _RandomAccessIterator __last1, _Compare __comp, typename iterator_traits<_RandomAccessIterator>::difference_type __len, typename iterator_traits<_RandomAccessIterator>::value_type* __first2) { + using _Ops = _IterOps<_AlgPolicy>; + typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type; switch (__len) { case 0: return; case 1: - ::new ((void*)__first2) value_type(_VSTD::move(*__first1)); + ::new ((void*)__first2) value_type(_Ops::__iter_move(__first1)); return; case 2: __destruct_n __d(0); unique_ptr<value_type, __destruct_n&> __h2(__first2, __d); if (__comp(*--__last1, *__first1)) { - ::new ((void*)__first2) value_type(_VSTD::move(*__last1)); + ::new ((void*)__first2) value_type(_Ops::__iter_move(__last1)); __d.template __incr<value_type>(); ++__first2; - ::new ((void*)__first2) value_type(_VSTD::move(*__first1)); + ::new ((void*)__first2) value_type(_Ops::__iter_move(__first1)); } else { - ::new ((void*)__first2) value_type(_VSTD::move(*__first1)); + ::new ((void*)__first2) value_type(_Ops::__iter_move(__first1)); __d.template __incr<value_type>(); ++__first2; - ::new ((void*)__first2) value_type(_VSTD::move(*__last1)); + ::new ((void*)__first2) value_type(_Ops::__iter_move(__last1)); } __h2.release(); return; } if (__len <= 8) { - _VSTD::__insertion_sort_move<_Compare>(__first1, __last1, __first2, __comp); + std::__insertion_sort_move<_AlgPolicy, _Compare>(__first1, __last1, __first2, __comp); return; } typename iterator_traits<_RandomAccessIterator>::difference_type __l2 = __len / 2; _RandomAccessIterator __m = __first1 + __l2; - _VSTD::__stable_sort<_Compare>(__first1, __m, __comp, __l2, __first2, __l2); - _VSTD::__stable_sort<_Compare>(__m, __last1, __comp, __len - __l2, __first2 + __l2, __len - __l2); - _VSTD::__merge_move_construct<_Compare>(__first1, __m, __m, __last1, __first2, __comp); + std::__stable_sort<_AlgPolicy, _Compare>(__first1, __m, __comp, __l2, __first2, __l2); + std::__stable_sort<_AlgPolicy, _Compare>(__m, __last1, __comp, __len - __l2, __first2 + __l2, __len - __l2); + std::__merge_move_construct<_AlgPolicy, _Compare>(__first1, __m, __m, __last1, __first2, __comp); } template <class _Tp> @@ -153,7 +159,7 @@ struct __stable_sort_switch static const unsigned value = 128*is_trivially_copy_assignable<_Tp>::value; }; -template <class _Compare, class 
_RandomAccessIterator> +template <class _AlgPolicy, class _Compare, class _RandomAccessIterator> void __stable_sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, typename iterator_traits<_RandomAccessIterator>::difference_type __len, @@ -168,12 +174,12 @@ __stable_sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Comp return; case 2: if (__comp(*--__last, *__first)) - swap(*__first, *__last); + _IterOps<_AlgPolicy>::iter_swap(__first, __last); return; } if (__len <= static_cast<difference_type>(__stable_sort_switch<value_type>::value)) { - _VSTD::__insertion_sort<_Compare>(__first, __last, __comp); + std::__insertion_sort<_AlgPolicy, _Compare>(__first, __last, __comp); return; } typename iterator_traits<_RandomAccessIterator>::difference_type __l2 = __len / 2; @@ -182,11 +188,12 @@ __stable_sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Comp { __destruct_n __d(0); unique_ptr<value_type, __destruct_n&> __h2(__buff, __d); - _VSTD::__stable_sort_move<_Compare>(__first, __m, __comp, __l2, __buff); + std::__stable_sort_move<_AlgPolicy, _Compare>(__first, __m, __comp, __l2, __buff); __d.__set(__l2, (value_type*)nullptr); - _VSTD::__stable_sort_move<_Compare>(__m, __last, __comp, __len - __l2, __buff + __l2); + std::__stable_sort_move<_AlgPolicy, _Compare>(__m, __last, __comp, __len - __l2, __buff + __l2); __d.__set(__len, (value_type*)nullptr); - _VSTD::__merge_move_assign<_Compare>(__buff, __buff + __l2, __buff + __l2, __buff + __len, __first, __comp); + std::__merge_move_assign<_AlgPolicy, _Compare>( + __buff, __buff + __l2, __buff + __l2, __buff + __len, __first, __comp); // _VSTD::__merge<_Compare>(move_iterator<value_type*>(__buff), // move_iterator<value_type*>(__buff + __l2), // move_iterator<_RandomAccessIterator>(__buff + __l2), @@ -194,12 +201,12 @@ __stable_sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Comp // __first, __comp); return; } - _VSTD::__stable_sort<_Compare>(__first, __m, __comp, __l2, __buff, __buff_size); - _VSTD::__stable_sort<_Compare>(__m, __last, __comp, __len - __l2, __buff, __buff_size); - _VSTD::__inplace_merge<_Compare>(__first, __m, __last, __comp, __l2, __len - __l2, __buff, __buff_size); + std::__stable_sort<_AlgPolicy, _Compare>(__first, __m, __comp, __l2, __buff, __buff_size); + std::__stable_sort<_AlgPolicy, _Compare>(__m, __last, __comp, __len - __l2, __buff, __buff_size); + std::__inplace_merge<_AlgPolicy, _Compare>(__first, __m, __last, __comp, __l2, __len - __l2, __buff, __buff_size); } -template <class _RandomAccessIterator, class _Compare> +template <class _AlgPolicy, class _RandomAccessIterator, class _Compare> inline _LIBCPP_HIDE_FROM_ABI void __stable_sort_impl(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare& __comp) { using value_type = typename iterator_traits<_RandomAccessIterator>::value_type; @@ -217,13 +224,13 @@ _LIBCPP_SUPPRESS_DEPRECATED_POP } using _Comp_ref = typename __comp_ref_type<_Compare>::type; - std::__stable_sort<_Comp_ref>(__first, __last, __comp, __len, __buf.first, __buf.second); + std::__stable_sort<_AlgPolicy, _Comp_ref>(__first, __last, __comp, __len, __buf.first, __buf.second); } template <class _RandomAccessIterator, class _Compare> inline _LIBCPP_HIDE_FROM_ABI void stable_sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { - std::__stable_sort_impl(std::move(__first), std::move(__last), __comp); + std::__stable_sort_impl<_ClassicAlgPolicy>(std::move(__first), std::move(__last), 
__comp); } template <class _RandomAccessIterator> diff --git a/libcxx/include/__algorithm/unwrap_iter.h b/libcxx/include/__algorithm/unwrap_iter.h index 7d1807b7bbf9..fa9a8fbf2dde 100644 --- a/libcxx/include/__algorithm/unwrap_iter.h +++ b/libcxx/include/__algorithm/unwrap_iter.h @@ -12,6 +12,7 @@ #include <__config> #include <__iterator/iterator_traits.h> #include <__memory/pointer_traits.h> +#include <__utility/move.h> #include <type_traits> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -20,77 +21,50 @@ _LIBCPP_BEGIN_NAMESPACE_STD -// The job of __unwrap_iter is to lower contiguous iterators (such as -// vector<T>::iterator) into pointers, to reduce the number of template -// instantiations and to enable pointer-based optimizations e.g. in std::copy. -// For iterators that are not contiguous, it must be a no-op. +// TODO: Change the name of __unwrap_iter_impl to something more appropriate +// The job of __unwrap_iter is to remove iterator wrappers (like reverse_iterator or __wrap_iter), +// to reduce the number of template instantiations and to enable pointer-based optimizations e.g. in std::copy. // In debug mode, we don't do this. // -// __unwrap_iter is non-constexpr for user-defined iterators whose -// `to_address` and/or `operator->` is non-constexpr. This is okay; but we -// try to avoid doing __unwrap_iter in constant-evaluated contexts anyway. -// // Some algorithms (e.g. std::copy, but not std::sort) need to convert an -// "unwrapped" result back into a contiguous iterator. Since contiguous iterators -// are random-access, we can do this portably using iterator arithmetic; this -// is the job of __rewrap_iter. +// "unwrapped" result back into the original iterator type. Doing that is the job of __rewrap_iter. +// Default case - we can't unwrap anything template <class _Iter, bool = __is_cpp17_contiguous_iterator<_Iter>::value> struct __unwrap_iter_impl { - static _LIBCPP_CONSTEXPR _Iter - __apply(_Iter __i) _NOEXCEPT { - return __i; - } + static _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _Iter __rewrap(_Iter, _Iter __iter) { return __iter; } + static _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _Iter __unwrap(_Iter __i) _NOEXCEPT { return __i; } }; #ifndef _LIBCPP_ENABLE_DEBUG_MODE +// It's a contiguous iterator, so we can use a raw pointer instead template <class _Iter> struct __unwrap_iter_impl<_Iter, true> { - static _LIBCPP_CONSTEXPR decltype(_VSTD::__to_address(declval<_Iter>())) - __apply(_Iter __i) _NOEXCEPT { - return _VSTD::__to_address(__i); - } -}; + using _ToAddressT = decltype(std::__to_address(std::declval<_Iter>())); -#endif // !_LIBCPP_ENABLE_DEBUG_MODE - -template<class _Iter, class _Impl = __unwrap_iter_impl<_Iter> > -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR -decltype(_Impl::__apply(declval<_Iter>())) -__unwrap_iter(_Iter __i) _NOEXCEPT -{ - return _Impl::__apply(__i); -} - -template <class _OrigIter, class _UnwrappedIter> -struct __rewrap_iter_impl { - static _LIBCPP_CONSTEXPR _OrigIter __apply(_OrigIter __first, _UnwrappedIter __result) { - // Precondition: __result is reachable from __first - // Precondition: _OrigIter is a contiguous iterator - return __first + (__result - std::__unwrap_iter(__first)); + static _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _Iter __rewrap(_Iter __orig_iter, _ToAddressT __unwrapped_iter) { + return __orig_iter + (__unwrapped_iter - std::__to_address(__orig_iter)); } -}; -template <class _OrigIter> -struct __rewrap_iter_impl<_OrigIter, _OrigIter> { - static _LIBCPP_CONSTEXPR _OrigIter __apply(_OrigIter, _OrigIter __result) 
{ - return __result; + static _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _ToAddressT __unwrap(_Iter __i) _NOEXCEPT { + return std::__to_address(__i); } }; -template<class _OrigIter> -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR -_OrigIter __rewrap_iter(_OrigIter, _OrigIter __result) -{ - return __result; +#endif // !_LIBCPP_ENABLE_DEBUG_MODE + +template<class _Iter, + class _Impl = __unwrap_iter_impl<_Iter>, + __enable_if_t<is_copy_constructible<_Iter>::value, int> = 0> +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 +decltype(_Impl::__unwrap(std::declval<_Iter>())) __unwrap_iter(_Iter __i) _NOEXCEPT { + return _Impl::__unwrap(__i); } -template<class _OrigIter, class _UnwrappedIter, class _Impl = __rewrap_iter_impl<_OrigIter, _UnwrappedIter> > -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR -_OrigIter __rewrap_iter(_OrigIter __first, _UnwrappedIter __result) -{ - return _Impl::__apply(__first, __result); +template <class _OrigIter, class _Iter, class _Impl = __unwrap_iter_impl<_OrigIter> > +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _OrigIter __rewrap_iter(_OrigIter __orig_iter, _Iter __iter) _NOEXCEPT { + return _Impl::__rewrap(std::move(__orig_iter), std::move(__iter)); } _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__algorithm/upper_bound.h b/libcxx/include/__algorithm/upper_bound.h index 3fc254873532..1045380bc84e 100644 --- a/libcxx/include/__algorithm/upper_bound.h +++ b/libcxx/include/__algorithm/upper_bound.h @@ -11,10 +11,15 @@ #include <__algorithm/comp.h> #include <__algorithm/half_positive.h> +#include <__algorithm/iterator_operations.h> #include <__config> +#include <__functional/identity.h> +#include <__functional/invoke.h> #include <__iterator/advance.h> #include <__iterator/distance.h> #include <__iterator/iterator_traits.h> +#include <__type_traits/is_copy_constructible.h> +#include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -22,45 +27,40 @@ _LIBCPP_BEGIN_NAMESPACE_STD -template <class _Compare, class _ForwardIterator, class _Tp> -_LIBCPP_CONSTEXPR_AFTER_CXX17 _ForwardIterator -__upper_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, _Compare __comp) -{ - typedef typename iterator_traits<_ForwardIterator>::difference_type difference_type; - difference_type __len = _VSTD::distance(__first, __last); - while (__len != 0) - { - difference_type __l2 = _VSTD::__half_positive(__len); - _ForwardIterator __m = __first; - _VSTD::advance(__m, __l2); - if (__comp(__value, *__m)) - __len = __l2; - else - { - __first = ++__m; - __len -= __l2 + 1; - } +template <class _AlgPolicy, class _Compare, class _Iter, class _Sent, class _Tp, class _Proj> +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 _Iter +__upper_bound(_Iter __first, _Sent __last, const _Tp& __value, _Compare&& __comp, _Proj&& __proj) { + auto __len = _IterOps<_AlgPolicy>::distance(__first, __last); + while (__len != 0) { + auto __half_len = std::__half_positive(__len); + auto __mid = _IterOps<_AlgPolicy>::next(__first, __half_len); + if (std::__invoke(__comp, __value, std::__invoke(__proj, *__mid))) + __len = __half_len; + else { + __first = ++__mid; + __len -= __half_len + 1; } - return __first; + } + return __first; } template <class _ForwardIterator, class _Tp, class _Compare> -_LIBCPP_NODISCARD_EXT inline -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -_ForwardIterator -upper_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, _Compare __comp) -{ - return _VSTD::__upper_bound<_Compare&>(__first, 
__last, __value, __comp); +_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 _ForwardIterator +upper_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, _Compare __comp) { + static_assert(is_copy_constructible<_ForwardIterator>::value, + "Iterator has to be copy constructible"); + return std::__upper_bound<_ClassicAlgPolicy>( + std::move(__first), std::move(__last), __value, std::move(__comp), std::__identity()); } template <class _ForwardIterator, class _Tp> -_LIBCPP_NODISCARD_EXT inline -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -_ForwardIterator -upper_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) -{ - return _VSTD::upper_bound(__first, __last, __value, - __less<_Tp, typename iterator_traits<_ForwardIterator>::value_type>()); +_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 _ForwardIterator +upper_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) { + return std::upper_bound( + std::move(__first), + std::move(__last), + __value, + __less<_Tp, typename iterator_traits<_ForwardIterator>::value_type>()); } _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__config b/libcxx/include/__config index 22c2ed7fd87b..8c2f7614af53 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -214,6 +214,12 @@ # error "libc++ only supports C++03 with Clang-based compilers. Please enable C++11" # endif +# ifdef _LIBCPP_COMPILER_MSVC +# error If you successfully use libc++ with MSVC please tell the libc++ developers and consider upstreaming your \ +changes. We are not aware of anybody using this configuration and know that at least some code is currently broken. \ +If there are users of this configuration we are happy to provide support. +# endif + // FIXME: ABI detection should be done via compiler builtin macros. This // is just a placeholder until Clang implements such macros. For now assume // that Windows compilers pretending to be MSVC++ target the Microsoft ABI, @@ -237,6 +243,19 @@ # define _LIBCPP_ABI_VCRUNTIME # endif +# if __has_feature(experimental_library) +# ifndef _LIBCPP_ENABLE_EXPERIMENTAL +# define _LIBCPP_ENABLE_EXPERIMENTAL +# endif +# endif + +// Incomplete features get their own specific disabling flags. This makes it +// easier to grep for target specific flags once the feature is complete. +# if !defined(_LIBCPP_ENABLE_EXPERIMENTAL) && !defined(_LIBCPP_BUILDING_LIBRARY) +# define _LIBCPP_HAS_NO_INCOMPLETE_FORMAT +# define _LIBCPP_HAS_NO_INCOMPLETE_RANGES +# endif + // Need to detect which libc we're using if we're on Linux. # if defined(__linux__) # include <features.h> @@ -545,11 +564,15 @@ typedef __char32_t char32_t; # define _LIBCPP_TYPE_VIS _LIBCPP_VISIBILITY("default") # define _LIBCPP_TEMPLATE_DATA_VIS _LIBCPP_VISIBILITY("default") # define _LIBCPP_EXPORTED_FROM_ABI _LIBCPP_VISIBILITY("default") -# define _LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_VISIBILITY("default") # define _LIBCPP_EXCEPTION_ABI _LIBCPP_VISIBILITY("default") # define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS _LIBCPP_VISIBILITY("default") # define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS +// TODO: Make this a proper customization point or remove the option to override it. 
+# ifndef _LIBCPP_OVERRIDABLE_FUNC_VIS +# define _LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_VISIBILITY("default") +# endif + # if !defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS) // The inline should be removed once PR32114 is resolved # define _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS inline _LIBCPP_HIDDEN diff --git a/libcxx/include/__debug_utils/randomize_range.h b/libcxx/include/__debug_utils/randomize_range.h index fd5b9e588493..9843709019d4 100644 --- a/libcxx/include/__debug_utils/randomize_range.h +++ b/libcxx/include/__debug_utils/randomize_range.h @@ -22,15 +22,16 @@ _LIBCPP_BEGIN_NAMESPACE_STD -template <class _Iterator> -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 void __debug_randomize_range(_Iterator __first, _Iterator __last) { +template <class _AlgPolicy, class _Iterator, class _Sentinel> +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 +void __debug_randomize_range(_Iterator __first, _Sentinel __last) { #ifdef _LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY # ifdef _LIBCPP_CXX03_LANG # error Support for unspecified stability is only for C++11 and higher # endif if (!__libcpp_is_constant_evaluated()) - std::shuffle(__first, __last, __libcpp_debug_randomizer()); + std::__shuffle<_AlgPolicy>(__first, __last, __libcpp_debug_randomizer()); #else (void)__first; (void)__last; diff --git a/libcxx/include/__format/extended_grapheme_cluster_table.h b/libcxx/include/__format/extended_grapheme_cluster_table.h new file mode 100644 index 000000000000..00cd0e91cd15 --- /dev/null +++ b/libcxx/include/__format/extended_grapheme_cluster_table.h @@ -0,0 +1,332 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// WARNING, this entire header is generated by +// utiles/generate_extended_grapheme_cluster_table.py +// DO NOT MODIFY! + +// UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE +// +// See Terms of Use <https://www.unicode.org/copyright.html> +// for definitions of Unicode Inc.'s Data Files and Software. +// +// NOTICE TO USER: Carefully read the following legal agreement. +// BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S +// DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"), +// YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE +// TERMS AND CONDITIONS OF THIS AGREEMENT. +// IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE +// THE DATA FILES OR SOFTWARE. +// +// COPYRIGHT AND PERMISSION NOTICE +// +// Copyright (c) 1991-2022 Unicode, Inc. All rights reserved. +// Distributed under the Terms of Use in https://www.unicode.org/copyright.html. 
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of the Unicode data files and any associated documentation
+// (the "Data Files") or Unicode software and any associated documentation
+// (the "Software") to deal in the Data Files or Software
+// without restriction, including without limitation the rights to use,
+// copy, modify, merge, publish, distribute, and/or sell copies of
+// the Data Files or Software, and to permit persons to whom the Data Files
+// or Software are furnished to do so, provided that either
+// (a) this copyright and permission notice appear with all copies
+// of the Data Files or Software, or
+// (b) this copyright and permission notice appear in associated
+// Documentation.
+//
+// THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
+// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+// IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
+// NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
+// DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+// DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+// TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+// PERFORMANCE OF THE DATA FILES OR SOFTWARE.
+//
+// Except as contained in this notice, the name of a copyright holder
+// shall not be used in advertising or otherwise to promote the sale,
+// use or other dealings in these Data Files or Software without prior
+// written authorization of the copyright holder.
+
+#ifndef _LIBCPP___FORMAT_EXTENDED_GRAPHEME_CLUSTER_TABLE_H
+#define _LIBCPP___FORMAT_EXTENDED_GRAPHEME_CLUSTER_TABLE_H
+
+#include <__algorithm/upper_bound.h>
+#include <__config>
+#include <__iterator/access.h>
+#include <cstddef>
+#include <cstdint>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17
+
+namespace __extended_grapheme_custer_property_boundary {
+
+enum class __property : uint8_t {
+ // Values generated from the data files.
+ __CR,
+ __Control,
+ __Extend,
+ __Extended_Pictographic,
+ __L,
+ __LF,
+ __LV,
+ __LVT,
+ __Prepend,
+ __Regional_Indicator,
+ __SpacingMark,
+ __T,
+ __V,
+ __ZWJ,
+
+ // The properties below aren't stored in the "database".
+
+ // Text position properties.
+ __sot,
+ __eot,
+
+ // The code unit has none of the above properties.
+ __none
+};
+
+/// The entries of the extended grapheme cluster boundary property table.
+///
+/// The data is generated from
+/// - https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt
+/// - https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt
+///
+/// The data has 3 values
+/// - bits [0, 3] The property. One of the values generated from the data files
+/// of \ref __property
+/// - bits [4, 10] The size of the range.
+/// - bits [11, 31] The lower bound code point of the range. The upper bound of
+/// the range is lower bound + size.
+///
+/// The 7 bits for the size allow a maximum range of 128 elements. Some ranges
+/// in the Unicode tables are larger. They are stored in multiple consecutive
+/// ranges in the data table. An alternative would be to store the sizes in a
+/// separate 16-bit value.
The original MSVC STL code had such an approach, but +/// this approach uses less space for the data and is about 4% faster in the +/// following benchmark. +/// libcxx/benchmarks/std_format_spec_string_unicode.bench.cpp +inline constexpr uint32_t __entries[1480] = { + 0x00000091, 0x00005005, 0x00005811, 0x00006800, 0x00007111, 0x0003fa01, 0x00054803, 0x00056801, 0x00057003, + 0x001806f2, 0x00241862, 0x002c8ac2, 0x002df802, 0x002e0812, 0x002e2012, 0x002e3802, 0x00300058, 0x003080a2, + 0x0030e001, 0x00325942, 0x00338002, 0x0036b062, 0x0036e808, 0x0036f852, 0x00373812, 0x00375032, 0x00387808, + 0x00388802, 0x003981a2, 0x003d30a2, 0x003f5882, 0x003fe802, 0x0040b032, 0x0040d882, 0x00412822, 0x00414842, + 0x0042c822, 0x00448018, 0x0044c072, 0x00465172, 0x00471008, 0x004719f2, 0x0048180a, 0x0049d002, 0x0049d80a, + 0x0049e002, 0x0049f02a, 0x004a0872, 0x004a483a, 0x004a6802, 0x004a701a, 0x004a8862, 0x004b1012, 0x004c0802, + 0x004c101a, 0x004de002, 0x004df002, 0x004df81a, 0x004e0832, 0x004e381a, 0x004e581a, 0x004e6802, 0x004eb802, + 0x004f1012, 0x004ff002, 0x00500812, 0x0050180a, 0x0051e002, 0x0051f02a, 0x00520812, 0x00523812, 0x00525822, + 0x00528802, 0x00538012, 0x0053a802, 0x00540812, 0x0054180a, 0x0055e002, 0x0055f02a, 0x00560842, 0x00563812, + 0x0056480a, 0x0056581a, 0x00566802, 0x00571012, 0x0057d052, 0x00580802, 0x0058101a, 0x0059e002, 0x0059f012, + 0x005a000a, 0x005a0832, 0x005a381a, 0x005a581a, 0x005a6802, 0x005aa822, 0x005b1012, 0x005c1002, 0x005df002, + 0x005df80a, 0x005e0002, 0x005e081a, 0x005e302a, 0x005e502a, 0x005e6802, 0x005eb802, 0x00600002, 0x0060082a, + 0x00602002, 0x0061e002, 0x0061f022, 0x0062083a, 0x00623022, 0x00625032, 0x0062a812, 0x00631012, 0x00640802, + 0x0064101a, 0x0065e002, 0x0065f00a, 0x0065f802, 0x0066001a, 0x00661002, 0x0066181a, 0x00663002, 0x0066381a, + 0x0066501a, 0x00666012, 0x0066a812, 0x00671012, 0x00680012, 0x0068101a, 0x0069d812, 0x0069f002, 0x0069f81a, + 0x006a0832, 0x006a302a, 0x006a502a, 0x006a6802, 0x006a7008, 0x006ab802, 0x006b1012, 0x006c0802, 0x006c101a, + 0x006e5002, 0x006e7802, 0x006e801a, 0x006e9022, 0x006eb002, 0x006ec06a, 0x006ef802, 0x006f901a, 0x00718802, + 0x0071980a, 0x0071a062, 0x00723872, 0x00758802, 0x0075980a, 0x0075a082, 0x00764052, 0x0078c012, 0x0079a802, + 0x0079b802, 0x0079c802, 0x0079f01a, 0x007b88d2, 0x007bf80a, 0x007c0042, 0x007c3012, 0x007c68a2, 0x007cca32, + 0x007e3002, 0x00816832, 0x0081880a, 0x00819052, 0x0081c812, 0x0081d81a, 0x0081e812, 0x0082b01a, 0x0082c012, + 0x0082f022, 0x00838832, 0x00841002, 0x0084200a, 0x00842812, 0x00846802, 0x0084e802, 0x008805f4, 0x008b047c, + 0x008d457b, 0x009ae822, 0x00b89022, 0x00b8a80a, 0x00b99012, 0x00b9a00a, 0x00ba9012, 0x00bb9012, 0x00bda012, + 0x00bdb00a, 0x00bdb862, 0x00bdf07a, 0x00be3002, 0x00be381a, 0x00be48a2, 0x00bee802, 0x00c05822, 0x00c07001, + 0x00c07802, 0x00c42812, 0x00c54802, 0x00c90022, 0x00c9183a, 0x00c93812, 0x00c9482a, 0x00c9801a, 0x00c99002, + 0x00c9985a, 0x00c9c822, 0x00d0b812, 0x00d0c81a, 0x00d0d802, 0x00d2a80a, 0x00d2b002, 0x00d2b80a, 0x00d2c062, + 0x00d30002, 0x00d31002, 0x00d32872, 0x00d3685a, 0x00d39892, 0x00d3f802, 0x00d581e2, 0x00d80032, 0x00d8200a, + 0x00d9a062, 0x00d9d80a, 0x00d9e002, 0x00d9e84a, 0x00da1002, 0x00da181a, 0x00db5882, 0x00dc0012, 0x00dc100a, + 0x00dd080a, 0x00dd1032, 0x00dd301a, 0x00dd4012, 0x00dd500a, 0x00dd5822, 0x00df3002, 0x00df380a, 0x00df4012, + 0x00df502a, 0x00df6802, 0x00df700a, 0x00df7822, 0x00df901a, 0x00e1207a, 0x00e16072, 0x00e1a01a, 0x00e1b012, + 0x00e68022, 0x00e6a0c2, 0x00e7080a, 0x00e71062, 0x00e76802, 0x00e7a002, 0x00e7b80a, 
0x00e7c012, 0x00ee03f2, + 0x01005801, 0x01006002, 0x0100680d, 0x01007011, 0x01014061, 0x0101e003, 0x01024803, 0x010300f1, 0x01068202, + 0x01091003, 0x0109c803, 0x010ca053, 0x010d4813, 0x0118d013, 0x01194003, 0x011c4003, 0x011e7803, 0x011f48a3, + 0x011fc023, 0x01261003, 0x012d5013, 0x012db003, 0x012e0003, 0x012fd833, 0x01300053, 0x013038b3, 0x0130a713, + 0x01348753, 0x013840a3, 0x0138a003, 0x0138b003, 0x0138e803, 0x01390803, 0x01394003, 0x01399813, 0x013a2003, + 0x013a3803, 0x013a6003, 0x013a7003, 0x013a9823, 0x013ab803, 0x013b1843, 0x013ca823, 0x013d0803, 0x013d8003, + 0x013df803, 0x0149a013, 0x01582823, 0x0158d813, 0x015a8003, 0x015aa803, 0x01677822, 0x016bf802, 0x016f01f2, + 0x01815052, 0x01818003, 0x0181e803, 0x0184c812, 0x0194b803, 0x0194c803, 0x05337832, 0x0533a092, 0x0534f012, + 0x05378012, 0x05401002, 0x05403002, 0x05405802, 0x0541181a, 0x05412812, 0x0541380a, 0x05416002, 0x0544001a, + 0x0545a0fa, 0x05462012, 0x05470112, 0x0547f802, 0x05493072, 0x054a38a2, 0x054a901a, 0x054b01c4, 0x054c0022, + 0x054c180a, 0x054d9802, 0x054da01a, 0x054db032, 0x054dd01a, 0x054de012, 0x054df02a, 0x054f2802, 0x05514852, + 0x0551781a, 0x05518812, 0x0551981a, 0x0551a812, 0x05521802, 0x05526002, 0x0552680a, 0x0553e002, 0x05558002, + 0x05559022, 0x0555b812, 0x0555f012, 0x05560802, 0x0557580a, 0x05576012, 0x0557701a, 0x0557a80a, 0x0557b002, + 0x055f181a, 0x055f2802, 0x055f301a, 0x055f4002, 0x055f481a, 0x055f600a, 0x055f6802, 0x05600006, 0x056009a7, + 0x0560e006, 0x0560e9a7, 0x0561c006, 0x0561c9a7, 0x0562a006, 0x0562a9a7, 0x05638006, 0x056389a7, 0x05646006, + 0x056469a7, 0x05654006, 0x056549a7, 0x05662006, 0x056629a7, 0x05670006, 0x056709a7, 0x0567e006, 0x0567e9a7, + 0x0568c006, 0x0568c9a7, 0x0569a006, 0x0569a9a7, 0x056a8006, 0x056a89a7, 0x056b6006, 0x056b69a7, 0x056c4006, + 0x056c49a7, 0x056d2006, 0x056d29a7, 0x056e0006, 0x056e09a7, 0x056ee006, 0x056ee9a7, 0x056fc006, 0x056fc9a7, + 0x0570a006, 0x0570a9a7, 0x05718006, 0x057189a7, 0x05726006, 0x057269a7, 0x05734006, 0x057349a7, 0x05742006, + 0x057429a7, 0x05750006, 0x057509a7, 0x0575e006, 0x0575e9a7, 0x0576c006, 0x0576c9a7, 0x0577a006, 0x0577a9a7, + 0x05788006, 0x057889a7, 0x05796006, 0x057969a7, 0x057a4006, 0x057a49a7, 0x057b2006, 0x057b29a7, 0x057c0006, + 0x057c09a7, 0x057ce006, 0x057ce9a7, 0x057dc006, 0x057dc9a7, 0x057ea006, 0x057ea9a7, 0x057f8006, 0x057f89a7, + 0x05806006, 0x058069a7, 0x05814006, 0x058149a7, 0x05822006, 0x058229a7, 0x05830006, 0x058309a7, 0x0583e006, + 0x0583e9a7, 0x0584c006, 0x0584c9a7, 0x0585a006, 0x0585a9a7, 0x05868006, 0x058689a7, 0x05876006, 0x058769a7, + 0x05884006, 0x058849a7, 0x05892006, 0x058929a7, 0x058a0006, 0x058a09a7, 0x058ae006, 0x058ae9a7, 0x058bc006, + 0x058bc9a7, 0x058ca006, 0x058ca9a7, 0x058d8006, 0x058d89a7, 0x058e6006, 0x058e69a7, 0x058f4006, 0x058f49a7, + 0x05902006, 0x059029a7, 0x05910006, 0x059109a7, 0x0591e006, 0x0591e9a7, 0x0592c006, 0x0592c9a7, 0x0593a006, + 0x0593a9a7, 0x05948006, 0x059489a7, 0x05956006, 0x059569a7, 0x05964006, 0x059649a7, 0x05972006, 0x059729a7, + 0x05980006, 0x059809a7, 0x0598e006, 0x0598e9a7, 0x0599c006, 0x0599c9a7, 0x059aa006, 0x059aa9a7, 0x059b8006, + 0x059b89a7, 0x059c6006, 0x059c69a7, 0x059d4006, 0x059d49a7, 0x059e2006, 0x059e29a7, 0x059f0006, 0x059f09a7, + 0x059fe006, 0x059fe9a7, 0x05a0c006, 0x05a0c9a7, 0x05a1a006, 0x05a1a9a7, 0x05a28006, 0x05a289a7, 0x05a36006, + 0x05a369a7, 0x05a44006, 0x05a449a7, 0x05a52006, 0x05a529a7, 0x05a60006, 0x05a609a7, 0x05a6e006, 0x05a6e9a7, + 0x05a7c006, 0x05a7c9a7, 0x05a8a006, 0x05a8a9a7, 0x05a98006, 0x05a989a7, 0x05aa6006, 0x05aa69a7, 0x05ab4006, + 
0x05ab49a7, 0x05ac2006, 0x05ac29a7, 0x05ad0006, 0x05ad09a7, 0x05ade006, 0x05ade9a7, 0x05aec006, 0x05aec9a7, + 0x05afa006, 0x05afa9a7, 0x05b08006, 0x05b089a7, 0x05b16006, 0x05b169a7, 0x05b24006, 0x05b249a7, 0x05b32006, + 0x05b329a7, 0x05b40006, 0x05b409a7, 0x05b4e006, 0x05b4e9a7, 0x05b5c006, 0x05b5c9a7, 0x05b6a006, 0x05b6a9a7, + 0x05b78006, 0x05b789a7, 0x05b86006, 0x05b869a7, 0x05b94006, 0x05b949a7, 0x05ba2006, 0x05ba29a7, 0x05bb0006, + 0x05bb09a7, 0x05bbe006, 0x05bbe9a7, 0x05bcc006, 0x05bcc9a7, 0x05bda006, 0x05bda9a7, 0x05be8006, 0x05be89a7, + 0x05bf6006, 0x05bf69a7, 0x05c04006, 0x05c049a7, 0x05c12006, 0x05c129a7, 0x05c20006, 0x05c209a7, 0x05c2e006, + 0x05c2e9a7, 0x05c3c006, 0x05c3c9a7, 0x05c4a006, 0x05c4a9a7, 0x05c58006, 0x05c589a7, 0x05c66006, 0x05c669a7, + 0x05c74006, 0x05c749a7, 0x05c82006, 0x05c829a7, 0x05c90006, 0x05c909a7, 0x05c9e006, 0x05c9e9a7, 0x05cac006, + 0x05cac9a7, 0x05cba006, 0x05cba9a7, 0x05cc8006, 0x05cc89a7, 0x05cd6006, 0x05cd69a7, 0x05ce4006, 0x05ce49a7, + 0x05cf2006, 0x05cf29a7, 0x05d00006, 0x05d009a7, 0x05d0e006, 0x05d0e9a7, 0x05d1c006, 0x05d1c9a7, 0x05d2a006, + 0x05d2a9a7, 0x05d38006, 0x05d389a7, 0x05d46006, 0x05d469a7, 0x05d54006, 0x05d549a7, 0x05d62006, 0x05d629a7, + 0x05d70006, 0x05d709a7, 0x05d7e006, 0x05d7e9a7, 0x05d8c006, 0x05d8c9a7, 0x05d9a006, 0x05d9a9a7, 0x05da8006, + 0x05da89a7, 0x05db6006, 0x05db69a7, 0x05dc4006, 0x05dc49a7, 0x05dd2006, 0x05dd29a7, 0x05de0006, 0x05de09a7, + 0x05dee006, 0x05dee9a7, 0x05dfc006, 0x05dfc9a7, 0x05e0a006, 0x05e0a9a7, 0x05e18006, 0x05e189a7, 0x05e26006, + 0x05e269a7, 0x05e34006, 0x05e349a7, 0x05e42006, 0x05e429a7, 0x05e50006, 0x05e509a7, 0x05e5e006, 0x05e5e9a7, + 0x05e6c006, 0x05e6c9a7, 0x05e7a006, 0x05e7a9a7, 0x05e88006, 0x05e889a7, 0x05e96006, 0x05e969a7, 0x05ea4006, + 0x05ea49a7, 0x05eb2006, 0x05eb29a7, 0x05ec0006, 0x05ec09a7, 0x05ece006, 0x05ece9a7, 0x05edc006, 0x05edc9a7, + 0x05eea006, 0x05eea9a7, 0x05ef8006, 0x05ef89a7, 0x05f06006, 0x05f069a7, 0x05f14006, 0x05f149a7, 0x05f22006, + 0x05f229a7, 0x05f30006, 0x05f309a7, 0x05f3e006, 0x05f3e9a7, 0x05f4c006, 0x05f4c9a7, 0x05f5a006, 0x05f5a9a7, + 0x05f68006, 0x05f689a7, 0x05f76006, 0x05f769a7, 0x05f84006, 0x05f849a7, 0x05f92006, 0x05f929a7, 0x05fa0006, + 0x05fa09a7, 0x05fae006, 0x05fae9a7, 0x05fbc006, 0x05fbc9a7, 0x05fca006, 0x05fca9a7, 0x05fd8006, 0x05fd89a7, + 0x05fe6006, 0x05fe69a7, 0x05ff4006, 0x05ff49a7, 0x06002006, 0x060029a7, 0x06010006, 0x060109a7, 0x0601e006, + 0x0601e9a7, 0x0602c006, 0x0602c9a7, 0x0603a006, 0x0603a9a7, 0x06048006, 0x060489a7, 0x06056006, 0x060569a7, + 0x06064006, 0x060649a7, 0x06072006, 0x060729a7, 0x06080006, 0x060809a7, 0x0608e006, 0x0608e9a7, 0x0609c006, + 0x0609c9a7, 0x060aa006, 0x060aa9a7, 0x060b8006, 0x060b89a7, 0x060c6006, 0x060c69a7, 0x060d4006, 0x060d49a7, + 0x060e2006, 0x060e29a7, 0x060f0006, 0x060f09a7, 0x060fe006, 0x060fe9a7, 0x0610c006, 0x0610c9a7, 0x0611a006, + 0x0611a9a7, 0x06128006, 0x061289a7, 0x06136006, 0x061369a7, 0x06144006, 0x061449a7, 0x06152006, 0x061529a7, + 0x06160006, 0x061609a7, 0x0616e006, 0x0616e9a7, 0x0617c006, 0x0617c9a7, 0x0618a006, 0x0618a9a7, 0x06198006, + 0x061989a7, 0x061a6006, 0x061a69a7, 0x061b4006, 0x061b49a7, 0x061c2006, 0x061c29a7, 0x061d0006, 0x061d09a7, + 0x061de006, 0x061de9a7, 0x061ec006, 0x061ec9a7, 0x061fa006, 0x061fa9a7, 0x06208006, 0x062089a7, 0x06216006, + 0x062169a7, 0x06224006, 0x062249a7, 0x06232006, 0x062329a7, 0x06240006, 0x062409a7, 0x0624e006, 0x0624e9a7, + 0x0625c006, 0x0625c9a7, 0x0626a006, 0x0626a9a7, 0x06278006, 0x062789a7, 0x06286006, 0x062869a7, 0x06294006, + 0x062949a7, 0x062a2006, 
0x062a29a7, 0x062b0006, 0x062b09a7, 0x062be006, 0x062be9a7, 0x062cc006, 0x062cc9a7, + 0x062da006, 0x062da9a7, 0x062e8006, 0x062e89a7, 0x062f6006, 0x062f69a7, 0x06304006, 0x063049a7, 0x06312006, + 0x063129a7, 0x06320006, 0x063209a7, 0x0632e006, 0x0632e9a7, 0x0633c006, 0x0633c9a7, 0x0634a006, 0x0634a9a7, + 0x06358006, 0x063589a7, 0x06366006, 0x063669a7, 0x06374006, 0x063749a7, 0x06382006, 0x063829a7, 0x06390006, + 0x063909a7, 0x0639e006, 0x0639e9a7, 0x063ac006, 0x063ac9a7, 0x063ba006, 0x063ba9a7, 0x063c8006, 0x063c89a7, + 0x063d6006, 0x063d69a7, 0x063e4006, 0x063e49a7, 0x063f2006, 0x063f29a7, 0x06400006, 0x064009a7, 0x0640e006, + 0x0640e9a7, 0x0641c006, 0x0641c9a7, 0x0642a006, 0x0642a9a7, 0x06438006, 0x064389a7, 0x06446006, 0x064469a7, + 0x06454006, 0x064549a7, 0x06462006, 0x064629a7, 0x06470006, 0x064709a7, 0x0647e006, 0x0647e9a7, 0x0648c006, + 0x0648c9a7, 0x0649a006, 0x0649a9a7, 0x064a8006, 0x064a89a7, 0x064b6006, 0x064b69a7, 0x064c4006, 0x064c49a7, + 0x064d2006, 0x064d29a7, 0x064e0006, 0x064e09a7, 0x064ee006, 0x064ee9a7, 0x064fc006, 0x064fc9a7, 0x0650a006, + 0x0650a9a7, 0x06518006, 0x065189a7, 0x06526006, 0x065269a7, 0x06534006, 0x065349a7, 0x06542006, 0x065429a7, + 0x06550006, 0x065509a7, 0x0655e006, 0x0655e9a7, 0x0656c006, 0x0656c9a7, 0x0657a006, 0x0657a9a7, 0x06588006, + 0x065889a7, 0x06596006, 0x065969a7, 0x065a4006, 0x065a49a7, 0x065b2006, 0x065b29a7, 0x065c0006, 0x065c09a7, + 0x065ce006, 0x065ce9a7, 0x065dc006, 0x065dc9a7, 0x065ea006, 0x065ea9a7, 0x065f8006, 0x065f89a7, 0x06606006, + 0x066069a7, 0x06614006, 0x066149a7, 0x06622006, 0x066229a7, 0x06630006, 0x066309a7, 0x0663e006, 0x0663e9a7, + 0x0664c006, 0x0664c9a7, 0x0665a006, 0x0665a9a7, 0x06668006, 0x066689a7, 0x06676006, 0x066769a7, 0x06684006, + 0x066849a7, 0x06692006, 0x066929a7, 0x066a0006, 0x066a09a7, 0x066ae006, 0x066ae9a7, 0x066bc006, 0x066bc9a7, + 0x066ca006, 0x066ca9a7, 0x066d8006, 0x066d89a7, 0x066e6006, 0x066e69a7, 0x066f4006, 0x066f49a7, 0x06702006, + 0x067029a7, 0x06710006, 0x067109a7, 0x0671e006, 0x0671e9a7, 0x0672c006, 0x0672c9a7, 0x0673a006, 0x0673a9a7, + 0x06748006, 0x067489a7, 0x06756006, 0x067569a7, 0x06764006, 0x067649a7, 0x06772006, 0x067729a7, 0x06780006, + 0x067809a7, 0x0678e006, 0x0678e9a7, 0x0679c006, 0x0679c9a7, 0x067aa006, 0x067aa9a7, 0x067b8006, 0x067b89a7, + 0x067c6006, 0x067c69a7, 0x067d4006, 0x067d49a7, 0x067e2006, 0x067e29a7, 0x067f0006, 0x067f09a7, 0x067fe006, + 0x067fe9a7, 0x0680c006, 0x0680c9a7, 0x0681a006, 0x0681a9a7, 0x06828006, 0x068289a7, 0x06836006, 0x068369a7, + 0x06844006, 0x068449a7, 0x06852006, 0x068529a7, 0x06860006, 0x068609a7, 0x0686e006, 0x0686e9a7, 0x0687c006, + 0x0687c9a7, 0x0688a006, 0x0688a9a7, 0x06898006, 0x068989a7, 0x068a6006, 0x068a69a7, 0x068b4006, 0x068b49a7, + 0x068c2006, 0x068c29a7, 0x068d0006, 0x068d09a7, 0x068de006, 0x068de9a7, 0x068ec006, 0x068ec9a7, 0x068fa006, + 0x068fa9a7, 0x06908006, 0x069089a7, 0x06916006, 0x069169a7, 0x06924006, 0x069249a7, 0x06932006, 0x069329a7, + 0x06940006, 0x069409a7, 0x0694e006, 0x0694e9a7, 0x0695c006, 0x0695c9a7, 0x0696a006, 0x0696a9a7, 0x06978006, + 0x069789a7, 0x06986006, 0x069869a7, 0x06994006, 0x069949a7, 0x069a2006, 0x069a29a7, 0x069b0006, 0x069b09a7, + 0x069be006, 0x069be9a7, 0x069cc006, 0x069cc9a7, 0x069da006, 0x069da9a7, 0x069e8006, 0x069e89a7, 0x069f6006, + 0x069f69a7, 0x06a04006, 0x06a049a7, 0x06a12006, 0x06a129a7, 0x06a20006, 0x06a209a7, 0x06a2e006, 0x06a2e9a7, + 0x06a3c006, 0x06a3c9a7, 0x06a4a006, 0x06a4a9a7, 0x06a58006, 0x06a589a7, 0x06a66006, 0x06a669a7, 0x06a74006, + 0x06a749a7, 0x06a82006, 0x06a829a7, 0x06a90006, 
0x06a909a7, 0x06a9e006, 0x06a9e9a7, 0x06aac006, 0x06aac9a7, + 0x06aba006, 0x06aba9a7, 0x06ac8006, 0x06ac89a7, 0x06ad6006, 0x06ad69a7, 0x06ae4006, 0x06ae49a7, 0x06af2006, + 0x06af29a7, 0x06b00006, 0x06b009a7, 0x06b0e006, 0x06b0e9a7, 0x06b1c006, 0x06b1c9a7, 0x06b2a006, 0x06b2a9a7, + 0x06b38006, 0x06b389a7, 0x06b46006, 0x06b469a7, 0x06b54006, 0x06b549a7, 0x06b62006, 0x06b629a7, 0x06b70006, + 0x06b709a7, 0x06b7e006, 0x06b7e9a7, 0x06b8c006, 0x06b8c9a7, 0x06b9a006, 0x06b9a9a7, 0x06ba8006, 0x06ba89a7, + 0x06bb6006, 0x06bb69a7, 0x06bc4006, 0x06bc49a7, 0x06bd816c, 0x06be5b0b, 0x07d8f002, 0x07f000f2, 0x07f100f2, + 0x07f7f801, 0x07fcf012, 0x07ff80b1, 0x080fe802, 0x08170002, 0x081bb042, 0x08500822, 0x08502812, 0x08506032, + 0x0851c022, 0x0851f802, 0x08572812, 0x08692032, 0x08755812, 0x087a30a2, 0x087c1032, 0x0880000a, 0x08800802, + 0x0880100a, 0x0881c0e2, 0x08838002, 0x08839812, 0x0883f822, 0x0884100a, 0x0885802a, 0x08859832, 0x0885b81a, + 0x0885c812, 0x0885e808, 0x08861002, 0x08866808, 0x08880022, 0x08893842, 0x0889600a, 0x08896872, 0x088a281a, + 0x088b9802, 0x088c0012, 0x088c100a, 0x088d982a, 0x088db082, 0x088df81a, 0x088e1018, 0x088e4832, 0x088e700a, + 0x088e7802, 0x0891602a, 0x08917822, 0x0891901a, 0x0891a002, 0x0891a80a, 0x0891b012, 0x0891f002, 0x0896f802, + 0x0897002a, 0x08971872, 0x08980012, 0x0898101a, 0x0899d812, 0x0899f002, 0x0899f80a, 0x089a0002, 0x089a083a, + 0x089a381a, 0x089a582a, 0x089ab802, 0x089b101a, 0x089b3062, 0x089b8042, 0x08a1a82a, 0x08a1c072, 0x08a2001a, + 0x08a21022, 0x08a2280a, 0x08a23002, 0x08a2f002, 0x08a58002, 0x08a5881a, 0x08a59852, 0x08a5c80a, 0x08a5d002, + 0x08a5d81a, 0x08a5e802, 0x08a5f00a, 0x08a5f812, 0x08a6080a, 0x08a61012, 0x08ad7802, 0x08ad801a, 0x08ad9032, + 0x08adc03a, 0x08ade012, 0x08adf00a, 0x08adf812, 0x08aee012, 0x08b1802a, 0x08b19872, 0x08b1d81a, 0x08b1e802, + 0x08b1f00a, 0x08b1f812, 0x08b55802, 0x08b5600a, 0x08b56802, 0x08b5701a, 0x08b58052, 0x08b5b00a, 0x08b5b802, + 0x08b8e822, 0x08b91032, 0x08b9300a, 0x08b93842, 0x08c1602a, 0x08c17882, 0x08c1c00a, 0x08c1c812, 0x08c98002, + 0x08c9884a, 0x08c9b81a, 0x08c9d812, 0x08c9e80a, 0x08c9f002, 0x08c9f808, 0x08ca000a, 0x08ca0808, 0x08ca100a, + 0x08ca1802, 0x08ce882a, 0x08cea032, 0x08ced012, 0x08cee03a, 0x08cf0002, 0x08cf200a, 0x08d00892, 0x08d19852, + 0x08d1c80a, 0x08d1d008, 0x08d1d832, 0x08d23802, 0x08d28852, 0x08d2b81a, 0x08d2c822, 0x08d42058, 0x08d450c2, + 0x08d4b80a, 0x08d4c012, 0x08e1780a, 0x08e18062, 0x08e1c052, 0x08e1f00a, 0x08e1f802, 0x08e49152, 0x08e5480a, + 0x08e55062, 0x08e5880a, 0x08e59012, 0x08e5a00a, 0x08e5a812, 0x08e98852, 0x08e9d002, 0x08e9e012, 0x08e9f862, + 0x08ea3008, 0x08ea3802, 0x08ec504a, 0x08ec8012, 0x08ec981a, 0x08eca802, 0x08ecb00a, 0x08ecb802, 0x08f79812, + 0x08f7a81a, 0x09a18081, 0x0b578042, 0x0b598062, 0x0b7a7802, 0x0b7a8b6a, 0x0b7c7832, 0x0b7f2002, 0x0b7f801a, + 0x0de4e812, 0x0de50031, 0x0e7802d2, 0x0e798162, 0x0e8b2802, 0x0e8b300a, 0x0e8b3822, 0x0e8b680a, 0x0e8b7042, + 0x0e8b9871, 0x0e8bd872, 0x0e8c2862, 0x0e8d5032, 0x0e921022, 0x0ed00362, 0x0ed1db12, 0x0ed3a802, 0x0ed42002, + 0x0ed4d842, 0x0ed508e2, 0x0f000062, 0x0f004102, 0x0f00d862, 0x0f011812, 0x0f013042, 0x0f098062, 0x0f157002, + 0x0f176032, 0x0f468062, 0x0f4a2062, 0x0f8007f3, 0x0f8407f3, 0x0f886823, 0x0f897803, 0x0f8b6053, 0x0f8bf013, + 0x0f8c7003, 0x0f8c8893, 0x0f8d6b83, 0x0f8f3199, 0x0f9008e3, 0x0f90d003, 0x0f917803, 0x0f919083, 0x0f91e033, + 0x0f924ff3, 0x0f964ff3, 0x0f9a4ff3, 0x0f9e4b13, 0x0f9fd842, 0x0fa007f3, 0x0fa407f3, 0x0fa803d3, 0x0faa37f3, + 0x0fae37f3, 0x0fb23093, 0x0fb407f3, 0x0fbba0b3, 0x0fbeaaa3, 0x0fc06033, 
0x0fc24073, 0x0fc2d053, 0x0fc44073, + 0x0fc57513, 0x0fc862e3, 0x0fc9e093, 0x0fca3ff3, 0x0fce3ff3, 0x0fd23ff3, 0x0fd63b83, 0x0fe007f3, 0x0fe407f3, + 0x0fe807f3, 0x0fec07f3, 0x0ff007f3, 0x0ff407f3, 0x0ff807f3, 0x0ffc07d3, 0x700001f1, 0x700105f2, 0x700407f1, + 0x700807f2, 0x700c06f2, 0x700f87f1, 0x701387f1, 0x701787f1, 0x701b87f1, 0x701f87f1, 0x702387f1, 0x702787f1, + 0x702b87f1, 0x702f87f1, 0x703387f1, 0x703787f1, 0x703b87f1, 0x703f87f1, 0x704387f1, 0x704787f1, 0x704b87f1, + 0x704f87f1, 0x705387f1, 0x705787f1, 0x705b87f1, 0x705f87f1, 0x706387f1, 0x706787f1, 0x706b87f1, 0x706f87f1, + 0x707387f1, 0x707787f1, 0x707b87f1, 0x707f80f1}; + +/// Returns the extended grapheme cluster bondary property of a code point. +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __property __get_property(const char32_t __code_point) noexcept { + // TODO FMT use std::ranges::upper_bound. + + // The algorithm searches for the upper bound of the range and, when found, + // steps back one entry. This algorithm is used since the code point can be + // anywhere in the range. After a lower bound is found the next step is to + // compare whether the code unit is indeed in the range. + // + // Since the entry contains a code unit, size, and property the code point + // being sought needs to be adjusted. Just shifting the code point to the + // proper position doesn't work; suppose an entry has property 0, size 1, + // and lower bound 3. This results in the entry 0x1810. + // When searching for code point 3 it will search for 0x1800, find 0x1810 + // and moves to the previous entry. Thus the lower bound value will never + // be found. + // The simple solution is to set the bits belonging to the property and + // size. Then the upper bound for code point 3 will return the entry after + // 0x1810. After moving to the previous entry the algorithm arrives at the + // correct entry. + ptrdiff_t __i = std::upper_bound(__entries, std::end(__entries), (__code_point << 11) | 0x7ffu) - __entries; + if (__i == 0) + return __property::__none; + + --__i; + uint32_t __upper_bound = (__entries[__i] >> 11) + ((__entries[__i] >> 4) & 0x7f); + if (__code_point <= __upper_bound) + return static_cast<__property>(__entries[__i] & 0xf); + + return __property::__none; +} + +} // namespace __extended_grapheme_custer_property_boundary + +#endif //_LIBCPP_STD_VER > 17 + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FORMAT_EXTENDED_GRAPHEME_CLUSTER_TABLE_H diff --git a/libcxx/include/__format/formatter_integral.h b/libcxx/include/__format/formatter_integral.h index d6fa5ec18eb8..b9ed5fe80f7f 100644 --- a/libcxx/include/__format/formatter_integral.h +++ b/libcxx/include/__format/formatter_integral.h @@ -343,7 +343,7 @@ __format_bool(bool __value, auto& __ctx, __format_spec::__parsed_specifications< if (__specs.__std_.__locale_specific_form_) { const auto& __np = use_facet<numpunct<_CharT>>(__ctx.locale()); basic_string<_CharT> __str = __value ? 
__np.truename() : __np.falsename(); - return __formatter::__write_unicode_no_precision(basic_string_view<_CharT>{__str}, __ctx.out(), __specs); + return __formatter::__write_string_no_precision(basic_string_view<_CharT>{__str}, __ctx.out(), __specs); } # endif basic_string_view<_CharT> __str = diff --git a/libcxx/include/__format/formatter_output.h b/libcxx/include/__format/formatter_output.h index c59cbbeeb5dd..e09534c41dff 100644 --- a/libcxx/include/__format/formatter_output.h +++ b/libcxx/include/__format/formatter_output.h @@ -17,6 +17,7 @@ #include <__config> #include <__format/formatter.h> #include <__format/parser_std_format_spec.h> +#include <__format/unicode.h> #include <__utility/move.h> #include <__utility/unreachable.h> #include <cstddef> @@ -59,8 +60,8 @@ struct _LIBCPP_TYPE_VIS __padding_size_result { _LIBCPP_HIDE_FROM_ABI constexpr __padding_size_result __padding_size(size_t __size, size_t __width, __format_spec::__alignment __align) { _LIBCPP_ASSERT(__width > __size, "don't call this function when no padding is required"); - _LIBCPP_ASSERT(__align != __format_spec::__alignment::__zero_padding, - "the caller should have handled the zero-padding"); + _LIBCPP_ASSERT( + __align != __format_spec::__alignment::__zero_padding, "the caller should have handled the zero-padding"); size_t __fill = __width - __size; switch (__align) { @@ -75,7 +76,7 @@ __padding_size(size_t __size, size_t __width, __format_spec::__alignment __align // __before = floor(__fill, 2); // __after = ceil(__fill, 2); size_t __before = __fill / 2; - size_t __after = __fill - __before; + size_t __after = __fill - __before; return {__before, __after}; } case __format_spec::__alignment::__default: @@ -173,10 +174,12 @@ _LIBCPP_HIDE_FROM_ABI _OutIt __write_using_decimal_separators(_OutIt __out_it, c /// conversion, which means the [\a __first, \a __last) always contains elements /// of the type \c char. template <class _CharT, class _ParserCharT> -_LIBCPP_HIDE_FROM_ABI auto __write(const _CharT* __first, const _CharT* __last, - output_iterator<const _CharT&> auto __out_it, - __format_spec::__parsed_specifications<_ParserCharT> __specs, ptrdiff_t __size) - -> decltype(__out_it) { +_LIBCPP_HIDE_FROM_ABI auto __write( + const _CharT* __first, + const _CharT* __last, + output_iterator<const _CharT&> auto __out_it, + __format_spec::__parsed_specifications<_ParserCharT> __specs, + ptrdiff_t __size) -> decltype(__out_it) { _LIBCPP_ASSERT(__first <= __last, "Not a valid range"); if (__size >= __specs.__width_) @@ -189,6 +192,7 @@ _LIBCPP_HIDE_FROM_ABI auto __write(const _CharT* __first, const _CharT* __last, } /// \overload +/// /// Calls the function above where \a __size = \a __last - \a __first. template <class _CharT, class _ParserCharT> _LIBCPP_HIDE_FROM_ABI auto __write(const _CharT* __first, const _CharT* __last, @@ -243,77 +247,56 @@ _LIBCPP_HIDE_FROM_ABI auto __write_using_trailing_zeros( return _VSTD::fill_n(_VSTD::move(__out_it), __padding.__after_, __specs.__fill_); } -# ifndef _LIBCPP_HAS_NO_UNICODE +/// Writes a string using format's width estimation algorithm. +/// +/// \pre !__specs.__has_precision() +/// +/// \note When \c _LIBCPP_HAS_NO_UNICODE is defined the function assumes the +/// input is ASCII. 
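The __padding_size helper above splits the fill characters around the formatted value: for center alignment the left side gets floor(fill / 2) and the right side the remaining ceil(fill / 2). The following is a minimal standalone sketch of that splitting rule only; the function name, struct, and demo values are the editor's illustration, not part of the patch.

```cpp
#include <cstddef>
#include <iostream>
#include <string>

// Splits the fill characters for center alignment the same way the
// __padding_size helper above does: floor(fill / 2) before the value,
// the remaining ceil(fill / 2) after it.
struct padding { std::size_t before, after; };

padding split_fill(std::size_t size, std::size_t width) {
  if (size >= width)
    return {0, 0};                 // nothing to pad
  std::size_t fill   = width - size;
  std::size_t before = fill / 2;   // floor
  return {before, fill - before};  // ceil
}

int main() {
  // "abcd" centered in a field of 11 columns: 3 fill characters before,
  // 4 after, matching the layout of std::format("{:^11}", "abcd").
  padding p = split_fill(4, 11);
  std::cout << std::string(p.before, '*') << "abcd"
            << std::string(p.after, '*') << '\n';  // prints ***abcd****
}
```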
template <class _CharT> -_LIBCPP_HIDE_FROM_ABI auto __write_unicode_no_precision(basic_string_view<_CharT> __str, - output_iterator<const _CharT&> auto __out_it, - __format_spec::__parsed_specifications<_CharT> __specs) - -> decltype(__out_it) { +_LIBCPP_HIDE_FROM_ABI auto __write_string_no_precision( + basic_string_view<_CharT> __str, + output_iterator<const _CharT&> auto __out_it, + __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) { + _LIBCPP_ASSERT(!__specs.__has_precision(), "use __write_string"); - _LIBCPP_ASSERT(!__specs.__has_precision(), "use __write_unicode"); // No padding -> copy the string if (!__specs.__has_width()) return _VSTD::copy(__str.begin(), __str.end(), _VSTD::move(__out_it)); - // Non Unicode part larger than width -> copy the string - auto __last = __format_spec::__detail::__estimate_column_width_fast(__str.begin(), __str.end()); - ptrdiff_t __size = __last - __str.begin(); - if (__size >= __specs.__width_) - return _VSTD::copy(__str.begin(), __str.end(), _VSTD::move(__out_it)); - - // Is there a non Unicode part? - if (__last != __str.end()) { - // Non Unicode and Unicode part larger than width -> copy the string - __format_spec::__detail::__column_width_result __column_width = - __format_spec::__detail::__estimate_column_width(__last, __str.end(), __specs.__width_); - __size += __column_width.__width; // Note this new size is used when __size < __specs.__width_ - if (__size >= __specs.__width_) - return _VSTD::copy(__str.begin(), __str.end(), _VSTD::move(__out_it)); - } + // Note when the estimated width is larger than size there's no padding. So + // there's no reason to get the real size when the estimate is larger than or + // equal to the minimum field width. + size_t __size = + __format_spec::__estimate_column_width(__str, __specs.__width_, __format_spec::__column_width_rounding::__up) + .__width_; return __formatter::__write(__str.begin(), __str.end(), _VSTD::move(__out_it), __specs, __size); } -# endif template <class _CharT> -_LIBCPP_HIDE_FROM_ABI auto __write_unicode(basic_string_view<_CharT> __str, - output_iterator<const _CharT&> auto __out_it, - __format_spec::__parsed_specifications<_CharT> __specs) - -> decltype(__out_it) { -# ifndef _LIBCPP_HAS_NO_UNICODE - if (!__specs.__has_precision()) - return __formatter::__write_unicode_no_precision(__str, _VSTD::move(__out_it), __specs); - - // Non unicode part larger than precision -> truncate the output and use the normal write operation. 
- auto __last = __format_spec::__detail::__estimate_column_width_fast(__str.begin(), __str.end()); - ptrdiff_t __size = __last - __str.begin(); - if (__size >= __specs.__precision_) - return __formatter::__write(__str.begin(), __str.begin() + __specs.__precision_, _VSTD::move(__out_it), __specs, - __specs.__precision_); - - // No non Unicode part, implies __size < __specs.__precision_ -> use normal write operation - if (__last == __str.end()) - return __formatter::__write(__str.begin(), __str.end(), _VSTD::move(__out_it), __specs, __str.size()); - - __format_spec::__detail::__column_width_result __column_width = - __format_spec::__detail::__estimate_column_width(__last, __str.end(), __specs.__precision_ - __size); - __size += __column_width.__width; - // Truncate the output - if (__column_width.__ptr != __str.end()) - __str.remove_suffix(__str.end() - __column_width.__ptr); +_LIBCPP_HIDE_FROM_ABI int __truncate(basic_string_view<_CharT>& __str, int __precision) { + __format_spec::__column_width_result<_CharT> __result = + __format_spec::__estimate_column_width(__str, __precision, __format_spec::__column_width_rounding::__down); + __str = basic_string_view<_CharT>{__str.begin(), __result.__last_}; + return __result.__width_; +} - return __formatter::__write(__str.begin(), __str.end(), _VSTD::move(__out_it), __specs, __size); +/// Writes a string using format's width estimation algorithm. +/// +/// \note When \c _LIBCPP_HAS_NO_UNICODE is defined the function assumes the +/// input is ASCII. +template <class _CharT> +_LIBCPP_HIDE_FROM_ABI auto __write_string( + basic_string_view<_CharT> __str, + output_iterator<const _CharT&> auto __out_it, + __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) { + if (!__specs.__has_precision()) + return __formatter::__write_string_no_precision(__str, _VSTD::move(__out_it), __specs); -# else - if (__specs.__has_precision()) { - ptrdiff_t __size = __str.size(); - if (__size > __specs.__precision_) - return __formatter::__write(__str.begin(), __str.begin() + __specs.__precision_, _VSTD::move(__out_it), __specs, - __specs.__precision_); - } - return __formatter::__write(__str.begin(), __str.end(), _VSTD::move(__out_it), __specs, __str.size()); + int __size = __formatter::__truncate(__str, __specs.__precision_); -# endif + return __write(__str.begin(), __str.end(), _VSTD::move(__out_it), __specs, __size); } } // namespace __formatter diff --git a/libcxx/include/__format/formatter_string.h b/libcxx/include/__format/formatter_string.h index 139c05e58c28..71bda4fcded1 100644 --- a/libcxx/include/__format/formatter_string.h +++ b/libcxx/include/__format/formatter_string.h @@ -40,7 +40,7 @@ public: } _LIBCPP_HIDE_FROM_ABI auto format(basic_string_view<_CharT> __str, auto& __ctx) const -> decltype(__ctx.out()) { - return __formatter::__write_unicode(__str, __ctx.out(), __parser_.__get_parsed_std_specifications(__ctx)); + return __formatter::__write_string(__str, __ctx.out(), __parser_.__get_parsed_std_specifications(__ctx)); } __format_spec::__parser<_CharT> __parser_; @@ -69,7 +69,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<const _CharT*, // TODO FMT Implement these improvements. 
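The new __write_string above works in two steps: when a precision is given it first truncates the input to at most that many estimated display columns, then writes the result and pads it up to the field width. A simplified sketch of that flow is shown below under the assumption that every character occupies exactly one column; the real code instead uses the Unicode-aware __estimate_column_width introduced later in this diff, and the function name here is the editor's own.

```cpp
#include <algorithm>
#include <iostream>
#include <string>
#include <string_view>

// Simplified model of the truncate-then-pad flow used by __write_string:
// 1. limit the input to at most `precision` columns,
// 2. pad with the fill character up to `width` columns.
// Assumes one column per char; the real implementation measures columns with
// a Unicode-aware estimator and grapheme clustering.
std::string write_string(std::string_view str, std::size_t width,
                         std::size_t precision, char fill = ' ') {
  std::string_view truncated = str.substr(0, std::min(str.size(), precision));
  std::string out(truncated);
  if (out.size() < width)
    out.append(width - out.size(), fill);  // strings are left-aligned by default
  return out;
}

int main() {
  std::cout << '[' << write_string("formatting", 8, 6) << "]\n";  // [format  ]
}
```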
__format_spec::__parsed_specifications<_CharT> __specs = _Base::__parser_.__get_parsed_std_specifications(__ctx); if (__specs.__has_width() || __specs.__has_precision()) - return __formatter::__write_unicode(basic_string_view<_CharT>{__str}, __ctx.out(), __specs); + return __formatter::__write_string(basic_string_view<_CharT>{__str}, __ctx.out(), __specs); // No formatting required, copy the string to the output. auto __out_it = __ctx.out(); diff --git a/libcxx/include/__format/parser_std_format_spec.h b/libcxx/include/__format/parser_std_format_spec.h index 034fc55a44dc..1425a953ebaa 100644 --- a/libcxx/include/__format/parser_std_format_spec.h +++ b/libcxx/include/__format/parser_std_format_spec.h @@ -25,10 +25,12 @@ #include <__format/format_error.h> #include <__format/format_parse_context.h> #include <__format/format_string.h> +#include <__format/unicode.h> #include <__variant/monostate.h> #include <bit> #include <concepts> #include <cstdint> +#include <string_view> #include <type_traits> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -91,462 +93,6 @@ __substitute_arg_id(basic_format_arg<_Context> __format_arg) { __format_arg); } -/** Helper struct returned from @ref __get_string_alignment. */ -template <class _CharT> -struct _LIBCPP_TEMPLATE_VIS __string_alignment { - /** Points beyond the last character to write to the output. */ - const _CharT* __last; - /** - * The estimated number of columns in the output or 0. - * - * Only when the output needs to be aligned it's required to know the exact - * number of columns in the output. So if the formatted output has only a - * minimum width the exact size isn't important. It's only important to know - * the minimum has been reached. The minimum width is the width specified in - * the format-spec. - * - * For example in this code @code std::format("{:10}", MyString); @endcode - * the width estimation can stop once the algorithm has determined the output - * width is 10 columns. - * - * So if: - * * @ref __align == @c true the @ref __size is the estimated number of - * columns required. - * * @ref __align == @c false the @ref __size is the estimated number of - * columns required or 0 when the estimation algorithm stopped prematurely. - */ - ptrdiff_t __size; - /** - * Does the output need to be aligned. - * - * When alignment is needed the output algorithm needs to add the proper - * padding. Else the output algorithm just needs to copy the input up to - * @ref __last. - */ - bool __align; -}; - -#ifndef _LIBCPP_HAS_NO_UNICODE -namespace __detail { - -/** - * Unicode column width estimates. - * - * Unicode can be stored in several formats: UTF-8, UTF-16, and UTF-32. - * Depending on format the relation between the number of code units stored and - * the number of output columns differs. The first relation is the number of - * code units forming a code point. (The text assumes the code units are - * unsigned.) - * - UTF-8 The number of code units is between one and four. The first 127 - * Unicode code points match the ASCII character set. When the highest bit is - * set it means the code point has more than one code unit. - * - UTF-16: The number of code units is between 1 and 2. When the first - * code unit is in the range [0xd800,0xdfff) it means the code point uses two - * code units. - * - UTF-32: The number of code units is always one. - * - * The code point to the number of columns isn't well defined. The code uses the - * estimations defined in [format.string.std]/11. This list might change in the - * future. 
- * - * The algorithm of @ref __get_string_alignment uses two different scanners: - * - The simple scanner @ref __estimate_column_width_fast. This scanner assumes - * 1 code unit is 1 column. This scanner stops when it can't be sure the - * assumption is valid: - * - UTF-8 when the code point is encoded in more than 1 code unit. - * - UTF-16 and UTF-32 when the first multi-column code point is encountered. - * (The code unit's value is lower than 0xd800 so the 2 code unit encoding - * is irrelevant for this scanner.) - * Due to these assumptions the scanner is faster than the full scanner. It - * can process all text only containing ASCII. For UTF-16/32 it can process - * most (all?) European languages. (Note the set it can process might be - * reduced in the future, due to updates in the scanning rules.) - * - The full scanner @ref __estimate_column_width. This scanner, if needed, - * converts multiple code units into one code point then converts the code - * point to a column width. - * - * See also: - * - [format.string.general]/11 - * - https://en.wikipedia.org/wiki/UTF-8#Encoding - * - https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF - */ - -/** - * The first 2 column code point. - * - * This is the point where the fast UTF-16/32 scanner needs to stop processing. - */ -inline constexpr uint32_t __two_column_code_point = 0x1100; - -/** Helper concept for an UTF-8 character type. */ -template <class _CharT> -concept __utf8_character = same_as<_CharT, char> || same_as<_CharT, char8_t>; - -/** Helper concept for an UTF-16 character type. */ -template <class _CharT> -concept __utf16_character = (same_as<_CharT, wchar_t> && sizeof(wchar_t) == 2) || same_as<_CharT, char16_t>; - -/** Helper concept for an UTF-32 character type. */ -template <class _CharT> -concept __utf32_character = (same_as<_CharT, wchar_t> && sizeof(wchar_t) == 4) || same_as<_CharT, char32_t>; - -/** Helper concept for an UTF-16 or UTF-32 character type. */ -template <class _CharT> -concept __utf16_or_32_character = __utf16_character<_CharT> || __utf32_character<_CharT>; - -/** - * Converts a code point to the column width. - * - * The estimations are conforming to [format.string.general]/11 - * - * This version expects a value less than 0x1'0000, which is a 3-byte UTF-8 - * character. - */ -_LIBCPP_HIDE_FROM_ABI inline constexpr int __column_width_3(uint32_t __c) noexcept { - _LIBCPP_ASSERT(__c < 0x10000, - "Use __column_width_4 or __column_width for larger values"); - - // clang-format off - return 1 + (__c >= 0x1100 && (__c <= 0x115f || - (__c >= 0x2329 && (__c <= 0x232a || - (__c >= 0x2e80 && (__c <= 0x303e || - (__c >= 0x3040 && (__c <= 0xa4cf || - (__c >= 0xac00 && (__c <= 0xd7a3 || - (__c >= 0xf900 && (__c <= 0xfaff || - (__c >= 0xfe10 && (__c <= 0xfe19 || - (__c >= 0xfe30 && (__c <= 0xfe6f || - (__c >= 0xff00 && (__c <= 0xff60 || - (__c >= 0xffe0 && (__c <= 0xffe6 - )))))))))))))))))))); - // clang-format on -} - -/** - * @overload - * - * This version expects a value greater than or equal to 0x1'0000, which is a - * 4-byte UTF-8 character. 
- */ -_LIBCPP_HIDE_FROM_ABI inline constexpr int __column_width_4(uint32_t __c) noexcept { - _LIBCPP_ASSERT(__c >= 0x10000, - "Use __column_width_3 or __column_width for smaller values"); - - // clang-format off - return 1 + (__c >= 0x1'f300 && (__c <= 0x1'f64f || - (__c >= 0x1'f900 && (__c <= 0x1'f9ff || - (__c >= 0x2'0000 && (__c <= 0x2'fffd || - (__c >= 0x3'0000 && (__c <= 0x3'fffd - )))))))); - // clang-format on -} - -/** - * @overload - * - * The general case, accepting all values. - */ -_LIBCPP_HIDE_FROM_ABI inline constexpr int __column_width(uint32_t __c) noexcept { - if (__c < 0x10000) - return __column_width_3(__c); - - return __column_width_4(__c); -} - -/** - * Estimate the column width for the UTF-8 sequence using the fast algorithm. - */ -template <__utf8_character _CharT> -_LIBCPP_HIDE_FROM_ABI constexpr const _CharT* -__estimate_column_width_fast(const _CharT* __first, - const _CharT* __last) noexcept { - return _VSTD::find_if(__first, __last, - [](unsigned char __c) { return __c & 0x80; }); -} - -/** - * @overload - * - * The implementation for UTF-16/32. - */ -template <__utf16_or_32_character _CharT> -_LIBCPP_HIDE_FROM_ABI constexpr const _CharT* -__estimate_column_width_fast(const _CharT* __first, - const _CharT* __last) noexcept { - return _VSTD::find_if(__first, __last, - [](uint32_t __c) { return __c >= 0x1100; }); -} - -template <class _CharT> -struct _LIBCPP_TEMPLATE_VIS __column_width_result { - /** The number of output columns. */ - size_t __width; - /** - * The last parsed element. - * - * This limits the original output to fit in the wanted number of columns. - */ - const _CharT* __ptr; -}; - -/** - * Small helper to determine the width of malformed Unicode. - * - * @note This function's only needed for UTF-8. During scanning UTF-8 there - * are multiple place where it can be detected that the Unicode is malformed. - * UTF-16 only requires 1 test and UTF-32 requires no testing. - */ -template <__utf8_character _CharT> -_LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT> -__estimate_column_width_malformed(const _CharT* __first, const _CharT* __last, - size_t __maximum, size_t __result) noexcept { - size_t __size = __last - __first; - size_t __n = _VSTD::min(__size, __maximum); - return {__result + __n, __first + __n}; -} - -/** - * Determines the number of output columns needed to render the input. - * - * @note When the scanner encounters malformed Unicode it acts as-if every code - * unit at the end of the input is one output column. It's expected the output - * terminal will replace these malformed code units with a one column - * replacement characters. - * - * @param __first Points to the first element of the input range. - * @param __last Points beyond the last element of the input range. - * @param __maximum The maximum number of output columns. The returned number - * of estimated output columns will not exceed this value. - */ -template <__utf8_character _CharT> -_LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT> -__estimate_column_width(const _CharT* __first, const _CharT* __last, - size_t __maximum) noexcept { - size_t __result = 0; - - while (__first != __last) { - // Based on the number of leading 1 bits the number of code units in the - // code point can be determined. 
See - // https://en.wikipedia.org/wiki/UTF-8#Encoding - switch (_VSTD::countl_one(static_cast<unsigned char>(*__first))) { - case 0: // 1-code unit encoding: all 1 column - ++__result; - ++__first; - break; - - case 2: // 2-code unit encoding: all 1 column - // Malformed Unicode. - if (__last - __first < 2) [[unlikely]] - return __estimate_column_width_malformed(__first, __last, __maximum, - __result); - __first += 2; - ++__result; - break; - - case 3: // 3-code unit encoding: either 1 or 2 columns - // Malformed Unicode. - if (__last - __first < 3) [[unlikely]] - return __estimate_column_width_malformed(__first, __last, __maximum, - __result); - { - uint32_t __c = static_cast<unsigned char>(*__first++) & 0x0f; - __c <<= 6; - __c |= static_cast<unsigned char>(*__first++) & 0x3f; - __c <<= 6; - __c |= static_cast<unsigned char>(*__first++) & 0x3f; - __result += __column_width_3(__c); - if (__result > __maximum) - return {__result - 2, __first - 3}; - } - break; - case 4: // 4-code unit encoding: either 1 or 2 columns - // Malformed Unicode. - if (__last - __first < 4) [[unlikely]] - return __estimate_column_width_malformed(__first, __last, __maximum, - __result); - { - uint32_t __c = static_cast<unsigned char>(*__first++) & 0x07; - __c <<= 6; - __c |= static_cast<unsigned char>(*__first++) & 0x3f; - __c <<= 6; - __c |= static_cast<unsigned char>(*__first++) & 0x3f; - __c <<= 6; - __c |= static_cast<unsigned char>(*__first++) & 0x3f; - __result += __column_width_4(__c); - if (__result > __maximum) - return {__result - 2, __first - 4}; - } - break; - default: - // Malformed Unicode. - return __estimate_column_width_malformed(__first, __last, __maximum, - __result); - } - - if (__result >= __maximum) - return {__result, __first}; - } - return {__result, __first}; -} - -template <__utf16_character _CharT> -_LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT> -__estimate_column_width(const _CharT* __first, const _CharT* __last, - size_t __maximum) noexcept { - size_t __result = 0; - - while (__first != __last) { - uint32_t __c = *__first; - // Is the code unit part of a surrogate pair? See - // https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF - if (__c >= 0xd800 && __c <= 0xDfff) { - // Malformed Unicode. 
- if (__last - __first < 2) [[unlikely]] - return {__result + 1, __first + 1}; - - __c -= 0xd800; - __c <<= 10; - __c += (*(__first + 1) - 0xdc00); - __c += 0x10000; - - __result += __column_width_4(__c); - if (__result > __maximum) - return {__result - 2, __first}; - __first += 2; - } else { - __result += __column_width_3(__c); - if (__result > __maximum) - return {__result - 2, __first}; - ++__first; - } - - if (__result >= __maximum) - return {__result, __first}; - } - - return {__result, __first}; -} - -template <__utf32_character _CharT> -_LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT> -__estimate_column_width(const _CharT* __first, const _CharT* __last, - size_t __maximum) noexcept { - size_t __result = 0; - - while (__first != __last) { - uint32_t __c = *__first; - __result += __column_width(__c); - - if (__result > __maximum) - return {__result - 2, __first}; - - ++__first; - if (__result >= __maximum) - return {__result, __first}; - } - - return {__result, __first}; -} - -} // namespace __detail - -template <class _CharT> -_LIBCPP_HIDE_FROM_ABI constexpr __string_alignment<_CharT> -__get_string_alignment(const _CharT* __first, const _CharT* __last, - ptrdiff_t __width, ptrdiff_t __precision) noexcept { - _LIBCPP_ASSERT(__width != 0 || __precision != -1, - "The function has no effect and shouldn't be used"); - - // TODO FMT There might be more optimizations possible: - // If __precision == __format::__number_max and the encoding is: - // * UTF-8 : 4 * (__last - __first) >= __width - // * UTF-16 : 2 * (__last - __first) >= __width - // * UTF-32 : (__last - __first) >= __width - // In these cases it's certain the output is at least the requested width. - // It's unknown how often this happens in practice. For now the improvement - // isn't implemented. - - /* - * First assume there are no special Unicode code units in the input. - * - Apply the precision (this may reduce the size of the input). When - * __precison == -1 this step is omitted. - * - Scan for special code units in the input. - * If our assumption was correct the __pos will be at the end of the input. - */ - const ptrdiff_t __length = __last - __first; - const _CharT* __limit = - __first + - (__precision == -1 ? __length : _VSTD::min(__length, __precision)); - ptrdiff_t __size = __limit - __first; - const _CharT* __pos = - __detail::__estimate_column_width_fast(__first, __limit); - - if (__pos == __limit) - return {__limit, __size, __size < __width}; - - /* - * Our assumption was wrong, there are special Unicode code units. - * The range [__first, __pos) contains a set of code units with the - * following property: - * Every _CharT in the range will be rendered in 1 column. - * - * If there's no maximum width and the parsed size already exceeds the - * minimum required width. The real size isn't important. So bail out. - */ - if (__precision == -1 && (__pos - __first) >= __width) - return {__last, 0, false}; - - /* If there's a __precision, truncate the output to that width. */ - ptrdiff_t __prefix = __pos - __first; - if (__precision != -1) { - _LIBCPP_ASSERT(__precision > __prefix, "Logic error."); - auto __lengh_info = __detail::__estimate_column_width( - __pos, __last, __precision - __prefix); - __size = __lengh_info.__width + __prefix; - return {__lengh_info.__ptr, __size, __size < __width}; - } - - /* Else use __width to determine the number of required padding characters. */ - _LIBCPP_ASSERT(__width > __prefix, "Logic error."); - /* - * The column width is always one or two columns. 
For the precision the wanted - * column width is the maximum, for the width it's the minimum. Using the - * width estimation with its truncating behavior will result in the wrong - * result in the following case: - * - The last code unit processed requires two columns and exceeds the - * maximum column width. - * By increasing the __maximum by one avoids this issue. (It means it may - * pass one code point more than required to determine the proper result; - * that however isn't a problem for the algorithm.) - */ - size_t __maximum = 1 + __width - __prefix; - auto __lengh_info = - __detail::__estimate_column_width(__pos, __last, __maximum); - if (__lengh_info.__ptr != __last) { - // Consumed the width number of code units. The exact size of the string - // is unknown. We only know we don't need to align the output. - _LIBCPP_ASSERT(static_cast<ptrdiff_t>(__lengh_info.__width + __prefix) >= - __width, - "Logic error"); - return {__last, 0, false}; - } - - __size = __lengh_info.__width + __prefix; - return {__last, __size, __size < __width}; -} -#else // _LIBCPP_HAS_NO_UNICODE -template <class _CharT> -_LIBCPP_HIDE_FROM_ABI constexpr __string_alignment<_CharT> -__get_string_alignment(const _CharT* __first, const _CharT* __last, - ptrdiff_t __width, ptrdiff_t __precision) noexcept { - const ptrdiff_t __length = __last - __first; - const _CharT* __limit = - __first + - (__precision == -1 ? __length : _VSTD::min(__length, __precision)); - ptrdiff_t __size = __limit - __first; - return {__limit, __size, __size < __width}; -} -#endif // _LIBCPP_HAS_NO_UNICODE - /// These fields are a filter for which elements to parse. /// /// They default to false so when a new field is added it needs to be opted in @@ -1143,6 +689,212 @@ _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_pointer(__format_spe } } +template <class _CharT> +struct __column_width_result { + /// The number of output columns. + size_t __width_; + /// One beyond the last code unit used in the estimation. + /// + /// This limits the original output to fit in the wanted number of columns. + const _CharT* __last_; +}; + +/// Since a column width can be two it's possible that the requested column +/// width can't be achieved. Depending on the intended usage the policy can be +/// selected. +/// - When used as precision the maximum width may not be exceeded and the +/// result should be "rounded down" to the previous boundary. +/// - When used as a width we're done once the minimum is reached, but +/// exceeding is not an issue. Rounding down is an issue since that will +/// result in writing fill characters. Therefore the result needs to be +/// "rounded up". +enum class __column_width_rounding { __down, __up }; + +# ifndef _LIBCPP_HAS_NO_UNICODE + +namespace __detail { + +/// Converts a code point to the column width. +/// +/// The estimations are conforming to [format.string.general]/11 +/// +/// This version expects a value less than 0x1'0000, which is a 3-byte UTF-8 +/// character. 
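The two rounding policies described above matter because a single element of output can be two columns wide, so a requested column count may fall in the middle of an element. The sketch below models only the policy difference with a hard-coded list of per-element widths instead of real grapheme clusters; the names and values are the editor's illustration, not libc++ code.

```cpp
#include <cstddef>
#include <initializer_list>
#include <iostream>

enum class rounding { down, up };

// Accumulates element widths until `maximum` columns are reached.
// - down: never exceeds `maximum` (suitable for a precision limit).
// - up:   stops as soon as `maximum` is reached, possibly overshooting by the
//         width of the last element (suitable for a minimum field width).
std::size_t estimate(std::initializer_list<int> widths, std::size_t maximum,
                     rounding mode) {
  std::size_t total = 0;
  for (int w : widths) {
    if (total >= maximum)
      break;
    if (mode == rounding::down && total + w > maximum)
      break;
    total += w;
  }
  return total;
}

int main() {
  // Element widths 1, 2, 2 with a limit of 4 columns:
  std::cout << estimate({1, 2, 2}, 4, rounding::down) << '\n';  // 3
  std::cout << estimate({1, 2, 2}, 4, rounding::up) << '\n';    // 5
}
```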
+_LIBCPP_HIDE_FROM_ABI constexpr int __column_width_3(uint32_t __c) noexcept { + _LIBCPP_ASSERT(__c < 0x10000, "Use __column_width_4 or __column_width for larger values"); + + // clang-format off + return 1 + (__c >= 0x1100 && (__c <= 0x115f || + (__c >= 0x2329 && (__c <= 0x232a || + (__c >= 0x2e80 && (__c <= 0x303e || + (__c >= 0x3040 && (__c <= 0xa4cf || + (__c >= 0xac00 && (__c <= 0xd7a3 || + (__c >= 0xf900 && (__c <= 0xfaff || + (__c >= 0xfe10 && (__c <= 0xfe19 || + (__c >= 0xfe30 && (__c <= 0xfe6f || + (__c >= 0xff00 && (__c <= 0xff60 || + (__c >= 0xffe0 && (__c <= 0xffe6 + )))))))))))))))))))); + // clang-format on +} + +/// @overload +/// +/// This version expects a value greater than or equal to 0x1'0000, which is a +/// 4-byte UTF-8 character. +_LIBCPP_HIDE_FROM_ABI constexpr int __column_width_4(uint32_t __c) noexcept { + _LIBCPP_ASSERT(__c >= 0x10000, "Use __column_width_3 or __column_width for smaller values"); + + // clang-format off + return 1 + (__c >= 0x1'f300 && (__c <= 0x1'f64f || + (__c >= 0x1'f900 && (__c <= 0x1'f9ff || + (__c >= 0x2'0000 && (__c <= 0x2'fffd || + (__c >= 0x3'0000 && (__c <= 0x3'fffd + )))))))); + // clang-format on +} + +/// @overload +/// +/// The general case, accepting all values. +_LIBCPP_HIDE_FROM_ABI constexpr int __column_width(uint32_t __c) noexcept { + if (__c < 0x10000) + return __detail::__column_width_3(__c); + + return __detail::__column_width_4(__c); +} + +template <class _CharT> +_LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT> __estimate_column_width_grapheme_clustering( + const _CharT* __first, const _CharT* __last, size_t __maximum, __column_width_rounding __rounding) noexcept { + __unicode::__extended_grapheme_cluster_view<_CharT> __view{__first, __last}; + + __column_width_result<_CharT> __result{0, __first}; + while (__result.__last_ != __last && __result.__width_ <= __maximum) { + typename __unicode::__extended_grapheme_cluster_view<_CharT>::__cluster __cluster = __view.__consume(); + int __width = __detail::__column_width(__cluster.__code_point_); + + // When the next entry would exceed the maximum width the previous width + // might be returned. For example when a width of 100 is requested the + // returned width might be 99, since the next code point has an estimated + // column width of 2. This depends on the rounding flag. + // When the maximum is exceeded the loop will abort the next iteration. + if (__rounding == __column_width_rounding::__down && __result.__width_ + __width > __maximum) + return __result; + + __result.__width_ += __width; + __result.__last_ = __cluster.__last_; + } + + return __result; +} + +} // namespace __detail + +// Unicode can be stored in several formats: UTF-8, UTF-16, and UTF-32. +// Depending on format the relation between the number of code units stored and +// the number of output columns differs. The first relation is the number of +// code units forming a code point. (The text assumes the code units are +// unsigned.) +// - UTF-8 The number of code units is between one and four. The first 127 +// Unicode code points match the ASCII character set. When the highest bit is +// set it means the code point has more than one code unit. +// - UTF-16: The number of code units is between 1 and 2. When the first +// code unit is in the range [0xd800,0xdfff) it means the code point uses two +// code units. +// - UTF-32: The number of code units is always one. +// +// The code point to the number of columns is specified in +// [format.string.std]/11. This list might change in the future. 
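At the user level, the two-column ranges encoded in __column_width_3 and __column_width_4 above are what make field widths line up for emoji and CJK text. The example below is a hedged sketch rather than a guaranteed result: it assumes a toolchain that ships a complete <format> implementing the width estimation of [format.string.std]/11 and UTF-8 encoded string literals.

```cpp
// Assumes a C++20 toolchain with <format> and UTF-8 string literals.
#include <format>
#include <iostream>

int main() {
  // U+1F642 falls in one of the two-column ranges listed above, so the
  // estimated width is 2 and "{:<6}" adds only four fill characters.
  std::cout << std::format("[{:<6}]", "\U0001F642") << '\n';

  // The same holds for CJK ideographs such as U+732B, while plain ASCII
  // counts one column per character.
  std::cout << std::format("[{:<6}]", "\u732B") << '\n';
  std::cout << std::format("[{:<6}]", "x") << '\n';
}
```

On a terminal that agrees with the estimate, all three fields render six columns wide.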
+// +// Another thing to be taken into account is Grapheme clustering. This means +// that in some cases multiple code points are combined one element in the +// output. For example: +// - an ASCII character with a combined diacritical mark +// - an emoji with a skin tone modifier +// - a group of combined people emoji to create a family +// - a combination of flag emoji +// +// See also: +// - [format.string.general]/11 +// - https://en.wikipedia.org/wiki/UTF-8#Encoding +// - https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF + +_LIBCPP_HIDE_FROM_ABI constexpr bool __is_ascii(char32_t __c) { return __c < 0x80; } + +/// Determines the number of output columns needed to render the input. +/// +/// \note When the scanner encounters malformed Unicode it acts as-if every +/// code unit is a one column code point. Typically a terminal uses the same +/// strategy and replaces every malformed code unit with a one column +/// replacement character. +/// +/// \param __first Points to the first element of the input range. +/// \param __last Points beyond the last element of the input range. +/// \param __maximum The maximum number of output columns. The returned number +/// of estimated output columns will not exceed this value. +/// \param __rounding Selects the rounding method. +/// \c __down result.__width_ <= __maximum +/// \c __up result.__width_ <= __maximum + 1 +template <class _CharT> +_LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT> __estimate_column_width( + basic_string_view<_CharT> __str, size_t __maximum, __column_width_rounding __rounding) noexcept { + // The width estimation is done in two steps: + // - Quickly process for the ASCII part. ASCII has the following properties + // - One code unit is one code point + // - Every code point has an estimated width of one + // - When needed it will a Unicode Grapheme clustering algorithm to find + // the proper place for truncation. + + if (__str.empty() || __maximum == 0) + return {0, __str.begin()}; + + // ASCII has one caveat; when an ASCII character is followed by a non-ASCII + // character they might be part of an extended grapheme cluster. For example: + // an ASCII letter and a COMBINING ACUTE ACCENT + // The truncate should happen after the COMBINING ACUTE ACCENT. Therefore we + // need to scan one code unit beyond the requested precision. When this code + // unit is non-ASCII we omit the current code unit and let the Grapheme + // clustering algorithm do its work. + const _CharT* __it = __str.begin(); + if (__is_ascii(*__it)) { + do { + --__maximum; + ++__it; + if (__it == __str.end()) + return {__str.size(), __str.end()}; + + if (__maximum == 0) { + if (__is_ascii(*__it)) + return {static_cast<size_t>(__it - __str.begin()), __it}; + + break; + } + } while (__is_ascii(*__it)); + --__it; + ++__maximum; + } + + ptrdiff_t __ascii_size = __it - __str.begin(); + __column_width_result __result = + __detail::__estimate_column_width_grapheme_clustering(__it, __str.end(), __maximum, __rounding); + + __result.__width_ += __ascii_size; + return __result; +} +# else // !defined(_LIBCPP_HAS_NO_UNICODE) +template <class _CharT> +_LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT> +__estimate_column_width(basic_string_view<_CharT> __str, size_t __maximum, __column_width_rounding) noexcept { + // When Unicode isn't supported assume ASCII and every code unit is one code + // point. In ASCII the estimated column width is always one. Thus there's no + // need for rounding. 
+ size_t __width_ = _VSTD::min(__str.size(), __maximum); + return {__width_, __str.begin() + __width_}; +} + +# endif // !defined(_LIBCPP_HAS_NO_UNICODE) + } // namespace __format_spec #endif //_LIBCPP_STD_VER > 17 diff --git a/libcxx/include/__format/unicode.h b/libcxx/include/__format/unicode.h new file mode 100644 index 000000000000..3316217f4a1e --- /dev/null +++ b/libcxx/include/__format/unicode.h @@ -0,0 +1,339 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FORMAT_UNICODE_H +#define _LIBCPP___FORMAT_UNICODE_H + +#include <__assert> +#include <__config> +#include <__format/extended_grapheme_cluster_table.h> +#include <__utility/unreachable.h> +#include <bit> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER > 17 + +# ifndef _LIBCPP_HAS_NO_UNICODE + +/// Implements the grapheme cluster boundary rules +/// +/// These rules are used to implement format's width estimation as stated in +/// [format.string.std]/11 +/// +/// The Standard refers to UAX \#29 for Unicode 12.0.0 +/// https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundary_Rules +/// +/// The data tables used are +/// https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt +/// https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt +/// https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt (for testing only) + +namespace __unicode { + +inline constexpr char32_t __replacement_character = U'\ufffd'; + +_LIBCPP_HIDE_FROM_ABI constexpr bool __is_continuation(const char* __char, int __count) { + do { + if ((*__char & 0b1000'0000) != 0b1000'0000) + return false; + --__count; + ++__char; + } while (__count); + return true; +} + +/// Helper class to extract a code unit from a Unicode character range. +/// +/// The stored range is a view. There are multiple specialization for different +/// character types. +template <class _CharT> +class __code_point_view; + +/// UTF-8 specialization. +template <> +class __code_point_view<char> { +public: + _LIBCPP_HIDE_FROM_ABI constexpr explicit __code_point_view(const char* __first, const char* __last) + : __first_(__first), __last_(__last) {} + + _LIBCPP_HIDE_FROM_ABI constexpr bool __at_end() const noexcept { return __first_ == __last_; } + _LIBCPP_HIDE_FROM_ABI constexpr const char* __position() const noexcept { return __first_; } + + _LIBCPP_HIDE_FROM_ABI constexpr char32_t __consume() noexcept { + _LIBCPP_ASSERT(__first_ != __last_, "can't move beyond the end of input"); + + // Based on the number of leading 1 bits the number of code units in the + // code point can be determined. 
See + // https://en.wikipedia.org/wiki/UTF-8#Encoding + switch (_VSTD::countl_one(static_cast<unsigned char>(*__first_))) { + case 0: + return *__first_++; + + case 2: + if (__last_ - __first_ < 2 || !__unicode::__is_continuation(__first_ + 1, 1)) [[unlikely]] + break; + else { + char32_t __value = static_cast<unsigned char>(*__first_++) & 0x1f; + __value <<= 6; + __value |= static_cast<unsigned char>(*__first_++) & 0x3f; + return __value; + } + + case 3: + if (__last_ - __first_ < 3 || !__unicode::__is_continuation(__first_ + 1, 2)) [[unlikely]] + break; + else { + char32_t __value = static_cast<unsigned char>(*__first_++) & 0x0f; + __value <<= 6; + __value |= static_cast<unsigned char>(*__first_++) & 0x3f; + __value <<= 6; + __value |= static_cast<unsigned char>(*__first_++) & 0x3f; + return __value; + } + + case 4: + if (__last_ - __first_ < 4 || !__unicode::__is_continuation(__first_ + 1, 3)) [[unlikely]] + break; + else { + char32_t __value = static_cast<unsigned char>(*__first_++) & 0x07; + __value <<= 6; + __value |= static_cast<unsigned char>(*__first_++) & 0x3f; + __value <<= 6; + __value |= static_cast<unsigned char>(*__first_++) & 0x3f; + __value <<= 6; + __value |= static_cast<unsigned char>(*__first_++) & 0x3f; + return __value; + } + } + // An invalid number of leading ones can be garbage or a code unit in the + // middle of a code point. By consuming one code unit the parser may get + // "in sync" after a few code units. + ++__first_; + return __replacement_character; + } + +private: + const char* __first_; + const char* __last_; +}; + +# ifndef TEST_HAS_NO_WIDE_CHARACTERS +/// This specialization depends on the size of wchar_t +/// - 2 UTF-16 (for example Windows and AIX) +/// - 4 UTF-32 (for example Linux) +template <> +class __code_point_view<wchar_t> { +public: + _LIBCPP_HIDE_FROM_ABI constexpr explicit __code_point_view(const wchar_t* __first, const wchar_t* __last) + : __first_(__first), __last_(__last) {} + + _LIBCPP_HIDE_FROM_ABI constexpr const wchar_t* __position() const noexcept { return __first_; } + _LIBCPP_HIDE_FROM_ABI constexpr bool __at_end() const noexcept { return __first_ == __last_; } + + _LIBCPP_HIDE_FROM_ABI constexpr char32_t __consume() noexcept { + _LIBCPP_ASSERT(__first_ != __last_, "can't move beyond the end of input"); + + if constexpr (sizeof(wchar_t) == 2) { + char32_t __result = *__first_++; + // Is the code unit part of a surrogate pair? See + // https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF + if (__result >= 0xd800 && __result <= 0xDfff) { + // Malformed Unicode. 
+ if (__first_ == __last_) [[unlikely]] + return __replacement_character; + + __result -= 0xd800; + __result <<= 10; + __result += *__first_++ - 0xdc00; + __result += 0x10000; + } + return __result; + + } else if constexpr (sizeof(wchar_t) == 4) { + char32_t __result = *__first_++; + if (__result > 0x10FFFF) [[unlikely]] + return __replacement_character; + return __result; + } else { + // TODO FMT P2593R0 Use static_assert(false, "sizeof(wchar_t) has a not implemented value"); + _LIBCPP_ASSERT(sizeof(wchar_t) == 0, "sizeof(wchar_t) has a not implemented value"); + __libcpp_unreachable(); + } + } + +private: + const wchar_t* __first_; + const wchar_t* __last_; +}; +# endif + +_LIBCPP_HIDE_FROM_ABI constexpr bool __at_extended_grapheme_cluster_break( + bool& __ri_break_allowed, + bool __has_extened_pictographic, + __extended_grapheme_custer_property_boundary::__property __prev, + __extended_grapheme_custer_property_boundary::__property __next) { + using __extended_grapheme_custer_property_boundary::__property; + + __has_extened_pictographic |= __prev == __property::__Extended_Pictographic; + + // https://www.unicode.org/reports/tr29/tr29-39.html#Grapheme_Cluster_Boundary_Rules + + // *** Break at the start and end of text, unless the text is empty. *** + + _LIBCPP_ASSERT(__prev != __property::__sot, "should be handled in the constructor"); // GB1 + _LIBCPP_ASSERT(__prev != __property::__eot, "should be handled by our caller"); // GB2 + + // *** Do not break between a CR and LF. Otherwise, break before and after controls. *** + if (__prev == __property::__CR && __next == __property::__LF) // GB3 + return false; + + if (__prev == __property::__Control || __prev == __property::__CR || __prev == __property::__LF) // GB4 + return true; + + if (__next == __property::__Control || __next == __property::__CR || __next == __property::__LF) // GB5 + return true; + + // *** Do not break Hangul syllable sequences. *** + if (__prev == __property::__L && + (__next == __property::__L || __next == __property::__V || __next == __property::__LV || + __next == __property::__LVT)) // GB6 + return false; + + if ((__prev == __property::__LV || __prev == __property::__V) && + (__next == __property::__V || __next == __property::__T)) // GB7 + return false; + + if ((__prev == __property::__LVT || __prev == __property::__T) && __next == __property::__T) // GB8 + return false; + + // *** Do not break before extending characters or ZWJ. *** + if (__next == __property::__Extend || __next == __property::__ZWJ) + return false; // GB9 + + // *** Do not break before SpacingMarks, or after Prepend characters. *** + if (__next == __property::__SpacingMark) // GB9a + return false; + + if (__prev == __property::__Prepend) // GB9b + return false; + + // *** Do not break within emoji modifier sequences or emoji zwj sequences. 
*** + + // GB11 \p{Extended_Pictographic} Extend* ZWJ x \p{Extended_Pictographic} + // + // Note that several parts of this rule are matched by GB9: Any x (Extend | ZWJ) + // - \p{Extended_Pictographic} x Extend + // - Extend x Extend + // - \p{Extended_Pictographic} x ZWJ + // - Extend x ZWJ + // + // So the only case left to test is + // - \p{Extended_Pictographic}' x ZWJ x \p{Extended_Pictographic} + // where \p{Extended_Pictographic}' is stored in __has_extened_pictographic + if (__has_extened_pictographic && __prev == __property::__ZWJ && __next == __property::__Extended_Pictographic) + return false; + + // *** Do not break within emoji flag sequences *** + + // That is, do not break between regional indicator (RI) symbols if there + // is an odd number of RI characters before the break point. + + if (__prev == __property::__Regional_Indicator && __next == __property::__Regional_Indicator) { // GB12 + GB13 + __ri_break_allowed = !__ri_break_allowed; + if (__ri_break_allowed) + return true; + + return false; + } + + // *** Otherwise, break everywhere. *** + return true; // GB999 +} + +/// Helper class to extract an extended grapheme cluster from a Unicode character range. +/// +/// This function is used to determine the column width of an extended grapheme +/// cluster. In order to do that only the first code point is evaluated. +/// Therefore only this code point is extracted. +template <class _CharT> +class __extended_grapheme_cluster_view { +public: + _LIBCPP_HIDE_FROM_ABI constexpr explicit __extended_grapheme_cluster_view(const _CharT* __first, const _CharT* __last) + : __code_point_view_(__first, __last), + __next_code_point_(__code_point_view_.__consume()), + __next_prop_(__extended_grapheme_custer_property_boundary::__get_property(__next_code_point_)) {} + + struct __cluster { + /// The first code point of the extended grapheme cluster. + /// + /// The first code point is used to estimate the width of the extended + /// grapheme cluster. + char32_t __code_point_; + + /// Points one beyond the last code unit in the extended grapheme cluster. + /// + /// It's expected the caller has the start position and thus can determine + /// the code unit range of the extended grapheme cluster. 
+ const _CharT* __last_; + }; + + _LIBCPP_HIDE_FROM_ABI constexpr __cluster __consume() { + _LIBCPP_ASSERT( + __next_prop_ != __extended_grapheme_custer_property_boundary::__property::__eot, + "can't move beyond the end of input"); + char32_t __code_point = __next_code_point_; + if (!__code_point_view_.__at_end()) + return {__code_point, __get_break()}; + + __next_prop_ = __extended_grapheme_custer_property_boundary::__property::__eot; + return {__code_point, __code_point_view_.__position()}; + } + +private: + __code_point_view<_CharT> __code_point_view_; + + char32_t __next_code_point_; + __extended_grapheme_custer_property_boundary::__property __next_prop_; + + _LIBCPP_HIDE_FROM_ABI constexpr const _CharT* __get_break() { + bool __ri_break_allowed = true; + bool __has_extened_pictographic = false; + while (true) { + const _CharT* __result = __code_point_view_.__position(); + __extended_grapheme_custer_property_boundary::__property __prev = __next_prop_; + if (__code_point_view_.__at_end()) { + __next_prop_ = __extended_grapheme_custer_property_boundary::__property::__eot; + return __result; + } + __next_code_point_ = __code_point_view_.__consume(); + __next_prop_ = __extended_grapheme_custer_property_boundary::__get_property(__next_code_point_); + + __has_extened_pictographic |= + __prev == __extended_grapheme_custer_property_boundary::__property::__Extended_Pictographic; + + if (__at_extended_grapheme_cluster_break(__ri_break_allowed, __has_extened_pictographic, __prev, __next_prop_)) + return __result; + } + } +}; + +} // namespace __unicode + +# endif // _LIBCPP_HAS_NO_UNICODE + +#endif //_LIBCPP_STD_VER > 17 + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FORMAT_UNICODE_H diff --git a/libcxx/include/__iterator/reverse_iterator.h b/libcxx/include/__iterator/reverse_iterator.h index a915609dbe33..7f4ef3c3d503 100644 --- a/libcxx/include/__iterator/reverse_iterator.h +++ b/libcxx/include/__iterator/reverse_iterator.h @@ -332,41 +332,16 @@ using _ReverseWrapper = reverse_iterator<reverse_iterator<_Iter> >; template <class _Iter, bool __b> struct __unwrap_iter_impl<_ReverseWrapper<_Iter>, __b> { - static _LIBCPP_CONSTEXPR decltype(std::__unwrap_iter(std::declval<_Iter>())) - __apply(_ReverseWrapper<_Iter> __i) _NOEXCEPT { - return std::__unwrap_iter(__i.base().base()); - } -}; - -template <class _OrigIter, class _UnwrappedIter> -struct __rewrap_iter_impl<_ReverseWrapper<_OrigIter>, _UnwrappedIter> { - template <class _Iter> - struct _ReverseWrapperCount { - static _LIBCPP_CONSTEXPR const size_t value = 1; - }; - - template <class _Iter> - struct _ReverseWrapperCount<_ReverseWrapper<_Iter> > { - static _LIBCPP_CONSTEXPR const size_t value = 1 + _ReverseWrapperCount<_Iter>::value; - }; - - template <size_t _RewrapCount, class _OIter, class _UIter, __enable_if_t<_RewrapCount != 0, int> = 0> - _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR _ReverseWrapper<_OIter> __rewrap(_ReverseWrapper<_OIter> __iter1, - _UIter __iter2) { - return _ReverseWrapper<_OIter>( - reverse_iterator<_OIter>(__rewrap<_RewrapCount - 1>(__iter1.base().base(), __iter2))); - } + using _UnwrappedIter = decltype(__unwrap_iter_impl<_Iter>::__unwrap(std::declval<_Iter>())); - template <size_t _RewrapCount, class _OIter, class _UIter, __enable_if_t<_RewrapCount == 0, int> = 0> - _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR decltype(std::__rewrap_iter(std::declval<_OIter>(), - std::declval<_UIter>())) - __rewrap(_OIter __iter1, _UIter __iter2) { - return std::__rewrap_iter(__iter1, __iter2); + static 
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _ReverseWrapper<_Iter> + __rewrap(_ReverseWrapper<_Iter> __orig_iter, _UnwrappedIter __unwrapped_iter) { + return _ReverseWrapper<_Iter>( + reverse_iterator<_Iter>(__unwrap_iter_impl<_Iter>::__rewrap(__orig_iter.base().base(), __unwrapped_iter))); } - _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR _ReverseWrapper<_OrigIter> __apply(_ReverseWrapper<_OrigIter> __iter1, - _UnwrappedIter __iter2) { - return __rewrap<_ReverseWrapperCount<_OrigIter>::value>(__iter1, __iter2); + static _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _UnwrappedIter __unwrap(_ReverseWrapper<_Iter> __i) _NOEXCEPT { + return __unwrap_iter_impl<_Iter>::__unwrap(__i.base().base()); } }; diff --git a/libcxx/include/__locale b/libcxx/include/__locale index 4450123db194..40f9a3ff57c2 100644 --- a/libcxx/include/__locale +++ b/libcxx/include/__locale @@ -33,7 +33,7 @@ # include <__support/newlib/xlocale.h> #elif defined(__OpenBSD__) # include <__support/openbsd/xlocale.h> -#elif (defined(__APPLE__) || defined(__FreeBSD__) || defined(__IBMCPP__)) +#elif (defined(__APPLE__) || defined(__FreeBSD__)) # include <xlocale.h> #elif defined(__Fuchsia__) # include <__support/fuchsia/xlocale.h> @@ -492,7 +492,11 @@ public: static const mask punct = _ISPUNCT; static const mask xdigit = _ISXDIGIT; static const mask blank = _ISBLANK; +# if defined(_AIX) + static const mask __regex_word = 0x8000; +# else static const mask __regex_word = 0x80; +# endif #elif defined(_NEWLIB_VERSION) // Same type as Newlib's _ctype_ array in newlib/libc/include/ctype.h. typedef char mask; @@ -545,11 +549,8 @@ public: _LIBCPP_INLINE_VISIBILITY ctype_base() {} -// TODO: Remove the ifndef when the assert no longer fails on AIX. -#ifndef _AIX static_assert((__regex_word & ~(space | print | cntrl | upper | lower | alpha | digit | punct | xdigit | blank)) == __regex_word, "__regex_word can't overlap other bits"); -#endif }; template <class _CharT> class _LIBCPP_TEMPLATE_VIS ctype; diff --git a/libcxx/include/__random/binomial_distribution.h b/libcxx/include/__random/binomial_distribution.h index d0e8f3034939..af60fa2a38e0 100644 --- a/libcxx/include/__random/binomial_distribution.h +++ b/libcxx/include/__random/binomial_distribution.h @@ -27,7 +27,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template<class _IntType = int> class _LIBCPP_TEMPLATE_VIS binomial_distribution { - static_assert(__libcpp_random_is_valid_inttype<_IntType>::value, "IntType must be an integer type larger than char"); + static_assert(__libcpp_random_is_valid_inttype<_IntType>::value, "IntType must be a supported integer type"); public: // types typedef _IntType result_type; diff --git a/libcxx/include/__random/discrete_distribution.h b/libcxx/include/__random/discrete_distribution.h index d899e72d87f9..8dc63c0e98b4 100644 --- a/libcxx/include/__random/discrete_distribution.h +++ b/libcxx/include/__random/discrete_distribution.h @@ -30,7 +30,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template<class _IntType = int> class _LIBCPP_TEMPLATE_VIS discrete_distribution { - static_assert(__libcpp_random_is_valid_inttype<_IntType>::value, "IntType must be an integer type larger than char"); + static_assert(__libcpp_random_is_valid_inttype<_IntType>::value, "IntType must be a supported integer type"); public: // types typedef _IntType result_type; diff --git a/libcxx/include/__random/geometric_distribution.h b/libcxx/include/__random/geometric_distribution.h index 8e1be522e0e3..751cf7860e66 100644 --- a/libcxx/include/__random/geometric_distribution.h +++ 
b/libcxx/include/__random/geometric_distribution.h @@ -27,7 +27,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template<class _IntType = int> class _LIBCPP_TEMPLATE_VIS geometric_distribution { - static_assert(__libcpp_random_is_valid_inttype<_IntType>::value, "IntType must be an integer type larger than char"); + static_assert(__libcpp_random_is_valid_inttype<_IntType>::value, "IntType must be a supported integer type"); public: // types typedef _IntType result_type; diff --git a/libcxx/include/__random/is_valid.h b/libcxx/include/__random/is_valid.h index d41bfa45ea70..be3b61b8dc01 100644 --- a/libcxx/include/__random/is_valid.h +++ b/libcxx/include/__random/is_valid.h @@ -10,6 +10,7 @@ #define _LIBCPP___RANDOM_IS_VALID_H #include <__config> +#include <cstdint> #include <type_traits> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -25,18 +26,20 @@ _LIBCPP_BEGIN_NAMESPACE_STD // unsigned int, unsigned long, or unsigned long long. template<class> struct __libcpp_random_is_valid_inttype : false_type {}; +template<> struct __libcpp_random_is_valid_inttype<int8_t> : true_type {}; // extension template<> struct __libcpp_random_is_valid_inttype<short> : true_type {}; template<> struct __libcpp_random_is_valid_inttype<int> : true_type {}; template<> struct __libcpp_random_is_valid_inttype<long> : true_type {}; template<> struct __libcpp_random_is_valid_inttype<long long> : true_type {}; +template<> struct __libcpp_random_is_valid_inttype<uint8_t> : true_type {}; // extension template<> struct __libcpp_random_is_valid_inttype<unsigned short> : true_type {}; template<> struct __libcpp_random_is_valid_inttype<unsigned int> : true_type {}; template<> struct __libcpp_random_is_valid_inttype<unsigned long> : true_type {}; template<> struct __libcpp_random_is_valid_inttype<unsigned long long> : true_type {}; #ifndef _LIBCPP_HAS_NO_INT128 -template<> struct __libcpp_random_is_valid_inttype<__int128_t> : true_type {}; -template<> struct __libcpp_random_is_valid_inttype<__uint128_t> : true_type {}; +template<> struct __libcpp_random_is_valid_inttype<__int128_t> : true_type {}; // extension +template<> struct __libcpp_random_is_valid_inttype<__uint128_t> : true_type {}; // extension #endif // _LIBCPP_HAS_NO_INT128 // [rand.req.urng]/3: diff --git a/libcxx/include/__random/negative_binomial_distribution.h b/libcxx/include/__random/negative_binomial_distribution.h index 72ce88ea74ba..90d3f0178914 100644 --- a/libcxx/include/__random/negative_binomial_distribution.h +++ b/libcxx/include/__random/negative_binomial_distribution.h @@ -29,7 +29,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template<class _IntType = int> class _LIBCPP_TEMPLATE_VIS negative_binomial_distribution { - static_assert(__libcpp_random_is_valid_inttype<_IntType>::value, "IntType must be an integer type larger than char"); + static_assert(__libcpp_random_is_valid_inttype<_IntType>::value, "IntType must be a supported integer type"); public: // types typedef _IntType result_type; @@ -121,7 +121,9 @@ negative_binomial_distribution<_IntType>::operator()(_URNG& __urng, const param_ static_assert(__libcpp_random_is_valid_urng<_URNG>::value, ""); result_type __k = __pr.k(); double __p = __pr.p(); - if (__k <= 21 * __p) + // When the number of bits in _IntType is small, we are too likely to + // overflow __f below to use this technique. 
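The comment above refers to the Bernoulli-counting branch that follows: __f counts failures until __k successes have been seen, and with a one-byte result_type that counter overflows easily, which is why the patch adds the sizeof(_IntType) > 1 guard and falls back to the gamma/Poisson path otherwise. A minimal standalone sketch of the counting idea, using std::bernoulli_distribution directly rather than libc++ internals (the names here are illustrative, not the library's):

    #include <cstdint>
    #include <random>

    // Count failures observed before the k-th success of a p-coin.
    // With an 8-bit IntType the counter can pass its maximum and, if signed,
    // overflow; that is the risk the sizeof(_IntType) > 1 guard avoids.
    template <class IntType>
    IntType count_failures(std::mt19937& gen, IntType k, double p) {
      std::bernoulli_distribution coin(p);
      IntType successes = 0;
      IntType failures = 0;
      while (successes < k) {
        if (coin(gen))
          ++successes;
        else
          ++failures;
      }
      return failures;
    }

    int main() {
      std::mt19937 gen(0);
      return count_failures<std::int16_t>(gen, 3, 0.25) >= 0 ? 0 : 1;
    }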
+ if (__k <= 21 * __p && sizeof(_IntType) > 1) { bernoulli_distribution __gen(__p); result_type __f = 0; @@ -133,6 +135,8 @@ negative_binomial_distribution<_IntType>::operator()(_URNG& __urng, const param_ else ++__f; } + _LIBCPP_ASSERT(__f >= 0, "std::negative_binomial_distribution should never produce negative values. " + "This is almost certainly a signed integer overflow issue on __f."); return __f; } return poisson_distribution<result_type>(gamma_distribution<double> diff --git a/libcxx/include/__random/poisson_distribution.h b/libcxx/include/__random/poisson_distribution.h index 7730923ad6ca..ef55b1be4844 100644 --- a/libcxx/include/__random/poisson_distribution.h +++ b/libcxx/include/__random/poisson_distribution.h @@ -31,7 +31,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template<class _IntType = int> class _LIBCPP_TEMPLATE_VIS poisson_distribution { - static_assert(__libcpp_random_is_valid_inttype<_IntType>::value, "IntType must be an integer type larger than char"); + static_assert(__libcpp_random_is_valid_inttype<_IntType>::value, "IntType must be a supported integer type"); public: // types typedef _IntType result_type; diff --git a/libcxx/include/__random/uniform_int_distribution.h b/libcxx/include/__random/uniform_int_distribution.h index dd0a7e4e4982..46b627f45da9 100644 --- a/libcxx/include/__random/uniform_int_distribution.h +++ b/libcxx/include/__random/uniform_int_distribution.h @@ -159,7 +159,7 @@ __independent_bits_engine<_Engine, _UIntType>::__eval(true_type) template<class _IntType = int> class uniform_int_distribution { - static_assert(__libcpp_random_is_valid_inttype<_IntType>::value, "IntType must be an integer type larger than char"); + static_assert(__libcpp_random_is_valid_inttype<_IntType>::value, "IntType must be a supported integer type"); public: // types typedef _IntType result_type; diff --git a/libcxx/include/algorithm b/libcxx/include/algorithm index f616a031960e..5958ad1a95af 100644 --- a/libcxx/include/algorithm +++ b/libcxx/include/algorithm @@ -360,6 +360,17 @@ namespace ranges { borrowed_iterator_t<R> ranges::stable_sort(R&& r, Comp comp = {}, Proj proj = {}); // since C++20 + template<random_access_iterator I, sentinel_for<I> S, class Comp = ranges::less, + class Proj = identity> + requires sortable<I, Comp, Proj> + constexpr I + ranges::partial_sort(I first, I middle, S last, Comp comp = {}, Proj proj = {}); // since C++20 + + template<random_access_range R, class Comp = ranges::less, class Proj = identity> + requires sortable<iterator_t<R>, Comp, Proj> + constexpr borrowed_iterator_t<R> + ranges::partial_sort(R&& r, iterator_t<R> middle, Comp comp = {}, Proj proj = {}); // since C++20 + template<class T, output_iterator<const T&> O, sentinel_for<O> S> constexpr O ranges::fill(O first, S last, const T& value); // since C++20 @@ -464,6 +475,28 @@ namespace ranges { ranges::less> constexpr bool binary_search(R&& r, const T& value, Comp comp = {}, Proj proj = {}); // since C++20 + + template<permutable I, sentinel_for<I> S, class Proj = identity, + indirect_unary_predicate<projected<I, Proj>> Pred> + constexpr subrange<I> + partition(I first, S last, Pred pred, Proj proj = {}); // Since C++20 + + template<forward_range R, class Proj = identity, + indirect_unary_predicate<projected<iterator_t<R>, Proj>> Pred> + requires permutable<iterator_t<R>> + constexpr borrowed_subrange_t<R> + partition(R&& r, Pred pred, Proj proj = {}); // Since C++20 + + template<bidirectional_iterator I, sentinel_for<I> S, class Proj = identity, + indirect_unary_predicate<projected<I, 
Proj>> Pred> + requires permutable<I> + subrange<I> stable_partition(I first, S last, Pred pred, Proj proj = {}); // Since C++20 + + template<bidirectional_range R, class Proj = identity, + indirect_unary_predicate<projected<iterator_t<R>, Proj>> Pred> + requires permutable<iterator_t<R>> + borrowed_subrange_t<R> stable_partition(R&& r, Pred pred, Proj proj = {}); // Since C++20 + template<input_iterator I1, sentinel_for<I1> S1, forward_iterator I2, sentinel_for<I2> S2, class Pred = ranges::equal_to, class Proj1 = identity, class Proj2 = identity> requires indirectly_comparable<I1, I2, Pred, Proj1, Proj2> @@ -548,6 +581,34 @@ namespace ranges { constexpr ranges::move_result<borrowed_iterator_t<R>, O> ranges::move(R&& r, O result); // since C++20 + template<class I, class O1, class O2> + using partition_copy_result = in_out_out_result<I, O1, O2>; // since C++20 + + template<input_iterator I, sentinel_for<I> S, + weakly_incrementable O1, weakly_incrementable O2, + class Proj = identity, indirect_unary_predicate<projected<I, Proj>> Pred> + requires indirectly_copyable<I, O1> && indirectly_copyable<I, O2> + constexpr partition_copy_result<I, O1, O2> + partition_copy(I first, S last, O1 out_true, O2 out_false, Pred pred, + Proj proj = {}); // Since C++20 + + template<input_range R, weakly_incrementable O1, weakly_incrementable O2, + class Proj = identity, + indirect_unary_predicate<projected<iterator_t<R>, Proj>> Pred> + requires indirectly_copyable<iterator_t<R>, O1> && + indirectly_copyable<iterator_t<R>, O2> + constexpr partition_copy_result<borrowed_iterator_t<R>, O1, O2> + partition_copy(R&& r, O1 out_true, O2 out_false, Pred pred, Proj proj = {}); // Since C++20 + + template<forward_iterator I, sentinel_for<I> S, class Proj = identity, + indirect_unary_predicate<projected<I, Proj>> Pred> + constexpr I partition_point(I first, S last, Pred pred, Proj proj = {}); // Since C++20 + + template<forward_range R, class Proj = identity, + indirect_unary_predicate<projected<iterator_t<R>, Proj>> Pred> + constexpr borrowed_iterator_t<R> + partition_point(R&& r, Pred pred, Proj proj = {}); // Since C++20 + template<class I1, class I2, class O> using merge_result = in_in_out_result<I1, I2, O>; // since C++20 @@ -649,6 +710,16 @@ namespace ranges { constexpr ranges::rotate_copy_result<borrowed_iterator_t<R>, O> ranges::rotate_copy(R&& r, iterator_t<R> middle, O result); // since C++20 + template<random_access_iterator I, sentinel_for<I> S, class Gen> + requires permutable<I> && + uniform_random_bit_generator<remove_reference_t<Gen>> + I shuffle(I first, S last, Gen&& g); // Since C++20 + + template<random_access_range R, class Gen> + requires permutable<iterator_t<R>> && + uniform_random_bit_generator<remove_reference_t<Gen>> + borrowed_iterator_t<R> shuffle(R&& r, Gen&& g); // Since C++20 + template<forward_iterator I1, sentinel_for<I1> S1, forward_iterator I2, sentinel_for<I2> S2, class Pred = ranges::equal_to, class Proj1 = identity, class Proj2 = identity> @@ -711,7 +782,49 @@ namespace ranges { borrowed_iterator_t<R2>, O> set_symmetric_difference(R1&& r1, R2&& r2, O result, Comp comp = {}, Proj1 proj1 = {}, Proj2 proj2 = {}); // since C++20 + + template<forward_iterator I, sentinel_for<I> S, class T, class Proj = identity, + indirect_strict_weak_order<const T*, projected<I, Proj>> Comp = ranges::less> + constexpr subrange<I> + equal_range(I first, S last, const T& value, Comp comp = {}, Proj proj = {}); // since C++20 + + template<forward_range R, class T, class Proj = identity, + 
indirect_strict_weak_order<const T*, projected<iterator_t<R>, Proj>> Comp = + ranges::less> + constexpr borrowed_subrange_t<R> + equal_range(R&& r, const T& value, Comp comp = {}, Proj proj = {}); // since C++20 + + template<class I1, class I2, class O> + using set_union_result = in_in_out_result<I1, I2, O>; // since C++20 + + template<input_iterator I1, sentinel_for<I1> S1, input_iterator I2, sentinel_for<I2> S2, + weakly_incrementable O, class Comp = ranges::less, + class Proj1 = identity, class Proj2 = identity> + requires mergeable<I1, I2, O, Comp, Proj1, Proj2> + constexpr set_union_result<I1, I2, O> + set_union(I1 first1, S1 last1, I2 first2, S2 last2, O result, Comp comp = {}, + Proj1 proj1 = {}, Proj2 proj2 = {}); // since C++20 + + template<input_range R1, input_range R2, weakly_incrementable O, + class Comp = ranges::less, class Proj1 = identity, class Proj2 = identity> + requires mergeable<iterator_t<R1>, iterator_t<R2>, O, Comp, Proj1, Proj2> + constexpr set_union_result<borrowed_iterator_t<R1>, borrowed_iterator_t<R2>, O> + set_union(R1&& r1, R2&& r2, O result, Comp comp = {}, + Proj1 proj1 = {}, Proj2 proj2 = {}); // since C++20 + + template<input_iterator I1, sentinel_for<I1> S1, input_iterator I2, sentinel_for<I2> S2, + class Proj1 = identity, class Proj2 = identity, + indirect_strict_weak_order<projected<I1, Proj1>, projected<I2, Proj2>> Comp = + ranges::less> + constexpr bool includes(I1 first1, S1 last1, I2 first2, S2 last2, Comp comp = {}, + Proj1 proj1 = {}, Proj2 proj2 = {}); // Since C++20 + template<input_range R1, input_range R2, class Proj1 = identity, + class Proj2 = identity, + indirect_strict_weak_order<projected<iterator_t<R1>, Proj1>, + projected<iterator_t<R2>, Proj2>> Comp = ranges::less> + constexpr bool includes(R1&& r1, R2&& r2, Comp comp = {}, + Proj1 proj1 = {}, Proj2 proj2 = {}); // Since C++20 } constexpr bool // constexpr in C++20 @@ -1452,6 +1565,7 @@ template <class BidirectionalIterator, class Compare> #include <__algorithm/ranges_count.h> #include <__algorithm/ranges_count_if.h> #include <__algorithm/ranges_equal.h> +#include <__algorithm/ranges_equal_range.h> #include <__algorithm/ranges_fill.h> #include <__algorithm/ranges_fill_n.h> #include <__algorithm/ranges_find.h> @@ -1461,6 +1575,7 @@ template <class BidirectionalIterator, class Compare> #include <__algorithm/ranges_find_if_not.h> #include <__algorithm/ranges_for_each.h> #include <__algorithm/ranges_for_each_n.h> +#include <__algorithm/ranges_includes.h> #include <__algorithm/ranges_is_partitioned.h> #include <__algorithm/ranges_is_sorted.h> #include <__algorithm/ranges_is_sorted_until.h> @@ -1479,6 +1594,10 @@ template <class BidirectionalIterator, class Compare> #include <__algorithm/ranges_move_backward.h> #include <__algorithm/ranges_none_of.h> #include <__algorithm/ranges_nth_element.h> +#include <__algorithm/ranges_partial_sort.h> +#include <__algorithm/ranges_partition.h> +#include <__algorithm/ranges_partition_copy.h> +#include <__algorithm/ranges_partition_point.h> #include <__algorithm/ranges_pop_heap.h> #include <__algorithm/ranges_push_heap.h> #include <__algorithm/ranges_remove.h> @@ -1493,8 +1612,11 @@ template <class BidirectionalIterator, class Compare> #include <__algorithm/ranges_set_difference.h> #include <__algorithm/ranges_set_intersection.h> #include <__algorithm/ranges_set_symmetric_difference.h> +#include <__algorithm/ranges_set_union.h> +#include <__algorithm/ranges_shuffle.h> #include <__algorithm/ranges_sort.h> #include <__algorithm/ranges_sort_heap.h> 
+#include <__algorithm/ranges_stable_partition.h> #include <__algorithm/ranges_stable_sort.h> #include <__algorithm/ranges_swap_ranges.h> #include <__algorithm/ranges_transform.h> diff --git a/libcxx/include/bit b/libcxx/include/bit index fe1bcadc818a..15bc13a504b1 100644 --- a/libcxx/include/bit +++ b/libcxx/include/bit @@ -75,9 +75,6 @@ namespace std { # include <iosfwd> #endif -#if defined(__IBMCPP__) -# include "__support/ibm/support.h" -#endif #if defined(_LIBCPP_COMPILER_MSVC) # include <intrin.h> #endif diff --git a/libcxx/include/format b/libcxx/include/format index 60197d24523f..d2ec8fc23363 100644 --- a/libcxx/include/format +++ b/libcxx/include/format @@ -131,7 +131,7 @@ namespace std { #include <__assert> // all public C++ headers provide the assertion handler // Make sure all feature-test macros are available. #include <version> -// Enable the contents of the header only when libc++ was built with LIBCXX_ENABLE_INCOMPLETE_FEATURES. +// Enable the contents of the header only when libc++ was built with experimental features enabled. #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_FORMAT) #include <__algorithm/clamp.h> @@ -157,6 +157,7 @@ namespace std { #include <__format/formatter_pointer.h> #include <__format/formatter_string.h> #include <__format/parser_std_format_spec.h> +#include <__format/unicode.h> #include <__iterator/back_insert_iterator.h> #include <__iterator/incrementable_traits.h> #include <__variant/monostate.h> diff --git a/libcxx/include/limits b/libcxx/include/limits index 35e4d85734de..1fa3a8228fd8 100644 --- a/libcxx/include/limits +++ b/libcxx/include/limits @@ -110,10 +110,6 @@ template<> class numeric_limits<cv long double>; #include "__support/win32/limits_msvc_win32.h" #endif // _LIBCPP_MSVCRT -#if defined(__IBMCPP__) -#include "__support/ibm/limits.h" -#endif // __IBMCPP__ - #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif diff --git a/libcxx/include/module.modulemap.in b/libcxx/include/module.modulemap.in index 25e1c6e6d0e2..cbf0b4f7f16d 100644 --- a/libcxx/include/module.modulemap.in +++ b/libcxx/include/module.modulemap.in @@ -337,6 +337,7 @@ module std [system] { module ranges_is_partitioned { private header "__algorithm/ranges_is_partitioned.h" } module ranges_is_sorted { private header "__algorithm/ranges_is_sorted.h" } module ranges_is_sorted_until { private header "__algorithm/ranges_is_sorted_until.h" } + module ranges_iterator_concept { private header "__algorithm/ranges_iterator_concept.h" } module ranges_lexicographical_compare { private header "__algorithm/ranges_lexicographical_compare.h" } module ranges_lower_bound { private header "__algorithm/ranges_lower_bound.h" } module ranges_make_heap { private header "__algorithm/ranges_make_heap.h" } @@ -352,6 +353,7 @@ module std [system] { module ranges_move_backward { private header "__algorithm/ranges_move_backward.h" } module ranges_none_of { private header "__algorithm/ranges_none_of.h" } module ranges_nth_element { private header "__algorithm/ranges_nth_element.h" } + module ranges_partial_sort { private header "__algorithm/ranges_partial_sort.h" } module ranges_partial_sort_copy { private header "__algorithm/ranges_partial_sort_copy.h" } module ranges_partition { private header "__algorithm/ranges_partition.h" } module ranges_partition_copy { private header "__algorithm/ranges_partition_copy.h" } @@ -614,32 +616,34 @@ module std [system] { export * module __format { - module buffer { private header "__format/buffer.h" } - module concepts { private header 
"__format/concepts.h" } - module enable_insertable { private header "__format/enable_insertable.h" } - module format_arg { private header "__format/format_arg.h" } - module format_arg_store { private header "__format/format_arg_store.h" } - module format_args { private header "__format/format_args.h" } + module buffer { private header "__format/buffer.h" } + module concepts { private header "__format/concepts.h" } + module enable_insertable { private header "__format/enable_insertable.h" } + module extended_grapheme_cluster_table { private header "__format/extended_grapheme_cluster_table.h" } + module format_arg { private header "__format/format_arg.h" } + module format_arg_store { private header "__format/format_arg_store.h" } + module format_args { private header "__format/format_args.h" } module format_context { private header "__format/format_context.h" export optional export locale } - module format_error { private header "__format/format_error.h" } - module format_fwd { private header "__format/format_fwd.h" } - module format_parse_context { private header "__format/format_parse_context.h" } - module format_string { private header "__format/format_string.h" } - module format_to_n_result { private header "__format/format_to_n_result.h" } - module formatter { private header "__format/formatter.h" } - module formatter_bool { private header "__format/formatter_bool.h" } - module formatter_char { private header "__format/formatter_char.h" } - module formatter_floating_point { private header "__format/formatter_floating_point.h" } - module formatter_integer { private header "__format/formatter_integer.h" } - module formatter_integral { private header "__format/formatter_integral.h" } - module formatter_output { private header "__format/formatter_output.h" } - module formatter_pointer { private header "__format/formatter_pointer.h" } - module formatter_string { private header "__format/formatter_string.h" } - module parser_std_format_spec { private header "__format/parser_std_format_spec.h" } + module format_error { private header "__format/format_error.h" } + module format_fwd { private header "__format/format_fwd.h" } + module format_parse_context { private header "__format/format_parse_context.h" } + module format_string { private header "__format/format_string.h" } + module format_to_n_result { private header "__format/format_to_n_result.h" } + module formatter { private header "__format/formatter.h" } + module formatter_bool { private header "__format/formatter_bool.h" } + module formatter_char { private header "__format/formatter_char.h" } + module formatter_floating_point { private header "__format/formatter_floating_point.h" } + module formatter_integer { private header "__format/formatter_integer.h" } + module formatter_integral { private header "__format/formatter_integral.h" } + module formatter_output { private header "__format/formatter_output.h" } + module formatter_pointer { private header "__format/formatter_pointer.h" } + module formatter_string { private header "__format/formatter_string.h" } + module parser_std_format_spec { private header "__format/parser_std_format_spec.h" } + module unicode { private header "__format/unicode.h" } } } module forward_list { diff --git a/libcxx/include/ostream b/libcxx/include/ostream index 283774585b92..14b49d78cb03 100644 --- a/libcxx/include/ostream +++ b/libcxx/include/ostream @@ -130,6 +130,35 @@ template <class charT, class traits> template <class Stream, class T> Stream&& operator<<(Stream&& os, const T& x); +template<class traits> 
+basic_ostream<char, traits>& operator<<(basic_ostream<char, traits>&, wchar_t) = delete; // since C++20 +template<class traits> +basic_ostream<char, traits>& operator<<(basic_ostream<char, traits>&, char8_t) = delete; // since C++20 +template<class traits> +basic_ostream<char, traits>& operator<<(basic_ostream<char, traits>&, char16_t) = delete; // since C++20 +template<class traits> +basic_ostream<char, traits>& operator<<(basic_ostream<char, traits>&, char32_t) = delete; // since C++20 +template<class traits> +basic_ostream<wchar_t, traits>& operator<<(basic_ostream<wchar_t, traits>&, char8_t) = delete; // since C++20 +template<class traits> +basic_ostream<wchar_t, traits>& operator<<(basic_ostream<wchar_t, traits>&, char16_t) = delete; // since C++20 +template<class traits> +basic_ostream<wchar_t, traits>& operator<<(basic_ostream<wchar_t, traits>&, char32_t) = delete; // since C++20 +template<class traits> +basic_ostream<char, traits>& operator<<(basic_ostream<char, traits>&, const wchar_t*) = delete; // since C++20 +template<class traits> +basic_ostream<char, traits>& operator<<(basic_ostream<char, traits>&, const char8_t*) = delete; // since C++20 +template<class traits> +basic_ostream<char, traits>& operator<<(basic_ostream<char, traits>&, const char16_t*) = delete; // since C++20 +template<class traits> +basic_ostream<char, traits>& operator<<(basic_ostream<char, traits>&, const char32_t*) = delete; // since C++20 +template<class traits> +basic_ostream<wchar_t, traits>& operator<<(basic_ostream<wchar_t, traits>&, const char8_t*) = delete; // since C++20 +template<class traits> +basic_ostream<wchar_t, traits>& operator<<(basic_ostream<wchar_t, traits>&, const char16_t*) = delete; // since C++20 +template<class traits> +basic_ostream<wchar_t, traits>& operator<<(basic_ostream<wchar_t, traits>&, const char32_t*) = delete; // since C++20 + } // std */ @@ -225,9 +254,13 @@ public: basic_ostream& operator<<(basic_streambuf<char_type, traits_type>* __sb); +#if _LIBCPP_STD_VER > 14 +// LWG 2221 - nullptr. This is not backported to older standards modes. +// See https://reviews.llvm.org/D127033 for more info on the rationale. 
_LIBCPP_INLINE_VISIBILITY basic_ostream& operator<<(nullptr_t) { return *this << "nullptr"; } +#endif // 27.7.2.7 Unformatted output: basic_ostream& put(char_type __c); @@ -1098,6 +1131,57 @@ operator<<(basic_ostream<_CharT, _Traits>& __os, const bitset<_Size>& __x) use_facet<ctype<_CharT> >(__os.getloc()).widen('1')); } +#if _LIBCPP_STD_VER > 17 + +#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +template <class _Traits> +basic_ostream<char, _Traits>& operator<<(basic_ostream<char, _Traits>&, wchar_t) = delete; + +template <class _Traits> +basic_ostream<char, _Traits>& operator<<(basic_ostream<char, _Traits>&, const wchar_t*) = delete; + +template <class _Traits> +basic_ostream<wchar_t, _Traits>& operator<<(basic_ostream<wchar_t, _Traits>&, char16_t) = delete; + +template <class _Traits> +basic_ostream<wchar_t, _Traits>& operator<<(basic_ostream<wchar_t, _Traits>&, char32_t) = delete; + +template <class _Traits> +basic_ostream<wchar_t, _Traits>& operator<<(basic_ostream<wchar_t, _Traits>&, const char16_t*) = delete; + +template <class _Traits> +basic_ostream<wchar_t, _Traits>& operator<<(basic_ostream<wchar_t, _Traits>&, const char32_t*) = delete; + +#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS + +#ifndef _LIBCPP_HAS_NO_CHAR8_T +template <class _Traits> +basic_ostream<char, _Traits>& operator<<(basic_ostream<char, _Traits>&, char8_t) = delete; + +template <class _Traits> +basic_ostream<wchar_t, _Traits>& operator<<(basic_ostream<wchar_t, _Traits>&, char8_t) = delete; + +template <class _Traits> +basic_ostream<char, _Traits>& operator<<(basic_ostream<char, _Traits>&, const char8_t*) = delete; + +template <class _Traits> +basic_ostream<wchar_t, _Traits>& operator<<(basic_ostream<wchar_t, _Traits>&, const char8_t*) = delete; +#endif + +template <class _Traits> +basic_ostream<char, _Traits>& operator<<(basic_ostream<char, _Traits>&, char16_t) = delete; + +template <class _Traits> +basic_ostream<char, _Traits>& operator<<(basic_ostream<char, _Traits>&, char32_t) = delete; + +template <class _Traits> +basic_ostream<char, _Traits>& operator<<(basic_ostream<char, _Traits>&, const char16_t*) = delete; + +template <class _Traits> +basic_ostream<char, _Traits>& operator<<(basic_ostream<char, _Traits>&, const char32_t*) = delete; + +#endif // _LIBCPP_STD_VER > 17 + extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS basic_ostream<char>; #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS basic_ostream<wchar_t>; diff --git a/libcxx/include/version b/libcxx/include/version index 2034e4b1f3d9..1df51fa1cb86 100644 --- a/libcxx/include/version +++ b/libcxx/include/version @@ -47,7 +47,7 @@ __cpp_lib_bounded_array_traits 201902L <type_traits> __cpp_lib_boyer_moore_searcher 201603L <functional> __cpp_lib_byte 201603L <cstddef> __cpp_lib_byteswap 202110L <bit> -__cpp_lib_char8_t 201811L <atomic> <filesystem> <istream> +__cpp_lib_char8_t 201907L <atomic> <filesystem> <istream> <limits> <locale> <ostream> <string> <string_view> __cpp_lib_chrono 201611L <chrono> @@ -308,7 +308,7 @@ __cpp_lib_void_t 201411L <type_traits> // # define __cpp_lib_bitops 201907L # define __cpp_lib_bounded_array_traits 201902L # if !defined(_LIBCPP_HAS_NO_CHAR8_T) -# define __cpp_lib_char8_t 201811L +# define __cpp_lib_char8_t 201907L # endif # define __cpp_lib_concepts 202002L # define __cpp_lib_constexpr_algorithms 201806L diff --git a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp index e363f9e59c9a..b8bd9bc59010 100644 --- a/libunwind/src/UnwindCursor.hpp +++ 
b/libunwind/src/UnwindCursor.hpp @@ -2695,8 +2695,14 @@ bool UnwindCursor<A, R>::setInfoForSigReturn(Registers_s390x &) { // own restorer function, though, or user-mode QEMU might write a trampoline // onto the stack. const pint_t pc = static_cast<pint_t>(this->getReg(UNW_REG_IP)); - const uint16_t inst = _addressSpace.get16(pc); - if (inst == 0x0a77 || inst == 0x0aad) { + // The PC might contain an invalid address if the unwind info is bad, so + // directly accessing it could cause a segfault. Use process_vm_readv to + // read the memory safely instead. + uint16_t inst; + struct iovec local_iov = {&inst, sizeof inst}; + struct iovec remote_iov = {reinterpret_cast<void *>(pc), sizeof inst}; + long bytesRead = process_vm_readv(getpid(), &local_iov, 1, &remote_iov, 1, 0); + if (bytesRead == sizeof inst && (inst == 0x0a77 || inst == 0x0aad)) { _info = {}; _info.start_ip = pc; _info.end_ip = pc + 2; diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp index 6cabb22d98cf..3cdbd6c0337b 100644 --- a/lld/COFF/Chunks.cpp +++ b/lld/COFF/Chunks.cpp @@ -13,6 +13,7 @@ #include "Symbols.h" #include "Writer.h" #include "llvm/ADT/Twine.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/BinaryFormat/COFF.h" #include "llvm/Object/COFF.h" #include "llvm/Support/Debug.h" @@ -815,7 +816,7 @@ void RVATableChunk::writeTo(uint8_t *buf) const { size_t cnt = 0; for (const ChunkAndOffset &co : syms) begin[cnt++] = co.inputChunk->getRVA() + co.offset; - std::sort(begin, begin + cnt); + llvm::sort(begin, begin + cnt); assert(std::unique(begin, begin + cnt) == begin + cnt && "RVA tables should be de-duplicated"); } diff --git a/lld/COFF/DLL.cpp b/lld/COFF/DLL.cpp index bfa2a6910e2b..42a5a41f87ae 100644 --- a/lld/COFF/DLL.cpp +++ b/lld/COFF/DLL.cpp @@ -21,6 +21,7 @@ #include "COFFLinkerContext.h" #include "Chunks.h" #include "SymbolTable.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Object/COFF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Path.h" @@ -150,10 +151,9 @@ binImports(const std::vector<DefinedImportData *> &imports) { for (auto &kv : m) { // Sort symbols by name for each group. std::vector<DefinedImportData *> &syms = kv.second; - std::sort(syms.begin(), syms.end(), - [](DefinedImportData *a, DefinedImportData *b) { - return a->getName() < b->getName(); - }); + llvm::sort(syms, [](DefinedImportData *a, DefinedImportData *b) { + return a->getName() < b->getName(); + }); v.push_back(std::move(syms)); } return v; diff --git a/lld/COFF/DebugTypes.cpp b/lld/COFF/DebugTypes.cpp index 5878386aeb93..800b40f343aa 100644 --- a/lld/COFF/DebugTypes.cpp +++ b/lld/COFF/DebugTypes.cpp @@ -1126,9 +1126,8 @@ void TypeMerger::mergeTypesWithGHash() { } // In parallel, remap all types. - for_each(dependencySources, [&](TpiSource *source) { + for (TpiSource *source : dependencySources) source->remapTpiWithGHashes(&ghashState); - }); parallelForEach(objectSources, [&](TpiSource *source) { source->remapTpiWithGHashes(&ghashState); }); diff --git a/lld/COFF/DriverUtils.cpp b/lld/COFF/DriverUtils.cpp index ac0f1f972c79..29a2d0165839 100644 --- a/lld/COFF/DriverUtils.cpp +++ b/lld/COFF/DriverUtils.cpp @@ -18,6 +18,7 @@ #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/BinaryFormat/COFF.h" #include "llvm/Object/COFF.h" @@ -694,10 +695,9 @@ void fixupExports() { config->exports = std::move(v); // Sort by name. 
- std::sort(config->exports.begin(), config->exports.end(), - [](const Export &a, const Export &b) { - return a.exportName < b.exportName; - }); + llvm::sort(config->exports, [](const Export &a, const Export &b) { + return a.exportName < b.exportName; + }); } void assignExportOrdinals() { @@ -709,7 +709,7 @@ void assignExportOrdinals() { if (e.ordinal == 0) e.ordinal = ++max; if (max > std::numeric_limits<uint16_t>::max()) - fatal("too many exported symbols (max " + + fatal("too many exported symbols (got " + Twine(max) + ", max " + Twine(std::numeric_limits<uint16_t>::max()) + ")"); } diff --git a/lld/COFF/PDB.cpp b/lld/COFF/PDB.cpp index 2ceb4fb98031..87b6bb55d610 100644 --- a/lld/COFF/PDB.cpp +++ b/lld/COFF/PDB.cpp @@ -296,14 +296,14 @@ static void addGHashTypeInfo(COFFLinkerContext &ctx, // Start the TPI or IPI stream header. builder.getTpiBuilder().setVersionHeader(pdb::PdbTpiV80); builder.getIpiBuilder().setVersionHeader(pdb::PdbTpiV80); - for_each(ctx.tpiSourceList, [&](TpiSource *source) { + for (TpiSource *source : ctx.tpiSourceList) { builder.getTpiBuilder().addTypeRecords(source->mergedTpi.recs, source->mergedTpi.recSizes, source->mergedTpi.recHashes); builder.getIpiBuilder().addTypeRecords(source->mergedIpi.recs, source->mergedIpi.recSizes, source->mergedIpi.recHashes); - }); + } } static void @@ -1134,7 +1134,8 @@ void PDBLinker::addObjectsToPDB() { ScopedTimer t1(ctx.addObjectsTimer); // Create module descriptors - for_each(ctx.objFileInstances, [&](ObjFile *obj) { createModuleDBI(obj); }); + for (ObjFile *obj : ctx.objFileInstances) + createModuleDBI(obj); // Reorder dependency type sources to come first. tMerger.sortDependencies(); @@ -1144,9 +1145,10 @@ void PDBLinker::addObjectsToPDB() { tMerger.mergeTypesWithGHash(); // Merge dependencies and then regular objects. - for_each(tMerger.dependencySources, - [&](TpiSource *source) { addDebug(source); }); - for_each(tMerger.objectSources, [&](TpiSource *source) { addDebug(source); }); + for (TpiSource *source : tMerger.dependencySources) + addDebug(source); + for (TpiSource *source : tMerger.objectSources) + addDebug(source); builder.getStringTableBuilder().setStrings(pdbStrTab); t1.stop(); @@ -1163,10 +1165,10 @@ void PDBLinker::addObjectsToPDB() { t2.stop(); if (config->showSummary) { - for_each(ctx.tpiSourceList, [&](TpiSource *source) { + for (TpiSource *source : ctx.tpiSourceList) { nbTypeRecords += source->nbTypeRecords; nbTypeRecordsBytes += source->nbTypeRecordsBytes; - }); + } } } diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp index c09bb2e60786..8fca1a686a79 100644 --- a/lld/ELF/Arch/RISCV.cpp +++ b/lld/ELF/Arch/RISCV.cpp @@ -57,6 +57,7 @@ enum Op { enum Reg { X_RA = 1, + X_TP = 4, X_T0 = 5, X_T1 = 6, X_T2 = 7, @@ -76,6 +77,19 @@ static uint32_t utype(uint32_t op, uint32_t rd, uint32_t imm) { return op | (rd << 7) | (imm << 12); } +// Extract bits v[begin:end], where range is inclusive, and begin must be < 63. 
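The helpers introduced just below (extractBits, setLO12_I, setLO12_S) patch 12-bit low immediates into existing instructions. For context: in the RISC-V base encoding an I-type immediate (addi/lw/jalr) occupies bits 31:20, while an S-type immediate (stores) is split across imm[11:5] in bits 31:25 and imm[4:0] in bits 11:7; the 0xfffff and 0x1fff07f masks preserve everything around those fields. A small standalone sketch of the I-type case (the instruction constant and names are illustrative only, not part of the patch):

    #include <cassert>
    #include <cstdint>

    // Overwrite the 12-bit I-type immediate (bits 31:20), keeping
    // opcode/rd/funct3/rs1 in bits 19:0 untouched.
    static uint32_t patch_itype_imm(uint32_t insn, uint32_t imm12) {
      return (insn & 0xfffffu) | ((imm12 & 0xfffu) << 20);
    }

    int main() {
      uint32_t addi_a0_a0_0 = 0x00050513; // addi a0, a0, 0
      uint32_t patched = patch_itype_imm(addi_a0_a0_0, 42); // addi a0, a0, 42
      assert(patched >> 20 == 42);
      return 0;
    }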
+static uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) { + return (v & ((1ULL << (begin + 1)) - 1)) >> end; +} + +static uint32_t setLO12_I(uint32_t insn, uint32_t imm) { + return (insn & 0xfffff) | (imm << 20); +} +static uint32_t setLO12_S(uint32_t insn, uint32_t imm) { + return (insn & 0x1fff07f) | (extractBits(imm, 11, 5) << 25) | + (extractBits(imm, 4, 0) << 7); +} + RISCV::RISCV() { copyRel = R_RISCV_COPY; pltRel = R_RISCV_JUMP_SLOT; @@ -270,10 +284,9 @@ RelExpr RISCV::getRelExpr(const RelType type, const Symbol &s, case R_RISCV_TPREL_LO12_I: case R_RISCV_TPREL_LO12_S: return R_TPREL; - case R_RISCV_TPREL_ADD: - return R_NONE; case R_RISCV_ALIGN: return R_RELAX_HINT; + case R_RISCV_TPREL_ADD: case R_RISCV_RELAX: return config->relax ? R_RELAX_HINT : R_NONE; default: @@ -283,11 +296,6 @@ RelExpr RISCV::getRelExpr(const RelType type, const Symbol &s, } } -// Extract bits V[Begin:End], where range is inclusive, and Begin must be < 63. -static uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) { - return (v & ((1ULL << (begin + 1)) - 1)) >> end; -} - void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { const unsigned bits = config->wordsize * 8; @@ -404,7 +412,7 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { case R_RISCV_LO12_I: { uint64_t hi = (val + 0x800) >> 12; uint64_t lo = val - (hi << 12); - write32le(loc, (read32le(loc) & 0xFFFFF) | ((lo & 0xFFF) << 20)); + write32le(loc, setLO12_I(read32le(loc), lo & 0xfff)); return; } @@ -413,9 +421,7 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { case R_RISCV_LO12_S: { uint64_t hi = (val + 0x800) >> 12; uint64_t lo = val - (hi << 12); - uint32_t imm11_5 = extractBits(lo, 11, 5) << 25; - uint32_t imm4_0 = extractBits(lo, 4, 0) << 7; - write32le(loc, (read32le(loc) & 0x1FFF07F) | imm11_5 | imm4_0); + write32le(loc, setLO12_S(read32le(loc), lo)); return; } @@ -567,6 +573,35 @@ static void relaxCall(const InputSection &sec, size_t i, uint64_t loc, } } +// Relax local-exec TLS when hi20 is zero. +static void relaxTlsLe(const InputSection &sec, size_t i, uint64_t loc, + Relocation &r, uint32_t &remove) { + uint64_t val = r.sym->getVA(r.addend); + if (hi20(val) != 0) + return; + uint32_t insn = read32le(sec.rawData.data() + r.offset); + switch (r.type) { + case R_RISCV_TPREL_HI20: + case R_RISCV_TPREL_ADD: + // Remove lui rd, %tprel_hi(x) and add rd, rd, tp, %tprel_add(x). 
+ sec.relaxAux->relocTypes[i] = R_RISCV_RELAX; + remove = 4; + break; + case R_RISCV_TPREL_LO12_I: + // addi rd, rd, %tprel_lo(x) => addi rd, tp, st_value(x) + sec.relaxAux->relocTypes[i] = R_RISCV_32; + insn = (insn & ~(31 << 15)) | (X_TP << 15); + sec.relaxAux->writes.push_back(setLO12_I(insn, val)); + break; + case R_RISCV_TPREL_LO12_S: + // sw rs, %tprel_lo(x)(rd) => sw rs, st_value(x)(rd) + sec.relaxAux->relocTypes[i] = R_RISCV_32; + insn = (insn & ~(31 << 15)) | (X_TP << 15); + sec.relaxAux->writes.push_back(setLO12_S(insn, val)); + break; + } +} + static bool relax(InputSection &sec) { const uint64_t secAddr = sec.getVA(); auto &aux = *sec.relaxAux; @@ -612,6 +647,14 @@ static bool relax(InputSection &sec) { sec.relocations[i + 1].type == R_RISCV_RELAX) relaxCall(sec, i, loc, r, remove); break; + case R_RISCV_TPREL_HI20: + case R_RISCV_TPREL_ADD: + case R_RISCV_TPREL_LO12_I: + case R_RISCV_TPREL_LO12_S: + if (i + 1 != sec.relocations.size() && + sec.relocations[i + 1].type == R_RISCV_RELAX) + relaxTlsLe(sec, i, loc, r, remove); + break; } // For all anchors whose offsets are <= r.offset, they are preceded by @@ -697,7 +740,7 @@ void elf::riscvFinalizeRelax(int passes) { for (size_t i = 0, e = rels.size(); i != e; ++i) { uint32_t remove = aux.relocDeltas[i] - delta; delta = aux.relocDeltas[i]; - if (remove == 0) + if (remove == 0 && aux.relocTypes[i] == R_RISCV_NONE) continue; // Copy from last location to the current relocated location. @@ -723,15 +766,24 @@ void elf::riscvFinalizeRelax(int passes) { } } } else if (RelType newType = aux.relocTypes[i]) { - const uint32_t insn = aux.writes[writesIdx++]; switch (newType) { + case R_RISCV_RELAX: + // Used by relaxTlsLe to indicate the relocation is ignored. + break; case R_RISCV_RVC_JUMP: skip = 2; - write16le(p, insn); + write16le(p, aux.writes[writesIdx++]); break; case R_RISCV_JAL: skip = 4; - write32le(p, insn); + write32le(p, aux.writes[writesIdx++]); + break; + case R_RISCV_32: + // Used by relaxTlsLe to write a uint32_t then suppress the handling + // in relocateAlloc. + skip = 4; + write32le(p, aux.writes[writesIdx++]); + aux.relocTypes[i] = R_RISCV_NONE; break; default: llvm_unreachable("unsupported type"); diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 4c26cba1cb4f..dd17adc4dbea 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -197,6 +197,10 @@ std::vector<std::pair<MemoryBufferRef, uint64_t>> static getArchiveMembers( return v; } +static bool isBitcode(MemoryBufferRef mb) { + return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode; +} + // Opens a file and create a file object. Path has to be resolved already. 
void LinkerDriver::addFile(StringRef path, bool withLOption) { using namespace sys::fs; @@ -217,8 +221,12 @@ void LinkerDriver::addFile(StringRef path, bool withLOption) { return; case file_magic::archive: { if (inWholeArchive) { - for (const auto &p : getArchiveMembers(mbref)) - files.push_back(createObjectFile(p.first, path, p.second)); + for (const auto &p : getArchiveMembers(mbref)) { + if (isBitcode(p.first)) + files.push_back(make<BitcodeFile>(p.first, path, p.second, false)); + else + files.push_back(createObjFile(p.first, path)); + } return; } @@ -241,8 +249,10 @@ void LinkerDriver::addFile(StringRef path, bool withLOption) { InputFile::isInGroup = true; for (const std::pair<MemoryBufferRef, uint64_t> &p : members) { auto magic = identify_magic(p.first.getBuffer()); - if (magic == file_magic::bitcode || magic == file_magic::elf_relocatable) - files.push_back(createLazyFile(p.first, path, p.second)); + if (magic == file_magic::elf_relocatable) + files.push_back(createObjFile(p.first, path, true)); + else if (magic == file_magic::bitcode) + files.push_back(make<BitcodeFile>(p.first, path, p.second, true)); else warn(path + ": archive member '" + p.first.getBufferIdentifier() + "' is neither ET_REL nor LLVM bitcode"); @@ -267,11 +277,10 @@ void LinkerDriver::addFile(StringRef path, bool withLOption) { make<SharedFile>(mbref, withLOption ? path::filename(path) : path)); return; case file_magic::bitcode: + files.push_back(make<BitcodeFile>(mbref, "", 0, inLib)); + break; case file_magic::elf_relocatable: - if (inLib) - files.push_back(createLazyFile(mbref, "", 0)); - else - files.push_back(createObjectFile(mbref)); + files.push_back(createObjFile(mbref, "", inLib)); break; default: error(path + ": unknown file type"); @@ -1612,7 +1621,7 @@ void LinkerDriver::createFiles(opt::InputArgList &args) { break; case OPT_just_symbols: if (Optional<MemoryBufferRef> mb = readFile(arg->getValue())) { - files.push_back(createObjectFile(*mb)); + files.push_back(createObjFile(*mb)); files.back()->justSymbols = true; } break; diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index 0ea2a2c74b63..c0076a3722fe 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -739,7 +739,7 @@ static void updateSupportedARMFeatures(const ARMAttributeParser &attributes) { attributes.getAttributeValue(ARMBuildAttrs::CPU_arch); if (!attr) return; - auto arch = attr.getValue(); + auto arch = attr.value(); switch (arch) { case ARMBuildAttrs::Pre_v4: case ARMBuildAttrs::v4: @@ -1710,34 +1710,27 @@ void BinaryFile::parse() { data.size(), 0, nullptr}); } -InputFile *elf::createObjectFile(MemoryBufferRef mb, StringRef archiveName, - uint64_t offsetInArchive) { - if (isBitcode(mb)) - return make<BitcodeFile>(mb, archiveName, offsetInArchive, /*lazy=*/false); - +ELFFileBase *elf::createObjFile(MemoryBufferRef mb, StringRef archiveName, + bool lazy) { + ELFFileBase *f; switch (getELFKind(mb, archiveName)) { case ELF32LEKind: - return make<ObjFile<ELF32LE>>(mb, archiveName); + f = make<ObjFile<ELF32LE>>(mb, archiveName); + break; case ELF32BEKind: - return make<ObjFile<ELF32BE>>(mb, archiveName); + f = make<ObjFile<ELF32BE>>(mb, archiveName); + break; case ELF64LEKind: - return make<ObjFile<ELF64LE>>(mb, archiveName); + f = make<ObjFile<ELF64LE>>(mb, archiveName); + break; case ELF64BEKind: - return make<ObjFile<ELF64BE>>(mb, archiveName); + f = make<ObjFile<ELF64BE>>(mb, archiveName); + break; default: llvm_unreachable("getELFKind"); } -} - -InputFile *elf::createLazyFile(MemoryBufferRef mb, StringRef 
archiveName, - uint64_t offsetInArchive) { - if (isBitcode(mb)) - return make<BitcodeFile>(mb, archiveName, offsetInArchive, /*lazy=*/true); - - auto *file = - cast<ELFFileBase>(createObjectFile(mb, archiveName, offsetInArchive)); - file->lazy = true; - return file; + f->lazy = lazy; + return f; } template <class ELFT> void ObjFile<ELFT>::parseLazy() { @@ -1763,7 +1756,7 @@ template <class ELFT> void ObjFile<ELFT>::parseLazy() { } bool InputFile::shouldExtractForCommon(StringRef name) { - if (isBitcode(mb)) + if (isa<BitcodeFile>(this)) return isBitcodeNonCommonDef(mb, name, archiveName); return isNonCommonDef(mb, name, archiveName); diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h index f89246eb645e..a24e664a7e16 100644 --- a/lld/ELF/InputFiles.h +++ b/lld/ELF/InputFiles.h @@ -371,14 +371,8 @@ public: void parse(); }; -InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName = "", - uint64_t offsetInArchive = 0); -InputFile *createLazyFile(MemoryBufferRef mb, StringRef archiveName, - uint64_t offsetInArchive); - -inline bool isBitcode(MemoryBufferRef mb) { - return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode; -} +ELFFileBase *createObjFile(MemoryBufferRef mb, StringRef archiveName = "", + bool lazy = false); std::string replaceThinLTOSuffix(StringRef path); diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp index 8c5001af3a91..b8019bd7d240 100644 --- a/lld/ELF/LTO.cpp +++ b/lld/ELF/LTO.cpp @@ -383,10 +383,10 @@ std::vector<InputFile *> BitcodeCompiler::compile() { std::vector<InputFile *> ret; for (unsigned i = 0; i != maxTasks; ++i) if (!buf[i].empty()) - ret.push_back(createObjectFile(MemoryBufferRef(buf[i], "lto.tmp"))); + ret.push_back(createObjFile(MemoryBufferRef(buf[i], "lto.tmp"))); for (std::unique_ptr<MemoryBuffer> &file : files) if (file) - ret.push_back(createObjectFile(*file)); + ret.push_back(createObjFile(*file)); return ret; } diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index b8a2ebeefce9..a0c5e6d04748 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -29,6 +29,7 @@ #include "lld/Common/DWARF.h" #include "lld/Common/Strings.h" #include "lld/Common/Version.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/StringExtras.h" #include "llvm/BinaryFormat/Dwarf.h" @@ -1703,7 +1704,7 @@ void RelocationBaseSection::computeRels() { parallelSort(relocs.begin(), nonRelative, [&](auto &a, auto &b) { return a.r_offset < b.r_offset; }); // Non-relative relocations are few, so don't bother with parallelSort. - std::sort(nonRelative, relocs.end(), [&](auto &a, auto &b) { + llvm::sort(nonRelative, relocs.end(), [&](auto &a, auto &b) { return std::tie(a.r_sym, a.r_offset) < std::tie(b.r_sym, b.r_offset); }); } @@ -2039,7 +2040,7 @@ template <class ELFT> bool RelrSection<ELFT>::updateAllocSize() { std::unique_ptr<uint64_t[]> offsets(new uint64_t[relocs.size()]); for (auto it : llvm::enumerate(relocs)) offsets[it.index()] = it.value().getOffset(); - std::sort(offsets.get(), offsets.get() + relocs.size()); + llvm::sort(offsets.get(), offsets.get() + relocs.size()); // For each leading relocation, find following ones that can be folded // as a bitmap and fold them. @@ -3855,7 +3856,8 @@ void InStruct::reset() { constexpr char kMemtagAndroidNoteName[] = "Android"; void MemtagAndroidNote::writeTo(uint8_t *buf) { - assert(sizeof(kMemtagAndroidNoteName) == 8); // ABI check for Android 11 & 12. 
+ static_assert(sizeof(kMemtagAndroidNoteName) == 8, + "ABI check for Android 11 & 12."); assert((config->androidMemtagStack || config->androidMemtagHeap) && "Should only be synthesizing a note if heap || stack is enabled."); diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 705cc7bf9766..2994e79cd1de 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -1973,8 +1973,8 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() { continue; for (Symbol *sym : file->requiredSymbols) if (sym->isUndefined() && !sym->isWeak()) - diagnose(toString(file) + ": undefined reference to " + - toString(*sym) + " [--no-allow-shlib-undefined]"); + diagnose("undefined reference due to --no-allow-shlib-undefined: " + + toString(*sym) + "\n>>> referenced by " + toString(file)); } } diff --git a/lld/MachO/Arch/ARM.cpp b/lld/MachO/Arch/ARM.cpp index fd215ed99b59..424df414229f 100644 --- a/lld/MachO/Arch/ARM.cpp +++ b/lld/MachO/Arch/ARM.cpp @@ -38,35 +38,27 @@ struct ARM : TargetInfo { uint64_t entryAddr) const override; void relaxGotLoad(uint8_t *loc, uint8_t type) const override; - const RelocAttrs &getRelocAttrs(uint8_t type) const override; uint64_t getPageSize() const override { return 4 * 1024; } void handleDtraceReloc(const Symbol *sym, const Reloc &r, uint8_t *loc) const override; }; - } // namespace -const RelocAttrs &ARM::getRelocAttrs(uint8_t type) const { - static const std::array<RelocAttrs, 10> relocAttrsArray{{ +static constexpr std::array<RelocAttrs, 10> relocAttrsArray{{ #define B(x) RelocAttrBits::x - {"VANILLA", /* FIXME populate this */ B(_0)}, - {"PAIR", /* FIXME populate this */ B(_0)}, - {"SECTDIFF", /* FIXME populate this */ B(_0)}, - {"LOCAL_SECTDIFF", /* FIXME populate this */ B(_0)}, - {"PB_LA_PTR", /* FIXME populate this */ B(_0)}, - {"BR24", B(PCREL) | B(LOCAL) | B(EXTERN) | B(BRANCH) | B(BYTE4)}, - {"BR22", B(PCREL) | B(LOCAL) | B(EXTERN) | B(BRANCH) | B(BYTE4)}, - {"32BIT_BRANCH", /* FIXME populate this */ B(_0)}, - {"HALF", /* FIXME populate this */ B(_0)}, - {"HALF_SECTDIFF", /* FIXME populate this */ B(_0)}, + {"VANILLA", /* FIXME populate this */ B(_0)}, + {"PAIR", /* FIXME populate this */ B(_0)}, + {"SECTDIFF", /* FIXME populate this */ B(_0)}, + {"LOCAL_SECTDIFF", /* FIXME populate this */ B(_0)}, + {"PB_LA_PTR", /* FIXME populate this */ B(_0)}, + {"BR24", B(PCREL) | B(LOCAL) | B(EXTERN) | B(BRANCH) | B(BYTE4)}, + {"BR22", B(PCREL) | B(LOCAL) | B(EXTERN) | B(BRANCH) | B(BYTE4)}, + {"32BIT_BRANCH", /* FIXME populate this */ B(_0)}, + {"HALF", /* FIXME populate this */ B(_0)}, + {"HALF_SECTDIFF", /* FIXME populate this */ B(_0)}, #undef B - }}; - assert(type < relocAttrsArray.size() && "invalid relocation type"); - if (type >= relocAttrsArray.size()) - return invalidRelocAttrs; - return relocAttrsArray[type]; -} +}}; int64_t ARM::getEmbeddedAddend(MemoryBufferRef mb, uint64_t offset, relocation_info rel) const { @@ -167,6 +159,8 @@ ARM::ARM(uint32_t cpuSubtype) : TargetInfo(ILP32()) { stubSize = 0 /* FIXME */; stubHelperHeaderSize = 0 /* FIXME */; stubHelperEntrySize = 0 /* FIXME */; + + relocAttrs = {relocAttrsArray.data(), relocAttrsArray.size()}; } TargetInfo *macho::createARMTargetInfo(uint32_t cpuSubtype) { diff --git a/lld/MachO/Arch/ARM64.cpp b/lld/MachO/Arch/ARM64.cpp index 5901a9e09b35..46e935aa1fd1 100644 --- a/lld/MachO/Arch/ARM64.cpp +++ b/lld/MachO/Arch/ARM64.cpp @@ -34,7 +34,6 @@ struct ARM64 : ARM64Common { void writeStubHelperHeader(uint8_t *buf) const override; void writeStubHelperEntry(uint8_t *buf, const Symbol &, uint64_t entryAddr) 
const override; - const RelocAttrs &getRelocAttrs(uint8_t type) const override; void populateThunk(InputSection *thunk, Symbol *funcSym) override; void applyOptimizationHints(uint8_t *, const ConcatInputSection *, ArrayRef<uint64_t>) const override; @@ -48,31 +47,24 @@ struct ARM64 : ARM64Common { // absolute version of this relocation. The semantics of the absolute relocation // are weird -- it results in the value of the GOT slot being written, instead // of the address. Let's not support it unless we find a real-world use case. - -const RelocAttrs &ARM64::getRelocAttrs(uint8_t type) const { - static const std::array<RelocAttrs, 11> relocAttrsArray{{ +static constexpr std::array<RelocAttrs, 11> relocAttrsArray{{ #define B(x) RelocAttrBits::x - {"UNSIGNED", - B(UNSIGNED) | B(ABSOLUTE) | B(EXTERN) | B(LOCAL) | B(BYTE4) | B(BYTE8)}, - {"SUBTRACTOR", B(SUBTRAHEND) | B(EXTERN) | B(BYTE4) | B(BYTE8)}, - {"BRANCH26", B(PCREL) | B(EXTERN) | B(BRANCH) | B(BYTE4)}, - {"PAGE21", B(PCREL) | B(EXTERN) | B(BYTE4)}, - {"PAGEOFF12", B(ABSOLUTE) | B(EXTERN) | B(BYTE4)}, - {"GOT_LOAD_PAGE21", B(PCREL) | B(EXTERN) | B(GOT) | B(BYTE4)}, - {"GOT_LOAD_PAGEOFF12", - B(ABSOLUTE) | B(EXTERN) | B(GOT) | B(LOAD) | B(BYTE4)}, - {"POINTER_TO_GOT", B(PCREL) | B(EXTERN) | B(GOT) | B(POINTER) | B(BYTE4)}, - {"TLVP_LOAD_PAGE21", B(PCREL) | B(EXTERN) | B(TLV) | B(BYTE4)}, - {"TLVP_LOAD_PAGEOFF12", - B(ABSOLUTE) | B(EXTERN) | B(TLV) | B(LOAD) | B(BYTE4)}, - {"ADDEND", B(ADDEND)}, + {"UNSIGNED", + B(UNSIGNED) | B(ABSOLUTE) | B(EXTERN) | B(LOCAL) | B(BYTE4) | B(BYTE8)}, + {"SUBTRACTOR", B(SUBTRAHEND) | B(EXTERN) | B(BYTE4) | B(BYTE8)}, + {"BRANCH26", B(PCREL) | B(EXTERN) | B(BRANCH) | B(BYTE4)}, + {"PAGE21", B(PCREL) | B(EXTERN) | B(BYTE4)}, + {"PAGEOFF12", B(ABSOLUTE) | B(EXTERN) | B(BYTE4)}, + {"GOT_LOAD_PAGE21", B(PCREL) | B(EXTERN) | B(GOT) | B(BYTE4)}, + {"GOT_LOAD_PAGEOFF12", + B(ABSOLUTE) | B(EXTERN) | B(GOT) | B(LOAD) | B(BYTE4)}, + {"POINTER_TO_GOT", B(PCREL) | B(EXTERN) | B(GOT) | B(POINTER) | B(BYTE4)}, + {"TLVP_LOAD_PAGE21", B(PCREL) | B(EXTERN) | B(TLV) | B(BYTE4)}, + {"TLVP_LOAD_PAGEOFF12", + B(ABSOLUTE) | B(EXTERN) | B(TLV) | B(LOAD) | B(BYTE4)}, + {"ADDEND", B(ADDEND)}, #undef B - }}; - assert(type < relocAttrsArray.size() && "invalid relocation type"); - if (type >= relocAttrsArray.size()) - return invalidRelocAttrs; - return relocAttrsArray[type]; -} +}}; static constexpr uint32_t stubCode[] = { 0x90000010, // 00: adrp x16, __la_symbol_ptr@page @@ -150,6 +142,8 @@ ARM64::ARM64() : ARM64Common(LP64()) { stubHelperHeaderSize = sizeof(stubHelperHeaderCode); stubHelperEntrySize = sizeof(stubHelperEntryCode); + + relocAttrs = {relocAttrsArray.data(), relocAttrsArray.size()}; } namespace { diff --git a/lld/MachO/Arch/ARM64_32.cpp b/lld/MachO/Arch/ARM64_32.cpp index 5be411e40342..4830c2629761 100644 --- a/lld/MachO/Arch/ARM64_32.cpp +++ b/lld/MachO/Arch/ARM64_32.cpp @@ -33,36 +33,29 @@ struct ARM64_32 : ARM64Common { void writeStubHelperHeader(uint8_t *buf) const override; void writeStubHelperEntry(uint8_t *buf, const Symbol &, uint64_t entryAddr) const override; - const RelocAttrs &getRelocAttrs(uint8_t type) const override; }; } // namespace // These are very similar to ARM64's relocation attributes, except that we don't // have the BYTE8 flag set. 
-const RelocAttrs &ARM64_32::getRelocAttrs(uint8_t type) const { - static const std::array<RelocAttrs, 11> relocAttrsArray{{ +static constexpr std::array<RelocAttrs, 11> relocAttrsArray{{ #define B(x) RelocAttrBits::x - {"UNSIGNED", B(UNSIGNED) | B(ABSOLUTE) | B(EXTERN) | B(LOCAL) | B(BYTE4)}, - {"SUBTRACTOR", B(SUBTRAHEND) | B(EXTERN) | B(BYTE4)}, - {"BRANCH26", B(PCREL) | B(EXTERN) | B(BRANCH) | B(BYTE4)}, - {"PAGE21", B(PCREL) | B(EXTERN) | B(BYTE4)}, - {"PAGEOFF12", B(ABSOLUTE) | B(EXTERN) | B(BYTE4)}, - {"GOT_LOAD_PAGE21", B(PCREL) | B(EXTERN) | B(GOT) | B(BYTE4)}, - {"GOT_LOAD_PAGEOFF12", - B(ABSOLUTE) | B(EXTERN) | B(GOT) | B(LOAD) | B(BYTE4)}, - {"POINTER_TO_GOT", B(PCREL) | B(EXTERN) | B(GOT) | B(POINTER) | B(BYTE4)}, - {"TLVP_LOAD_PAGE21", B(PCREL) | B(EXTERN) | B(TLV) | B(BYTE4)}, - {"TLVP_LOAD_PAGEOFF12", - B(ABSOLUTE) | B(EXTERN) | B(TLV) | B(LOAD) | B(BYTE4)}, - {"ADDEND", B(ADDEND)}, + {"UNSIGNED", B(UNSIGNED) | B(ABSOLUTE) | B(EXTERN) | B(LOCAL) | B(BYTE4)}, + {"SUBTRACTOR", B(SUBTRAHEND) | B(EXTERN) | B(BYTE4)}, + {"BRANCH26", B(PCREL) | B(EXTERN) | B(BRANCH) | B(BYTE4)}, + {"PAGE21", B(PCREL) | B(EXTERN) | B(BYTE4)}, + {"PAGEOFF12", B(ABSOLUTE) | B(EXTERN) | B(BYTE4)}, + {"GOT_LOAD_PAGE21", B(PCREL) | B(EXTERN) | B(GOT) | B(BYTE4)}, + {"GOT_LOAD_PAGEOFF12", + B(ABSOLUTE) | B(EXTERN) | B(GOT) | B(LOAD) | B(BYTE4)}, + {"POINTER_TO_GOT", B(PCREL) | B(EXTERN) | B(GOT) | B(POINTER) | B(BYTE4)}, + {"TLVP_LOAD_PAGE21", B(PCREL) | B(EXTERN) | B(TLV) | B(BYTE4)}, + {"TLVP_LOAD_PAGEOFF12", + B(ABSOLUTE) | B(EXTERN) | B(TLV) | B(LOAD) | B(BYTE4)}, + {"ADDEND", B(ADDEND)}, #undef B - }}; - assert(type < relocAttrsArray.size() && "invalid relocation type"); - if (type >= relocAttrsArray.size()) - return invalidRelocAttrs; - return relocAttrsArray[type]; -} +}}; // The stub code is fairly similar to ARM64's, except that we load pointers into // 32-bit 'w' registers, instead of the 64-bit 'x' ones. 
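[Editor's note] The ARM, ARM64, ARM64_32 and X86_64 hunks above all apply the same refactor: the virtual getRelocAttrs() override is dropped, the table becomes a file-local static constexpr array, and each constructor publishes it through a non-owning view that the base class indexes. A minimal standalone sketch of that pattern follows; it uses std::span (C++20) as a stand-in for llvm::ArrayRef and invented names (kArmLikeAttrs, ArmLikeTarget), so it is illustrative only, not lld's actual types.

    #include <array>
    #include <cassert>
    #include <cstdint>
    #include <cstdio>
    #include <span> // stand-in for llvm::ArrayRef

    struct RelocAttrs { const char *name; uint32_t bits; };

    struct TargetInfo {
      std::span<const RelocAttrs> relocAttrs; // set by each subclass constructor
      const RelocAttrs &getRelocAttrs(uint8_t type) const {
        // Mirrors the bounds check that moves into Target.h in this change.
        assert(type < relocAttrs.size() && "invalid relocation type");
        return relocAttrs[type];
      }
    };

    // File-local constexpr table, analogous to relocAttrsArray above.
    static constexpr std::array<RelocAttrs, 2> kArmLikeAttrs{{
        {"VANILLA", 0},
        {"BR24", 0x1},
    }};

    struct ArmLikeTarget : TargetInfo {
      ArmLikeTarget() { relocAttrs = {kArmLikeAttrs.data(), kArmLikeAttrs.size()}; }
    };

    int main() {
      ArmLikeTarget t;
      std::printf("%s\n", t.getRelocAttrs(1).name); // prints "BR24"
    }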
@@ -112,6 +105,8 @@ ARM64_32::ARM64_32() : ARM64Common(ILP32()) { stubSize = sizeof(stubCode); stubHelperHeaderSize = sizeof(stubHelperHeaderCode); stubHelperEntrySize = sizeof(stubHelperEntryCode); + + relocAttrs = {relocAttrsArray.data(), relocAttrsArray.size()}; } TargetInfo *macho::createARM64_32TargetInfo() { diff --git a/lld/MachO/Arch/X86_64.cpp b/lld/MachO/Arch/X86_64.cpp index d2efa5bb3451..b1c46cbab9d4 100644 --- a/lld/MachO/Arch/X86_64.cpp +++ b/lld/MachO/Arch/X86_64.cpp @@ -37,36 +37,28 @@ struct X86_64 : TargetInfo { uint64_t entryAddr) const override; void relaxGotLoad(uint8_t *loc, uint8_t type) const override; - const RelocAttrs &getRelocAttrs(uint8_t type) const override; uint64_t getPageSize() const override { return 4 * 1024; } void handleDtraceReloc(const Symbol *sym, const Reloc &r, uint8_t *loc) const override; }; - } // namespace -const RelocAttrs &X86_64::getRelocAttrs(uint8_t type) const { - static const std::array<RelocAttrs, 10> relocAttrsArray{{ +static constexpr std::array<RelocAttrs, 10> relocAttrsArray{{ #define B(x) RelocAttrBits::x - {"UNSIGNED", - B(UNSIGNED) | B(ABSOLUTE) | B(EXTERN) | B(LOCAL) | B(BYTE4) | B(BYTE8)}, - {"SIGNED", B(PCREL) | B(EXTERN) | B(LOCAL) | B(BYTE4)}, - {"BRANCH", B(PCREL) | B(EXTERN) | B(BRANCH) | B(BYTE4)}, - {"GOT_LOAD", B(PCREL) | B(EXTERN) | B(GOT) | B(LOAD) | B(BYTE4)}, - {"GOT", B(PCREL) | B(EXTERN) | B(GOT) | B(POINTER) | B(BYTE4)}, - {"SUBTRACTOR", B(SUBTRAHEND) | B(EXTERN) | B(BYTE4) | B(BYTE8)}, - {"SIGNED_1", B(PCREL) | B(EXTERN) | B(LOCAL) | B(BYTE4)}, - {"SIGNED_2", B(PCREL) | B(EXTERN) | B(LOCAL) | B(BYTE4)}, - {"SIGNED_4", B(PCREL) | B(EXTERN) | B(LOCAL) | B(BYTE4)}, - {"TLV", B(PCREL) | B(EXTERN) | B(TLV) | B(LOAD) | B(BYTE4)}, + {"UNSIGNED", + B(UNSIGNED) | B(ABSOLUTE) | B(EXTERN) | B(LOCAL) | B(BYTE4) | B(BYTE8)}, + {"SIGNED", B(PCREL) | B(EXTERN) | B(LOCAL) | B(BYTE4)}, + {"BRANCH", B(PCREL) | B(EXTERN) | B(BRANCH) | B(BYTE4)}, + {"GOT_LOAD", B(PCREL) | B(EXTERN) | B(GOT) | B(LOAD) | B(BYTE4)}, + {"GOT", B(PCREL) | B(EXTERN) | B(GOT) | B(POINTER) | B(BYTE4)}, + {"SUBTRACTOR", B(SUBTRAHEND) | B(EXTERN) | B(BYTE4) | B(BYTE8)}, + {"SIGNED_1", B(PCREL) | B(EXTERN) | B(LOCAL) | B(BYTE4)}, + {"SIGNED_2", B(PCREL) | B(EXTERN) | B(LOCAL) | B(BYTE4)}, + {"SIGNED_4", B(PCREL) | B(EXTERN) | B(LOCAL) | B(BYTE4)}, + {"TLV", B(PCREL) | B(EXTERN) | B(TLV) | B(LOAD) | B(BYTE4)}, #undef B - }}; - assert(type < relocAttrsArray.size() && "invalid relocation type"); - if (type >= relocAttrsArray.size()) - return invalidRelocAttrs; - return relocAttrsArray[type]; -} +}}; static int pcrelOffset(uint8_t type) { switch (type) { @@ -196,6 +188,8 @@ X86_64::X86_64() : TargetInfo(LP64()) { stubSize = sizeof(stub); stubHelperHeaderSize = sizeof(stubHelperHeader); stubHelperEntrySize = sizeof(stubHelperEntry); + + relocAttrs = {relocAttrsArray.data(), relocAttrsArray.size()}; } TargetInfo *macho::createX86_64TargetInfo() { diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h index ccf71b6535ea..c7e4b4f96782 100644 --- a/lld/MachO/Config.h +++ b/lld/MachO/Config.h @@ -109,7 +109,7 @@ struct Configuration { bool archMultiple = false; bool exportDynamic = false; bool forceLoadObjC = false; - bool forceLoadSwift = false; + bool forceLoadSwift = false; // Only applies to LC_LINKER_OPTIONs. 
bool staticLink = false; bool implicitDylibs = false; bool isPic = false; @@ -188,6 +188,8 @@ struct Configuration { SymbolPatterns unexportedSymbols; SymbolPatterns whyLive; + std::vector<std::pair<llvm::StringRef, llvm::StringRef>> aliasedSymbols; + SymtabPresence localSymbolsPresence = SymtabPresence::All; SymbolPatterns localSymbolPatterns; @@ -202,13 +204,6 @@ struct Configuration { } }; -// Whether to force-load an archive. -enum class ForceLoad { - Default, // Apply -all_load or -ObjC behaviors if those flags are enabled - Yes, // Always load the archive, regardless of other flags - No, // Never load the archive, regardless of other flags -}; - extern std::unique_ptr<Configuration> config; } // namespace macho diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index abfe381f41e0..454708fad4ef 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -247,9 +247,24 @@ static llvm::CachePruningPolicy getLTOCachePolicy(InputArgList &args) { return CHECK(parseCachePruningPolicy(ltoPolicy), "invalid LTO cache policy"); } -static DenseMap<StringRef, ArchiveFile *> loadedArchives; +// What caused a given library to be loaded. Only relevant for archives. +// Note that this does not tell us *how* we should load the library, i.e. +// whether we should do it lazily or eagerly (AKA force loading). The "how" is +// decided within addFile(). +enum class LoadType { + CommandLine, // Library was passed as a regular CLI argument + CommandLineForce, // Library was passed via `-force_load` + LCLinkerOption, // Library was passed via LC_LINKER_OPTIONS +}; + +struct ArchiveFileInfo { + ArchiveFile *file; + bool isCommandLineLoad; +}; + +static DenseMap<StringRef, ArchiveFileInfo> loadedArchives; -static InputFile *addFile(StringRef path, ForceLoad forceLoadArchive, +static InputFile *addFile(StringRef path, LoadType loadType, bool isLazy = false, bool isExplicit = true, bool isBundleLoader = false) { Optional<MemoryBufferRef> buffer = readFile(path); @@ -261,6 +276,7 @@ static InputFile *addFile(StringRef path, ForceLoad forceLoadArchive, file_magic magic = identify_magic(mbref.getBuffer()); switch (magic) { case file_magic::archive: { + bool isCommandLineLoad = loadType != LoadType::LCLinkerOption; // Avoid loading archives twice. If the archives are being force-loaded, // loading them twice would create duplicate symbol errors. In the // non-force-loading case, this is just a minor performance optimization. @@ -268,23 +284,45 @@ static InputFile *addFile(StringRef path, ForceLoad forceLoadArchive, // loadArchiveMember() call below may recursively call addFile() and // invalidate this reference. auto entry = loadedArchives.find(path); - if (entry != loadedArchives.end()) - return entry->second; - std::unique_ptr<object::Archive> archive = CHECK( - object::Archive::create(mbref), path + ": failed to parse archive"); + ArchiveFile *file; + if (entry == loadedArchives.end()) { + // No cached archive, we need to create a new one + std::unique_ptr<object::Archive> archive = CHECK( + object::Archive::create(mbref), path + ": failed to parse archive"); - if (!archive->isEmpty() && !archive->hasSymbolTable()) - error(path + ": archive has no index; run ranlib to add one"); + if (!archive->isEmpty() && !archive->hasSymbolTable()) + error(path + ": archive has no index; run ranlib to add one"); + file = make<ArchiveFile>(std::move(archive)); + } else { + file = entry->second.file; + // Command-line loads take precedence. 
If file is previously loaded via + // command line, or is loaded via LC_LINKER_OPTION and being loaded via + // LC_LINKER_OPTION again, using the cached archive is enough. + if (entry->second.isCommandLineLoad || !isCommandLineLoad) + return file; + } - auto *file = make<ArchiveFile>(std::move(archive)); - if ((forceLoadArchive == ForceLoad::Default && config->allLoad) || - forceLoadArchive == ForceLoad::Yes) { + bool isLCLinkerForceLoad = loadType == LoadType::LCLinkerOption && + config->forceLoadSwift && + path::filename(path).startswith("libswift"); + if ((isCommandLineLoad && config->allLoad) || + loadType == LoadType::CommandLineForce || isLCLinkerForceLoad) { if (Optional<MemoryBufferRef> buffer = readFile(path)) { Error e = Error::success(); for (const object::Archive::Child &c : file->getArchive().children(e)) { - StringRef reason = - forceLoadArchive == ForceLoad::Yes ? "-force_load" : "-all_load"; + StringRef reason; + switch (loadType) { + case LoadType::LCLinkerOption: + reason = "LC_LINKER_OPTION"; + break; + case LoadType::CommandLineForce: + reason = "-force_load"; + break; + case LoadType::CommandLine: + reason = "-all_load"; + break; + } if (Error e = file->fetch(c, reason)) error(toString(file) + ": " + reason + " failed to load archive member: " + toString(std::move(e))); @@ -293,8 +331,7 @@ static InputFile *addFile(StringRef path, ForceLoad forceLoadArchive, error(toString(file) + ": Archive::children failed: " + toString(std::move(e))); } - } else if (forceLoadArchive == ForceLoad::Default && - config->forceLoadObjC) { + } else if (isCommandLineLoad && config->forceLoadObjC) { for (const object::Archive::Symbol &sym : file->getArchive().symbols()) if (sym.getName().startswith(objc::klass)) file->fetch(sym); @@ -318,7 +355,8 @@ static InputFile *addFile(StringRef path, ForceLoad forceLoadArchive, } file->addLazySymbols(); - newFile = loadedArchives[path] = file; + loadedArchives[path] = ArchiveFileInfo{file, isCommandLineLoad}; + newFile = file; break; } case file_magic::macho_object: @@ -368,11 +406,10 @@ static InputFile *addFile(StringRef path, ForceLoad forceLoadArchive, } static void addLibrary(StringRef name, bool isNeeded, bool isWeak, - bool isReexport, bool isExplicit, - ForceLoad forceLoadArchive) { + bool isReexport, bool isExplicit, LoadType loadType) { if (Optional<StringRef> path = findLibrary(name)) { if (auto *dylibFile = dyn_cast_or_null<DylibFile>( - addFile(*path, forceLoadArchive, /*isLazy=*/false, isExplicit))) { + addFile(*path, loadType, /*isLazy=*/false, isExplicit))) { if (isNeeded) dylibFile->forceNeeded = true; if (isWeak) @@ -389,14 +426,13 @@ static void addLibrary(StringRef name, bool isNeeded, bool isWeak, static DenseSet<StringRef> loadedObjectFrameworks; static void addFramework(StringRef name, bool isNeeded, bool isWeak, - bool isReexport, bool isExplicit, - ForceLoad forceLoadArchive) { + bool isReexport, bool isExplicit, LoadType loadType) { if (Optional<StringRef> path = findFramework(name)) { if (loadedObjectFrameworks.contains(*path)) return; InputFile *file = - addFile(*path, forceLoadArchive, /*isLazy=*/false, isExplicit); + addFile(*path, loadType, /*isLazy=*/false, isExplicit, false); if (auto *dylibFile = dyn_cast_or_null<DylibFile>(file)) { if (isNeeded) dylibFile->forceNeeded = true; @@ -436,15 +472,14 @@ void macho::parseLCLinkerOption(InputFile *f, unsigned argc, StringRef data) { unsigned i = 0; StringRef arg = argv[i]; if (arg.consume_front("-l")) { - ForceLoad forceLoadArchive = - config->forceLoadSwift && 
arg.startswith("swift") ? ForceLoad::Yes - : ForceLoad::No; addLibrary(arg, /*isNeeded=*/false, /*isWeak=*/false, - /*isReexport=*/false, /*isExplicit=*/false, forceLoadArchive); + /*isReexport=*/false, /*isExplicit=*/false, + LoadType::LCLinkerOption); } else if (arg == "-framework") { StringRef name = argv[++i]; addFramework(name, /*isNeeded=*/false, /*isWeak=*/false, - /*isReexport=*/false, /*isExplicit=*/false, ForceLoad::No); + /*isReexport=*/false, /*isExplicit=*/false, + LoadType::LCLinkerOption); } else { error(arg + " is not allowed in LC_LINKER_OPTION"); } @@ -456,7 +491,7 @@ static void addFileList(StringRef path, bool isLazy) { return; MemoryBufferRef mbref = *buffer; for (StringRef path : args::getLines(mbref)) - addFile(rerootPath(path), ForceLoad::Default, isLazy); + addFile(rerootPath(path), LoadType::CommandLine, isLazy); } // We expect sub-library names of the form "libfoo", which will match a dylib @@ -468,8 +503,7 @@ static bool markReexport(StringRef searchName, ArrayRef<StringRef> extensions) { if (auto *dylibFile = dyn_cast<DylibFile>(file)) { StringRef filename = path::filename(dylibFile->getName()); if (filename.consume_front(searchName) && - (filename.empty() || - find(extensions, filename) != extensions.end())) { + (filename.empty() || llvm::is_contained(extensions, filename))) { dylibFile->reexport = true; return true; } @@ -552,7 +586,7 @@ static void initializeSectionRenameMap() { section_names::objcCatList, section_names::objcNonLazyCatList, section_names::objcProtoList, - section_names::objcImageInfo}; + section_names::objCImageInfo}; for (StringRef s : v) config->sectionRenameMap[{segment_names::data, s}] = { segment_names::dataConst, s}; @@ -976,30 +1010,30 @@ static void createFiles(const InputArgList &args) { switch (opt.getID()) { case OPT_INPUT: - addFile(rerootPath(arg->getValue()), ForceLoad::Default, isLazy); + addFile(rerootPath(arg->getValue()), LoadType::CommandLine, isLazy); break; case OPT_needed_library: if (auto *dylibFile = dyn_cast_or_null<DylibFile>( - addFile(rerootPath(arg->getValue()), ForceLoad::Default))) + addFile(rerootPath(arg->getValue()), LoadType::CommandLine))) dylibFile->forceNeeded = true; break; case OPT_reexport_library: if (auto *dylibFile = dyn_cast_or_null<DylibFile>( - addFile(rerootPath(arg->getValue()), ForceLoad::Default))) { + addFile(rerootPath(arg->getValue()), LoadType::CommandLine))) { config->hasReexports = true; dylibFile->reexport = true; } break; case OPT_weak_library: if (auto *dylibFile = dyn_cast_or_null<DylibFile>( - addFile(rerootPath(arg->getValue()), ForceLoad::Default))) + addFile(rerootPath(arg->getValue()), LoadType::CommandLine))) dylibFile->forceWeakImport = true; break; case OPT_filelist: addFileList(arg->getValue(), isLazy); break; case OPT_force_load: - addFile(rerootPath(arg->getValue()), ForceLoad::Yes); + addFile(rerootPath(arg->getValue()), LoadType::CommandLineForce); break; case OPT_l: case OPT_needed_l: @@ -1007,7 +1041,7 @@ static void createFiles(const InputArgList &args) { case OPT_weak_l: addLibrary(arg->getValue(), opt.getID() == OPT_needed_l, opt.getID() == OPT_weak_l, opt.getID() == OPT_reexport_l, - /*isExplicit=*/true, ForceLoad::Default); + /*isExplicit=*/true, LoadType::CommandLine); break; case OPT_framework: case OPT_needed_framework: @@ -1016,7 +1050,7 @@ static void createFiles(const InputArgList &args) { addFramework(arg->getValue(), opt.getID() == OPT_needed_framework, opt.getID() == OPT_weak_framework, opt.getID() == OPT_reexport_framework, /*isExplicit=*/true, - 
ForceLoad::Default); + LoadType::CommandLine); break; case OPT_start_lib: if (isLazy) @@ -1068,6 +1102,8 @@ static void gatherInputSections() { } } } + if (!file->objCImageInfo.empty()) + in.objCImageInfo->addFile(file); } assert(inputOrder <= UnspecifiedInputOrder); } @@ -1264,9 +1300,8 @@ bool macho::link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS, if (const Arg *arg = args.getLastArg(OPT_bundle_loader)) { if (config->outputType != MH_BUNDLE) error("-bundle_loader can only be used with MachO bundle output"); - addFile(arg->getValue(), ForceLoad::Default, /*isLazy=*/false, - /*isExplicit=*/false, - /*isBundleLoader=*/true); + addFile(arg->getValue(), LoadType::CommandLine, /*isLazy=*/false, + /*isExplicit=*/false, /*isBundleLoader=*/true); } if (const Arg *arg = args.getLastArg(OPT_umbrella)) { if (config->outputType != MH_DYLIB) @@ -1306,6 +1341,11 @@ bool macho::link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS, OPT_call_graph_profile_sort, OPT_no_call_graph_profile_sort, true); config->printSymbolOrder = args.getLastArgValue(OPT_print_symbol_order); + for (const Arg *arg : args.filtered(OPT_alias)) { + config->aliasedSymbols.push_back( + std::make_pair(arg->getValue(0), arg->getValue(1))); + } + // FIXME: Add a commandline flag for this too. config->zeroModTime = getenv("ZERO_AR_DATE"); @@ -1558,6 +1598,18 @@ bool macho::link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS, createSyntheticSections(); createSyntheticSymbols(); + for (const auto &pair : config->aliasedSymbols) { + if (const auto &sym = symtab->find(pair.first)) { + if (const auto &defined = dyn_cast<Defined>(sym)) { + symtab->aliasDefined(defined, pair.second); + continue; + } + } + + warn("undefined base symbol '" + pair.first + "' for alias '" + + pair.second + "'\n"); + } + if (config->hasExplicitExports) { parallelForEach(symtab->getSymbols(), [](Symbol *sym) { if (auto *defined = dyn_cast<Defined>(sym)) { @@ -1616,7 +1668,9 @@ bool macho::link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS, if (config->icfLevel != ICFLevel::none) { if (config->icfLevel == ICFLevel::safe) markAddrSigSymbols(); - foldIdenticalSections(); + foldIdenticalSections(/*onlyCfStrings=*/false); + } else if (config->dedupLiterals) { + foldIdenticalSections(/*onlyCfStrings=*/true); } // Write to an output file. 
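[Editor's note] The Driver.cpp hunks above replace ForceLoad with LoadType and cache each parsed archive together with an isCommandLineLoad bit; as the comments say, command-line loads take precedence, so a cached LC_LINKER_OPTION load must be re-processed when the same archive later appears on the command line. The following is a minimal sketch of just that reuse decision, with simplified stand-in types (canReuseCached is a hypothetical helper, not an lld function).

    #include <cassert>

    enum class LoadType { CommandLine, CommandLineForce, LCLinkerOption };

    struct ArchiveFileInfo {
      void *file = nullptr;           // stand-in for ArchiveFile *
      bool isCommandLineLoad = false; // true if first seen on the command line
    };

    // A cached command-line load satisfies any later request; a cached
    // LC_LINKER_OPTION load only satisfies another LC_LINKER_OPTION request.
    static bool canReuseCached(const ArchiveFileInfo &cached, LoadType loadType) {
      bool isCommandLineLoad = loadType != LoadType::LCLinkerOption;
      return cached.isCommandLineLoad || !isCommandLineLoad;
    }

    int main() {
      ArchiveFileInfo viaLinkerOption{nullptr, /*isCommandLineLoad=*/false};
      // First seen via LC_LINKER_OPTION, now requested on the command line:
      // re-process so -all_load / -force_load semantics can apply.
      assert(!canReuseCached(viaLinkerOption, LoadType::CommandLine));
      // A second LC_LINKER_OPTION request can simply reuse the cached archive.
      assert(canReuseCached(viaLinkerOption, LoadType::LCLinkerOption));
    }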
diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp index ad029142681f..d06fbc6db840 100644 --- a/lld/MachO/ICF.cpp +++ b/lld/MachO/ICF.cpp @@ -384,23 +384,18 @@ void macho::markAddrSigSymbols() { continue; assert(addrSigSection->subsections.size() == 1); - Subsection *subSection = &addrSigSection->subsections[0]; - ArrayRef<unsigned char> &contents = subSection->isec->data; - - const uint8_t *pData = contents.begin(); - while (pData != contents.end()) { - unsigned size; - const char *err; - uint32_t symIndex = decodeULEB128(pData, &size, contents.end(), &err); - if (err) - fatal(toString(file) + ": could not decode addrsig section: " + err); - markSymAsAddrSig(obj->symbols[symIndex]); - pData += size; + const InputSection *isec = addrSigSection->subsections[0].isec; + + for (const Reloc &r : isec->relocs) { + if (auto *sym = r.referent.dyn_cast<Symbol *>()) + markSymAsAddrSig(sym); + else + error(toString(isec) + ": unexpected section relocation"); } } } -void macho::foldIdenticalSections() { +void macho::foldIdenticalSections(bool onlyCfStrings) { TimeTraceScope timeScope("Fold Identical Code Sections"); // The ICF equivalence-class segregation algorithm relies on pre-computed // hashes of InputSection::data for the ConcatOutputSection::inputs and all @@ -420,10 +415,12 @@ void macho::foldIdenticalSections() { uint64_t icfUniqueID = inputSections.size(); for (ConcatInputSection *isec : inputSections) { // FIXME: consider non-code __text sections as hashable? - bool isHashable = (isCodeSection(isec) || isCfStringSection(isec) || - isClassRefsSection(isec)) && - !isec->keepUnique && !isec->shouldOmitFromOutput() && - sectionType(isec->getFlags()) == MachO::S_REGULAR; + bool isHashable = + (!onlyCfStrings || isCfStringSection(isec)) && + (isCodeSection(isec) || isCfStringSection(isec) || + isClassRefsSection(isec) || isGccExceptTabSection(isec)) && + !isec->keepUnique && !isec->shouldOmitFromOutput() && + sectionType(isec->getFlags()) == MachO::S_REGULAR; if (isHashable) { hashable.push_back(isec); for (Defined *d : isec->symbols) diff --git a/lld/MachO/ICF.h b/lld/MachO/ICF.h index a287692d7ffa..b7e695d81d34 100644 --- a/lld/MachO/ICF.h +++ b/lld/MachO/ICF.h @@ -19,7 +19,7 @@ class Symbol; void markAddrSigSymbols(); void markSymAsAddrSig(Symbol *s); -void foldIdenticalSections(); +void foldIdenticalSections(bool onlyCfStrings); } // namespace macho } // namespace lld diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp index fda6900edabe..e3bf553e5334 100644 --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -263,11 +263,15 @@ static Optional<size_t> getRecordSize(StringRef segname, StringRef name) { if (segname == segment_names::ld) return target->wordSize == 8 ? 32 : 20; } - if (config->icfLevel == ICFLevel::none) + if (!config->dedupLiterals) return {}; if (name == section_names::cfString && segname == segment_names::data) return target->wordSize == 8 ? 32 : 16; + + if (config->icfLevel == ICFLevel::none) + return {}; + if (name == section_names::objcClassRefs && segname == segment_names::data) return target->wordSize; return {}; @@ -359,6 +363,9 @@ void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) { // have the same name without causing duplicate symbol errors. To avoid // spurious duplicate symbol errors, we do not parse these sections. // TODO: Evaluate whether the bitcode metadata is needed. 
+ } else if (name == section_names::objCImageInfo && + segname == segment_names::data) { + objCImageInfo = data; } else { if (name == section_names::addrSig) addrSigSection = sections.back(); @@ -556,7 +563,7 @@ void ObjFile::parseOptimizationHints(ArrayRef<uint8_t> data) { if (section == sections.end()) return; ++subsection; - if (subsection == (*section)->subsections.end()) { + while (subsection == (*section)->subsections.end()) { ++section; if (section == sections.end()) return; @@ -578,6 +585,7 @@ void ObjFile::parseOptimizationHints(ArrayRef<uint8_t> data) { if (section == sections.end()) break; updateAddr(); + assert(hintStart->offset0 >= subsectionBase); } } @@ -899,7 +907,6 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders, if (sym.n_type & N_STAB) continue; - StringRef name = strtab + sym.n_strx; if ((sym.n_type & N_TYPE) == N_SECT) { Subsections &subsections = sections[sym.n_sect - 1]->subsections; // parseSections() may have chosen not to parse this section. @@ -909,7 +916,7 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders, } else if (isUndef(sym)) { undefineds.push_back(i); } else { - symbols[i] = parseNonSectionSymbol(sym, name); + symbols[i] = parseNonSectionSymbol(sym, StringRef(strtab + sym.n_strx)); } } @@ -1186,14 +1193,27 @@ ArrayRef<data_in_code_entry> ObjFile::getDataInCode() const { void ObjFile::registerCompactUnwind(Section &compactUnwindSection) { for (const Subsection &subsection : compactUnwindSection.subsections) { ConcatInputSection *isec = cast<ConcatInputSection>(subsection.isec); - // Hack!! Since each CUE contains a different function address, if ICF - // operated naively and compared the entire contents of each CUE, entries - // with identical unwind info but belonging to different functions would - // never be considered equivalent. To work around this problem, we slice - // away the function address here. (Note that we do not adjust the offsets - // of the corresponding relocations.) We rely on `relocateCompactUnwind()` - // to correctly handle these truncated input sections. - isec->data = isec->data.slice(target->wordSize); + // Hack!! Each compact unwind entry (CUE) has its UNSIGNED relocations embed + // their addends in its data. Thus if ICF operated naively and compared the + // entire contents of each CUE, entries with identical unwind info but e.g. + // belonging to different functions would never be considered equivalent. To + // work around this problem, we remove some parts of the data containing the + // embedded addends. In particular, we remove the function address and LSDA + // pointers. Since these locations are at the start and end of the entry, + // we can do this using a simple, efficient slice rather than performing a + // copy. We are not losing any information here because the embedded + // addends have already been parsed in the corresponding Reloc structs. + // + // Removing these pointers would not be safe if they were pointers to + // absolute symbols. In that case, there would be no corresponding + // relocation. However, (AFAIK) MC cannot emit references to absolute + // symbols for either the function address or the LSDA. However, it *can* do + // so for the personality pointer, so we are not slicing that field away. + // + // Note that we do not adjust the offsets of the corresponding relocations; + // instead, we rely on `relocateCompactUnwind()` to correctly handle these + // truncated input sections. 
+ isec->data = isec->data.slice(target->wordSize, 8 + target->wordSize); uint32_t encoding = read32le(isec->data.data() + sizeof(uint32_t)); // llvm-mc omits CU entries for functions that need DWARF encoding, but // `ld -r` doesn't. We can ignore them because we will re-synthesize these @@ -1240,11 +1260,23 @@ void ObjFile::registerCompactUnwind(Section &compactUnwindSection) { continue; } d->unwindEntry = isec; - // Since we've sliced away the functionAddress, we should remove the - // corresponding relocation too. Given that clang emits relocations in - // reverse order of address, this relocation should be at the end of the - // vector for most of our input object files, so this is typically an O(1) - // operation. + // Now that the symbol points to the unwind entry, we can remove the reloc + // that points from the unwind entry back to the symbol. + // + // First, the symbol keeps the unwind entry alive (and not vice versa), so + // this keeps dead-stripping simple. + // + // Moreover, it reduces the work that ICF needs to do to figure out if + // functions with unwind info are foldable. + // + // However, this does make it possible for ICF to fold CUEs that point to + // distinct functions (if the CUEs are otherwise identical). + // UnwindInfoSection takes care of this by re-duplicating the CUEs so that + // each one can hold a distinct functionAddress value. + // + // Given that clang emits relocations in reverse order of address, this + // relocation should be at the end of the vector for most of our input + // object files, so this erase() is typically an O(1) operation. it = isec->relocs.erase(it); } } @@ -1500,6 +1532,14 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) { Defined *funcSym; if (funcAddrRelocIt != isec->relocs.end()) { funcSym = targetSymFromCanonicalSubtractor(isec, funcAddrRelocIt); + // Canonicalize the symbol. If there are multiple symbols at the same + // address, we want both `registerEhFrame` and `registerCompactUnwind` + // to register the unwind entry under same symbol. + // This is not particularly efficient, but we should run into this case + // infrequently (only when handling the output of `ld -r`). + if (funcSym->isec) + funcSym = findSymbolAtOffset(cast<ConcatInputSection>(funcSym->isec), + funcSym->value); } else { funcSym = findSymbolAtAddress(sections, funcAddr); ehRelocator.makePcRel(funcAddrOff, funcSym, target->p2WordSize); diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h index efddc1c46782..5deb05272a6b 100644 --- a/lld/MachO/InputFiles.h +++ b/lld/MachO/InputFiles.h @@ -120,6 +120,7 @@ public: std::vector<Symbol *> symbols; std::vector<Section *> sections; + ArrayRef<uint8_t> objCImageInfo; // If not empty, this stores the name of the archive containing this file. // We use this string for creating error messages. 
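[Editor's note] The registerCompactUnwind() hunk above slices away the function address at the front and the LSDA pointer at the back of each compact unwind entry so that ICF can compare the remainder, keeping the personality field because MC can emit absolute references to it. The sketch below spells out what data.slice(wordSize, 8 + wordSize) keeps for an LP64 entry; the struct uses the conventional compact-unwind field names for illustration and is not lld's actual definition.

    #include <cstddef>
    #include <cstdint>

    struct CompactUnwindEntry64 { // 32 bytes on LP64
      uint64_t functionAddress;   // offset  0 -- sliced away (embedded addend)
      uint32_t functionLength;    // offset  8 -- kept
      uint32_t encoding;          // offset 12 -- kept
      uint64_t personality;       // offset 16 -- kept (may reference an absolute symbol)
      uint64_t lsda;              // offset 24 -- sliced away (embedded addend)
    };

    // data.slice(target->wordSize, 8 + target->wordSize) keeps bytes [8, 24):
    // functionLength, encoding and personality survive, while the function
    // address and LSDA pointer remain recoverable from the Reloc structs.
    static_assert(sizeof(CompactUnwindEntry64) == 32, "LP64 layout assumed");
    static_assert(offsetof(CompactUnwindEntry64, functionLength) == 8, "");
    static_assert(offsetof(CompactUnwindEntry64, lsda) == 24, "");

    int main() {}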
diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp index df312525df61..76b11d9da4f8 100644 --- a/lld/MachO/InputSection.cpp +++ b/lld/MachO/InputSection.cpp @@ -345,6 +345,11 @@ bool macho::isEhFrameSection(const InputSection *isec) { isec->getSegName() == segment_names::text; } +bool macho::isGccExceptTabSection(const InputSection *isec) { + return isec->getName() == section_names::gccExceptTab && + isec->getSegName() == segment_names::text; +} + std::string lld::toString(const InputSection *isec) { return (toString(isec->getFile()) + ":(" + isec->getName() + ")").str(); } diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h index e8710c25f534..afe76c56b536 100644 --- a/lld/MachO/InputSection.h +++ b/lld/MachO/InputSection.h @@ -281,6 +281,7 @@ bool isCodeSection(const InputSection *); bool isCfStringSection(const InputSection *); bool isClassRefsSection(const InputSection *); bool isEhFrameSection(const InputSection *); +bool isGccExceptTabSection(const InputSection *); extern std::vector<ConcatInputSection *> inputSections; @@ -320,7 +321,7 @@ constexpr const char objcCatList[] = "__objc_catlist"; constexpr const char objcClassList[] = "__objc_classlist"; constexpr const char objcClassRefs[] = "__objc_classrefs"; constexpr const char objcConst[] = "__objc_const"; -constexpr const char objcImageInfo[] = "__objc_imageinfo"; +constexpr const char objCImageInfo[] = "__objc_imageinfo"; constexpr const char objcNonLazyCatList[] = "__objc_nlcatlist"; constexpr const char objcNonLazyClassList[] = "__objc_nlclslist"; constexpr const char objcProtoList[] = "__objc_protolist"; diff --git a/lld/MachO/LTO.cpp b/lld/MachO/LTO.cpp index e87c4c413106..0b76216d24b5 100644 --- a/lld/MachO/LTO.cpp +++ b/lld/MachO/LTO.cpp @@ -138,8 +138,22 @@ std::vector<ObjFile *> BitcodeCompiler::compile() { saveBuffer(buf[i], config->outputFile + Twine(i) + ".lto.o"); } - if (!config->ltoObjPath.empty()) - fs::create_directories(config->ltoObjPath); + // In ThinLTO mode, Clang passes a temporary directory in -object_path_lto, + // while the argument is a single file in FullLTO mode. + bool objPathIsDir = true; + if (!config->ltoObjPath.empty()) { + if (std::error_code ec = fs::create_directories(config->ltoObjPath)) + fatal("cannot create LTO object path " + config->ltoObjPath + ": " + + ec.message()); + + if (!fs::is_directory(config->ltoObjPath)) { + objPathIsDir = false; + unsigned objCount = + count_if(buf, [](const SmallString<0> &b) { return !b.empty(); }); + if (objCount > 1) + fatal("-object_path_lto must specify a directory when using ThinLTO"); + } + } std::vector<ObjFile *> ret; for (unsigned i = 0; i != maxTasks; ++i) { @@ -149,9 +163,10 @@ std::vector<ObjFile *> BitcodeCompiler::compile() { uint32_t modTime = 0; if (!config->ltoObjPath.empty()) { filePath = config->ltoObjPath; - path::append(filePath, Twine(i) + "." + - getArchitectureName(config->arch()) + - ".lto.o"); + if (objPathIsDir) + path::append(filePath, Twine(i) + "." 
+ + getArchitectureName(config->arch()) + + ".lto.o"); saveBuffer(buf[i], filePath); modTime = getModTime(filePath); } diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td index 9b57f8a0bd49..b3d74a83f582 100644 --- a/lld/MachO/Options.td +++ b/lld/MachO/Options.td @@ -505,7 +505,6 @@ def reexported_symbols_list : Separate<["-"], "reexported_symbols_list">, def alias : MultiArg<["-"], "alias", 2>, MetaVarName<"<symbol_name> <alternate_name>">, HelpText<"Create a symbol alias with default global visibility">, - Flags<[HelpHidden]>, Group<grp_resolve>; def alias_list : Separate<["-"], "alias_list">, MetaVarName<"<file>">, diff --git a/lld/MachO/SectionPriorities.cpp b/lld/MachO/SectionPriorities.cpp index ac4878343ac0..fbc7796c3f8a 100644 --- a/lld/MachO/SectionPriorities.cpp +++ b/lld/MachO/SectionPriorities.cpp @@ -370,7 +370,7 @@ macho::PriorityBuilder::buildInputSectionPriorities() { if (!symbolPriority) return; size_t &priority = sectionPriorities[sym->isec]; - priority = std::max(priority, symbolPriority.getValue()); + priority = std::max(priority, symbolPriority.value()); }; // TODO: Make sure this handles weak symbols correctly. diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp index 7bda1d13069f..3667a7137291 100644 --- a/lld/MachO/SymbolTable.cpp +++ b/lld/MachO/SymbolTable.cpp @@ -117,6 +117,13 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file, return defined; } +Defined *SymbolTable::aliasDefined(Defined *src, StringRef target) { + return addDefined(target, src->getFile(), src->isec, src->value, src->size, + src->isWeakDef(), src->privateExtern, src->thumb, + src->referencedDynamically, src->noDeadStrip, + src->weakDefCanBeHidden); +} + Symbol *SymbolTable::addUndefined(StringRef name, InputFile *file, bool isWeakRef) { Symbol *s; diff --git a/lld/MachO/SymbolTable.h b/lld/MachO/SymbolTable.h index 0ecfa6dcd093..1b090105e0ca 100644 --- a/lld/MachO/SymbolTable.h +++ b/lld/MachO/SymbolTable.h @@ -43,6 +43,8 @@ public: bool isReferencedDynamically, bool noDeadStrip, bool isWeakDefCanBeHidden); + Defined *aliasDefined(Defined *src, StringRef target); + Symbol *addUndefined(StringRef name, InputFile *, bool isWeakRef); Symbol *addCommon(StringRef name, InputFile *, uint64_t size, uint32_t align, diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp index 0a57de319994..8e7ca520336c 100644 --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -57,7 +57,7 @@ static void sha256(const uint8_t *data, size_t len, uint8_t *output) { #else ArrayRef<uint8_t> block(data, len); std::array<uint8_t, 32> hash = SHA256::hash(block); - assert(hash.size() == CodeSignatureSection::hashSize); + static_assert(hash.size() == CodeSignatureSection::hashSize, ""); memcpy(output, hash.data(), hash.size()); #endif } @@ -164,62 +164,108 @@ RebaseSection::RebaseSection() : LinkEditSection(segment_names::linkEdit, section_names::rebase) {} namespace { -struct Rebase { - OutputSegment *segment = nullptr; - uint64_t offset = 0; - uint64_t consecutiveCount = 0; +struct RebaseState { + uint64_t sequenceLength; + uint64_t skipLength; }; } // namespace -// Rebase opcodes allow us to describe a contiguous sequence of rebase location -// using a single DO_REBASE opcode. To take advantage of it, we delay emitting -// `DO_REBASE` until we have reached the end of a contiguous sequence. 
-static void encodeDoRebase(Rebase &rebase, raw_svector_ostream &os) { - assert(rebase.consecutiveCount != 0); - if (rebase.consecutiveCount <= REBASE_IMMEDIATE_MASK) { - os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_IMM_TIMES | - rebase.consecutiveCount); +static void emitIncrement(uint64_t incr, raw_svector_ostream &os) { + assert(incr != 0); + + if ((incr >> target->p2WordSize) <= REBASE_IMMEDIATE_MASK && + (incr % target->wordSize) == 0) { + os << static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_IMM_SCALED | + (incr >> target->p2WordSize)); } else { - os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_ULEB_TIMES); - encodeULEB128(rebase.consecutiveCount, os); + os << static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_ULEB); + encodeULEB128(incr, os); } - rebase.consecutiveCount = 0; } -static void encodeRebase(const OutputSection *osec, uint64_t outSecOff, - Rebase &lastRebase, raw_svector_ostream &os) { - OutputSegment *seg = osec->parent; - uint64_t offset = osec->getSegmentOffset() + outSecOff; - if (lastRebase.segment != seg || lastRebase.offset != offset) { - if (lastRebase.consecutiveCount != 0) - encodeDoRebase(lastRebase, os); - - if (lastRebase.segment != seg) { - os << static_cast<uint8_t>(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | - seg->index); - encodeULEB128(offset, os); - lastRebase.segment = seg; - lastRebase.offset = offset; +static void flushRebase(const RebaseState &state, raw_svector_ostream &os) { + assert(state.sequenceLength > 0); + + if (state.skipLength == target->wordSize) { + if (state.sequenceLength <= REBASE_IMMEDIATE_MASK) { + os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_IMM_TIMES | + state.sequenceLength); } else { - assert(lastRebase.offset != offset); - uint64_t delta = offset - lastRebase.offset; - // For unknown reasons, ld64 checks if the scaled offset is strictly less - // than REBASE_IMMEDIATE_MASK instead of allowing equality. We match this - // behavior as a precaution. - if ((delta % target->wordSize == 0) && - (delta / target->wordSize < REBASE_IMMEDIATE_MASK)) { - os << static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_IMM_SCALED | - (delta / target->wordSize)); - } else { - os << static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_ULEB); - encodeULEB128(delta, os); - } - lastRebase.offset = offset; + os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_ULEB_TIMES); + encodeULEB128(state.sequenceLength, os); + } + } else if (state.sequenceLength == 1) { + os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB); + encodeULEB128(state.skipLength - target->wordSize, os); + } else { + os << static_cast<uint8_t>( + REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB); + encodeULEB128(state.sequenceLength, os); + encodeULEB128(state.skipLength - target->wordSize, os); + } +} + +// Rebases are communicated to dyld using a bytecode, whose opcodes cause the +// memory location at a specific address to be rebased and/or the address to be +// incremented. +// +// Opcode REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB is the most generic +// one, encoding a series of evenly spaced addresses. This algorithm works by +// splitting up the sorted list of addresses into such chunks. If the locations +// are consecutive or the sequence consists of a single location, flushRebase +// will use a smaller, more specialized encoding. +static void encodeRebases(const OutputSegment *seg, + MutableArrayRef<Location> locations, + raw_svector_ostream &os) { + // dyld operates on segments. Translate section offsets into segment offsets. 
+ for (Location &loc : locations) + loc.offset = + loc.isec->parent->getSegmentOffset() + loc.isec->getOffset(loc.offset); + // The algorithm assumes that locations are unique. + Location *end = + llvm::unique(locations, [](const Location &a, const Location &b) { + return a.offset == b.offset; + }); + size_t count = end - locations.begin(); + + os << static_cast<uint8_t>(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | + seg->index); + assert(!locations.empty()); + uint64_t offset = locations[0].offset; + encodeULEB128(offset, os); + + RebaseState state{1, target->wordSize}; + + for (size_t i = 1; i < count; ++i) { + offset = locations[i].offset; + + uint64_t skip = offset - locations[i - 1].offset; + assert(skip != 0 && "duplicate locations should have been weeded out"); + + if (skip == state.skipLength) { + ++state.sequenceLength; + } else if (state.sequenceLength == 1) { + ++state.sequenceLength; + state.skipLength = skip; + } else if (skip < state.skipLength) { + // The address is lower than what the rebase pointer would be if the last + // location would be part of a sequence. We start a new sequence from the + // previous location. + --state.sequenceLength; + flushRebase(state, os); + + state.sequenceLength = 2; + state.skipLength = skip; + } else { + // The address is at some positive offset from the rebase pointer. We + // start a new sequence which begins with the current location. + flushRebase(state, os); + emitIncrement(skip - state.skipLength, os); + state.sequenceLength = 1; + state.skipLength = target->wordSize; } } - ++lastRebase.consecutiveCount; - // DO_REBASE causes dyld to both perform the binding and increment the offset - lastRebase.offset += target->wordSize; + flushRebase(state, os); } void RebaseSection::finalizeContents() { @@ -227,19 +273,20 @@ void RebaseSection::finalizeContents() { return; raw_svector_ostream os{contents}; - Rebase lastRebase; - os << static_cast<uint8_t>(REBASE_OPCODE_SET_TYPE_IMM | REBASE_TYPE_POINTER); llvm::sort(locations, [](const Location &a, const Location &b) { return a.isec->getVA(a.offset) < b.isec->getVA(b.offset); }); - for (const Location &loc : locations) - encodeRebase(loc.isec->parent, loc.isec->getOffset(loc.offset), lastRebase, - os); - if (lastRebase.consecutiveCount != 0) - encodeDoRebase(lastRebase, os); + for (size_t i = 0, count = locations.size(); i < count;) { + const OutputSegment *seg = locations[i].isec->parent->parent; + size_t j = i + 1; + while (j < count && locations[j].isec->parent->parent == seg) + ++j; + encodeRebases(seg, {locations.data() + i, locations.data() + j}, os); + i = j; + } os << static_cast<uint8_t>(REBASE_OPCODE_DONE); } @@ -1574,6 +1621,86 @@ void WordLiteralSection::writeTo(uint8_t *buf) const { memcpy(buf + p.second * 4, &p.first, 4); } +ObjCImageInfoSection::ObjCImageInfoSection() + : SyntheticSection(segment_names::data, section_names::objCImageInfo) {} + +ObjCImageInfoSection::ImageInfo +ObjCImageInfoSection::parseImageInfo(const InputFile *file) { + ImageInfo info; + ArrayRef<uint8_t> data = file->objCImageInfo; + // The image info struct has the following layout: + // struct { + // uint32_t version; + // uint32_t flags; + // }; + if (data.size() < 8) { + warn(toString(file) + ": invalid __objc_imageinfo size"); + return info; + } + + auto *buf = reinterpret_cast<const uint32_t *>(data.data()); + if (read32le(buf) != 0) { + warn(toString(file) + ": invalid __objc_imageinfo version"); + return info; + } + + uint32_t flags = read32le(buf + 1); + info.swiftVersion = (flags >> 8) & 0xff; + 
info.hasCategoryClassProperties = flags & 0x40; + return info; +} + +static std::string swiftVersionString(uint8_t version) { + switch (version) { + case 1: + return "1.0"; + case 2: + return "1.1"; + case 3: + return "2.0"; + case 4: + return "3.0"; + case 5: + return "4.0"; + default: + return ("0x" + Twine::utohexstr(version)).str(); + } +} + +// Validate each object file's __objc_imageinfo and use them to generate the +// image info for the output binary. Only two pieces of info are relevant: +// 1. The Swift version (should be identical across inputs) +// 2. `bool hasCategoryClassProperties` (true only if true for all inputs) +void ObjCImageInfoSection::finalizeContents() { + assert(files.size() != 0); // should have already been checked via isNeeded() + + info.hasCategoryClassProperties = true; + const InputFile *firstFile; + for (auto file : files) { + ImageInfo inputInfo = parseImageInfo(file); + info.hasCategoryClassProperties &= inputInfo.hasCategoryClassProperties; + + // swiftVersion 0 means no Swift is present, so no version checking required + if (inputInfo.swiftVersion == 0) + continue; + + if (info.swiftVersion != 0 && info.swiftVersion != inputInfo.swiftVersion) { + error("Swift version mismatch: " + toString(firstFile) + " has version " + + swiftVersionString(info.swiftVersion) + " but " + toString(file) + + " has version " + swiftVersionString(inputInfo.swiftVersion)); + } else { + info.swiftVersion = inputInfo.swiftVersion; + firstFile = file; + } + } +} + +void ObjCImageInfoSection::writeTo(uint8_t *buf) const { + uint32_t flags = info.hasCategoryClassProperties ? 0x40 : 0x0; + flags |= info.swiftVersion << 8; + write32le(buf + 4, flags); +} + void macho::createSyntheticSymbols() { auto addHeaderSymbol = [](const char *name) { symtab->addSynthetic(name, in.header->isec, /*value=*/0, diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h index 4f7d5288c9dc..afdd46d8a7de 100644 --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -19,6 +19,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SetVector.h" #include "llvm/MC/StringTableBuilder.h" #include "llvm/Support/MathExtras.h" @@ -600,6 +601,27 @@ private: std::unordered_map<uint32_t, uint64_t> literal4Map; }; +class ObjCImageInfoSection final : public SyntheticSection { +public: + ObjCImageInfoSection(); + bool isNeeded() const override { return !files.empty(); } + uint64_t getSize() const override { return 8; } + void addFile(const InputFile *file) { + assert(!file->objCImageInfo.empty()); + files.push_back(file); + } + void finalizeContents(); + void writeTo(uint8_t *buf) const override; + +private: + struct ImageInfo { + uint8_t swiftVersion = 0; + bool hasCategoryClassProperties = false; + } info; + static ImageInfo parseImageInfo(const InputFile *); + std::vector<const InputFile *> files; // files with image info +}; + struct InStruct { const uint8_t *bufferStart = nullptr; MachHeaderSection *header = nullptr; @@ -616,6 +638,7 @@ struct InStruct { StubsSection *stubs = nullptr; StubHelperSection *stubHelper = nullptr; UnwindInfoSection *unwindInfo = nullptr; + ObjCImageInfoSection *objCImageInfo = nullptr; ConcatInputSection *imageLoaderCache = nullptr; }; diff --git a/lld/MachO/Target.h b/lld/MachO/Target.h index 597502275dee..ef54dbb80408 100644 --- a/lld/MachO/Target.h +++ b/lld/MachO/Target.h @@ -65,14 +65,19 @@ public: // on a level of address indirection. 
virtual void relaxGotLoad(uint8_t *loc, uint8_t type) const = 0; - virtual const RelocAttrs &getRelocAttrs(uint8_t type) const = 0; - virtual uint64_t getPageSize() const = 0; virtual void populateThunk(InputSection *thunk, Symbol *funcSym) { llvm_unreachable("target does not use thunks"); } + const RelocAttrs &getRelocAttrs(uint8_t type) const { + assert(type < relocAttrs.size() && "invalid relocation type"); + if (type >= relocAttrs.size()) + return invalidRelocAttrs; + return relocAttrs[type]; + } + bool hasAttr(uint8_t type, RelocAttrBits bit) const { return getRelocAttrs(type).hasAttr(bit); } @@ -111,6 +116,8 @@ public: uint8_t subtractorRelocType; uint8_t unsignedRelocType; + llvm::ArrayRef<RelocAttrs> relocAttrs; + // We contrive this value as sufficiently far from any valid address that it // will always be out-of-range for any architecture. UINT64_MAX is not a // good choice because it is (a) only 1 away from wrapping to 0, and (b) the diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp index 8c3425a17459..322057947a3d 100644 --- a/lld/MachO/UnwindInfoSection.cpp +++ b/lld/MachO/UnwindInfoSection.cpp @@ -211,7 +211,7 @@ void UnwindInfoSection::addSymbol(const Defined *d) { // we use that as the key here. auto p = symbols.insert({{d->isec, d->value}, d}); // If we have multiple symbols at the same address, only one of them can have - // an associated CUE. + // an associated unwind entry. if (!p.second && d->unwindEntry) { assert(!p.first->second->unwindEntry); p.first->second = d; diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp index 9395e1a068a3..7fad9f5564ce 100644 --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -1164,6 +1164,10 @@ template <class LP> void Writer::run() { if (in.stubHelper->isNeeded()) in.stubHelper->setup(); + + if (in.objCImageInfo->isNeeded()) + in.objCImageInfo->finalizeContents(); + // At this point, we should know exactly which output sections are needed, // courtesy of scanSymbols() and scanRelocations(). createOutputSections<LP>(); @@ -1210,6 +1214,7 @@ void macho::createSyntheticSections() { in.stubs = make<StubsSection>(); in.stubHelper = make<StubHelperSection>(); in.unwindInfo = makeUnwindInfoSection(); + in.objCImageInfo = make<ObjCImageInfoSection>(); // This section contains space for just a single word, and will be used by // dyld to cache an address to the image loader it uses. 
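[Editor's note] The new ObjCImageInfoSection reads each input's two-word __objc_imageinfo section, warns when the size or version field is unexpected, and then merges the Swift version (must agree across inputs) and the category-class-properties bit (ANDed across inputs). Below is a minimal standalone sketch of the decoding step only, following the flag layout in the parseImageInfo hunk; it assumes a little-endian host and uses std::optional instead of lld's warn-and-return-default behavior.

    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <optional>

    struct ImageInfo {
      uint8_t swiftVersion = 0;
      bool hasCategoryClassProperties = false;
    };

    // `data` is the raw __objc_imageinfo contents: struct { uint32_t version, flags; }.
    static std::optional<ImageInfo> parseImageInfo(const uint8_t *data, size_t size) {
      if (size < 8)
        return std::nullopt;            // lld warns: invalid __objc_imageinfo size
      uint32_t version, flags;
      std::memcpy(&version, data, 4);   // little-endian host assumed
      std::memcpy(&flags, data + 4, 4);
      if (version != 0)
        return std::nullopt;            // lld warns: invalid __objc_imageinfo version
      ImageInfo info;
      info.swiftVersion = (flags >> 8) & 0xff; // bits 8..15
      info.hasCategoryClassProperties = flags & 0x40;
      return info;
    }

    int main() {
      const uint8_t raw[8] = {0, 0, 0, 0, 0x40, 0x05, 0, 0}; // flags = 0x0540
      auto info = parseImageInfo(raw, sizeof(raw));
      return (info && info->swiftVersion == 5 && info->hasCategoryClassProperties) ? 0 : 1;
    }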
diff --git a/lldb/include/lldb/Core/EmulateInstruction.h b/lldb/include/lldb/Core/EmulateInstruction.h index f50fee095a8b..e5421e5e91d1 100644 --- a/lldb/include/lldb/Core/EmulateInstruction.h +++ b/lldb/include/lldb/Core/EmulateInstruction.h @@ -179,7 +179,7 @@ public: eInfoTypeISAAndImmediateSigned, eInfoTypeISA, eInfoTypeNoArgs - } InfoType; + }; struct Context { ContextType type = eContextInvalid; diff --git a/lldb/include/lldb/DataFormatters/TypeCategory.h b/lldb/include/lldb/DataFormatters/TypeCategory.h index 2c9305901837..16255f9488bd 100644 --- a/lldb/include/lldb/DataFormatters/TypeCategory.h +++ b/lldb/include/lldb/DataFormatters/TypeCategory.h @@ -331,7 +331,7 @@ private: std::vector<lldb::LanguageType> m_languages; - uint32_t m_enabled_position; + uint32_t m_enabled_position = 0; void Enable(bool value, uint32_t position); diff --git a/lldb/include/lldb/DataFormatters/TypeSynthetic.h b/lldb/include/lldb/DataFormatters/TypeSynthetic.h index 3f58297a529b..890a6eb4f448 100644 --- a/lldb/include/lldb/DataFormatters/TypeSynthetic.h +++ b/lldb/include/lldb/DataFormatters/TypeSynthetic.h @@ -266,7 +266,7 @@ public: uint32_t &GetRevision() { return m_my_revision; } protected: - uint32_t m_my_revision; + uint32_t m_my_revision = 0; Flags m_flags; private: diff --git a/lldb/include/lldb/Expression/Materializer.h b/lldb/include/lldb/Expression/Materializer.h index 25cf22a8b5b0..aae94f86a71e 100644 --- a/lldb/include/lldb/Expression/Materializer.h +++ b/lldb/include/lldb/Expression/Materializer.h @@ -78,6 +78,28 @@ public: AddPersistentVariable(lldb::ExpressionVariableSP &persistent_variable_sp, PersistentVariableDelegate *delegate, Status &err); uint32_t AddVariable(lldb::VariableSP &variable_sp, Status &err); + + /// Create entity from supplied ValueObject and count it as a member + /// of the materialized struct. + /// + /// Behaviour is undefined if 'valobj_provider' is empty. + /// + /// \param[in] name Name of variable to materialize + /// + /// \param[in] valobj_provider When materializing values multiple + /// times, this callback gets used to fetch a fresh + /// ValueObject corresponding to the supplied frame. + /// This is mainly used for conditional breakpoints + /// that re-apply an expression whatever the frame + /// happens to be when the breakpoint got hit. + /// + /// \param[out] err Error status that gets set on error. + /// + /// \returns Offset in bytes of the member we just added to the + /// materialized struct. + uint32_t AddValueObject(ConstString name, + ValueObjectProviderTy valobj_provider, Status &err); + uint32_t AddResultVariable(const CompilerType &type, bool is_lvalue, bool keep_in_memory, PersistentVariableDelegate *delegate, Status &err); diff --git a/lldb/include/lldb/Expression/UserExpression.h b/lldb/include/lldb/Expression/UserExpression.h index 3874a60e06f0..2d62fa37a24c 100644 --- a/lldb/include/lldb/Expression/UserExpression.h +++ b/lldb/include/lldb/Expression/UserExpression.h @@ -280,6 +280,23 @@ protected: static lldb::addr_t GetObjectPointer(lldb::StackFrameSP frame_sp, ConstString &object_name, Status &err); + /// Return ValueObject for a given variable name in the current stack frame + /// + /// \param[in] frame Current stack frame. When passed a 'nullptr', this + /// function returns an empty ValueObjectSP. + /// + /// \param[in] object_name Name of the variable in the current stack frame + /// for which we want the ValueObjectSP. + /// + /// \param[out] err Status object which will get set on error. 
+ /// + /// \returns On success returns a ValueObjectSP corresponding to the variable + /// with 'object_name' in the current 'frame'. Otherwise, returns + /// 'nullptr' (and sets the error status parameter 'err'). + static lldb::ValueObjectSP + GetObjectPointerValueObject(lldb::StackFrameSP frame, + ConstString const &object_name, Status &err); + /// Populate m_in_cplusplus_method and m_in_objectivec_method based on the /// environment. diff --git a/lldb/include/lldb/Interpreter/CommandObject.h b/lldb/include/lldb/Interpreter/CommandObject.h index 0fc1c61bdb92..ad5884e207a1 100644 --- a/lldb/include/lldb/Interpreter/CommandObject.h +++ b/lldb/include/lldb/Interpreter/CommandObject.h @@ -77,17 +77,18 @@ public: explicit operator bool() const { return (help_callback != nullptr); } }; - struct ArgumentTableEntry // Entries in the main argument information table - { + /// Entries in the main argument information table. + struct ArgumentTableEntry { lldb::CommandArgumentType arg_type; const char *arg_name; CommandCompletions::CommonCompletionTypes completion_type; + OptionEnumValues enum_values; ArgumentHelpCallback help_function; const char *help_text; }; - struct CommandArgumentData // Used to build individual command argument lists - { + /// Used to build individual command argument lists. + struct CommandArgumentData { lldb::CommandArgumentType arg_type; ArgumentRepetitionType arg_repetition; /// This arg might be associated only with some particular option set(s). By @@ -199,8 +200,6 @@ public: virtual Options *GetOptions(); - static const ArgumentTableEntry *GetArgumentTable(); - static lldb::CommandArgumentType LookupArgumentName(llvm::StringRef arg_name); static const ArgumentTableEntry * diff --git a/lldb/include/lldb/Interpreter/CommandOptionArgumentTable.h b/lldb/include/lldb/Interpreter/CommandOptionArgumentTable.h new file mode 100644 index 000000000000..c8ec3b941831 --- /dev/null +++ b/lldb/include/lldb/Interpreter/CommandOptionArgumentTable.h @@ -0,0 +1,334 @@ +//===-- CommandOptionArgumentTable.h ----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_INTERPRETER_COMMANDOPTIONARGUMENTTABLE_H +#define LLDB_INTERPRETER_COMMANDOPTIONARGUMENTTABLE_H + +#include "lldb/Interpreter/CommandObject.h" + +namespace lldb_private { + +static constexpr OptionEnumValueElement g_corefile_save_style[] = { + {lldb::eSaveCoreFull, "full", "Create a core file with all memory saved"}, + {lldb::eSaveCoreDirtyOnly, "modified-memory", + "Create a corefile with only modified memory saved"}, + {lldb::eSaveCoreStackOnly, "stack", + "Create a corefile with only stack memory saved"}, +}; + +static constexpr OptionEnumValueElement g_description_verbosity_type[] = { + { + eLanguageRuntimeDescriptionDisplayVerbosityCompact, + "compact", + "Only show the description string", + }, + { + eLanguageRuntimeDescriptionDisplayVerbosityFull, + "full", + "Show the full output, including persistent variable's name and type", + }, +}; + +static constexpr OptionEnumValueElement g_sort_option_enumeration[] = { + { + eSortOrderNone, + "none", + "No sorting, use the original symbol table order.", + }, + { + eSortOrderByAddress, + "address", + "Sort output by symbol address.", + }, + { + eSortOrderByName, + "name", + "Sort output by symbol name.", + }, +}; + +// Note that the negation in the argument name causes a slightly confusing +// mapping of the enum values. +static constexpr OptionEnumValueElement g_dependents_enumeration[] = { + { + eLoadDependentsDefault, + "default", + "Only load dependents when the target is an executable.", + }, + { + eLoadDependentsNo, + "true", + "Don't load dependents, even if the target is an executable.", + }, + { + eLoadDependentsYes, + "false", + "Load dependents, even if the target is not an executable.", + }, +}; + +// FIXME: "script-type" needs to have its contents determined dynamically, so +// somebody can add a new scripting language to lldb and have it pickable here +// without having to change this enumeration by hand and rebuild lldb proper. +static constexpr OptionEnumValueElement g_script_option_enumeration[] = { + { + lldb::eScriptLanguageNone, + "command", + "Commands are in the lldb command interpreter language", + }, + { + lldb::eScriptLanguagePython, + "python", + "Commands are in the Python language.", + }, + { + lldb::eScriptLanguageLua, + "lua", + "Commands are in the Lua language.", + }, + { + lldb::eScriptLanguageNone, + "default", + "Commands are in the default scripting language.", + }, +}; + +static constexpr OptionEnumValueElement g_log_handler_type[] = { + { + eLogHandlerDefault, + "default", + "Use the default (stream) log handler", + }, + { + eLogHandlerStream, + "stream", + "Write log messages to the debugger output stream or to a file if one " + "is specified. A buffer size (in bytes) can be specified with -b. If " + "no buffer size is specified the output is unbuffered.", + }, + { + eLogHandlerCircular, + "circular", + "Write log messages to a fixed size circular buffer. 
A buffer size " + "(number of messages) must be specified with -b.", + }, + { + eLogHandlerSystem, + "os", + "Write log messages to the operating system log.", + }, +}; + +static constexpr OptionEnumValueElement g_reproducer_provider_type[] = { + { + eReproducerProviderCommands, + "commands", + "Command Interpreter Commands", + }, + { + eReproducerProviderFiles, + "files", + "Files", + }, + { + eReproducerProviderSymbolFiles, + "symbol-files", + "Symbol Files", + }, + { + eReproducerProviderGDB, + "gdb", + "GDB Remote Packets", + }, + { + eReproducerProviderProcessInfo, + "processes", + "Process Info", + }, + { + eReproducerProviderVersion, + "version", + "Version", + }, + { + eReproducerProviderWorkingDirectory, + "cwd", + "Working Directory", + }, + { + eReproducerProviderHomeDirectory, + "home", + "Home Directory", + }, + { + eReproducerProviderNone, + "none", + "None", + }, +}; + +static constexpr OptionEnumValueElement g_reproducer_signaltype[] = { + { + eReproducerCrashSigill, + "SIGILL", + "Illegal instruction", + }, + { + eReproducerCrashSigsegv, + "SIGSEGV", + "Segmentation fault", + }, +}; + +static constexpr OptionEnumValueElement g_script_synchro_type[] = { + { + eScriptedCommandSynchronicitySynchronous, + "synchronous", + "Run synchronous", + }, + { + eScriptedCommandSynchronicityAsynchronous, + "asynchronous", + "Run asynchronous", + }, + { + eScriptedCommandSynchronicityCurrentValue, + "current", + "Do not alter current setting", + }, +}; + +static constexpr OptionEnumValueElement g_running_mode[] = { + {lldb::eOnlyThisThread, "this-thread", "Run only this thread"}, + {lldb::eAllThreads, "all-threads", "Run all threads"}, + {lldb::eOnlyDuringStepping, "while-stepping", + "Run only this thread while stepping"}, +}; + +llvm::StringRef RegisterNameHelpTextCallback(); +llvm::StringRef BreakpointIDHelpTextCallback(); +llvm::StringRef BreakpointIDRangeHelpTextCallback(); +llvm::StringRef BreakpointNameHelpTextCallback(); +llvm::StringRef GDBFormatHelpTextCallback(); +llvm::StringRef FormatHelpTextCallback(); +llvm::StringRef LanguageTypeHelpTextCallback(); +llvm::StringRef SummaryStringHelpTextCallback(); +llvm::StringRef ExprPathHelpTextCallback(); +llvm::StringRef arch_helper(); + +static constexpr CommandObject::ArgumentTableEntry g_argument_table[] = { + // clang-format off + { lldb::eArgTypeAddress, "address", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "A valid address in the target program's execution space." }, + { lldb::eArgTypeAddressOrExpression, "address-expression", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "An expression that resolves to an address." }, + { lldb::eArgTypeAliasName, "alias-name", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "The name of an abbreviation (alias) for a debugger command." }, + { lldb::eArgTypeAliasOptions, "options-for-aliased-command", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "Command options to be used as part of an alias (abbreviation) definition. (See 'help commands alias' for more information.)" }, + { lldb::eArgTypeArchitecture, "arch", CommandCompletions::eArchitectureCompletion, {}, { arch_helper, true }, "The architecture name, e.g. i386 or x86_64." 
}, + { lldb::eArgTypeBoolean, "boolean", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "A Boolean value: 'true' or 'false'" }, + { lldb::eArgTypeBreakpointID, "breakpt-id", CommandCompletions::eNoCompletion, {}, { BreakpointIDHelpTextCallback, false }, nullptr }, + { lldb::eArgTypeBreakpointIDRange, "breakpt-id-list", CommandCompletions::eNoCompletion, {}, { BreakpointIDRangeHelpTextCallback, false }, nullptr }, + { lldb::eArgTypeBreakpointName, "breakpoint-name", CommandCompletions::eBreakpointNameCompletion, {}, { BreakpointNameHelpTextCallback, false }, nullptr }, + { lldb::eArgTypeByteSize, "byte-size", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "Number of bytes to use." }, + { lldb::eArgTypeClassName, "class-name", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "Then name of a class from the debug information in the program." }, + { lldb::eArgTypeCommandName, "cmd-name", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "A debugger command (may be multiple words), without any options or arguments." }, + { lldb::eArgTypeCount, "count", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "An unsigned integer." }, + { lldb::eArgTypeDescriptionVerbosity, "description-verbosity", CommandCompletions::eNoCompletion, g_description_verbosity_type, { nullptr, false }, "How verbose the output of 'po' should be." }, + { lldb::eArgTypeDirectoryName, "directory", CommandCompletions::eDiskDirectoryCompletion, {}, { nullptr, false }, "A directory name." }, + { lldb::eArgTypeDisassemblyFlavor, "disassembly-flavor", CommandCompletions::eDisassemblyFlavorCompletion, {}, { nullptr, false }, "A disassembly flavor recognized by your disassembly plugin. Currently the only valid options are \"att\" and \"intel\" for Intel targets" }, + { lldb::eArgTypeEndAddress, "end-address", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "Help text goes here." }, + { lldb::eArgTypeExpression, "expr", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "Help text goes here." }, + { lldb::eArgTypeExpressionPath, "expr-path", CommandCompletions::eNoCompletion, {}, { ExprPathHelpTextCallback, true }, nullptr }, + { lldb::eArgTypeExprFormat, "expression-format", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "[ [bool|b] | [bin] | [char|c] | [oct|o] | [dec|i|d|u] | [hex|x] | [float|f] | [cstr|s] ]" }, + { lldb::eArgTypeFileLineColumn, "linespec", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "A source specifier in the form file:line[:column]" }, + { lldb::eArgTypeFilename, "filename", CommandCompletions::eDiskFileCompletion, {}, { nullptr, false }, "The name of a file (can include path)." }, + { lldb::eArgTypeFormat, "format", CommandCompletions::eNoCompletion, {}, { FormatHelpTextCallback, true }, nullptr }, + { lldb::eArgTypeFrameIndex, "frame-index", CommandCompletions::eFrameIndexCompletion, {}, { nullptr, false }, "Index into a thread's list of frames." }, + { lldb::eArgTypeFullName, "fullname", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "Help text goes here." }, + { lldb::eArgTypeFunctionName, "function-name", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "The name of a function." }, + { lldb::eArgTypeFunctionOrSymbol, "function-or-symbol", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "The name of a function or symbol." 
}, + { lldb::eArgTypeGDBFormat, "gdb-format", CommandCompletions::eNoCompletion, {}, { GDBFormatHelpTextCallback, true }, nullptr }, + { lldb::eArgTypeHelpText, "help-text", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "Text to be used as help for some other entity in LLDB" }, + { lldb::eArgTypeIndex, "index", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "An index into a list." }, + { lldb::eArgTypeLanguage, "source-language", CommandCompletions::eTypeLanguageCompletion, {}, { LanguageTypeHelpTextCallback, true }, nullptr }, + { lldb::eArgTypeLineNum, "linenum", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "Line number in a source file." }, + { lldb::eArgTypeLogCategory, "log-category", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "The name of a category within a log channel, e.g. all (try \"log list\" to see a list of all channels and their categories." }, + { lldb::eArgTypeLogChannel, "log-channel", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "The name of a log channel, e.g. process.gdb-remote (try \"log list\" to see a list of all channels and their categories)." }, + { lldb::eArgTypeMethod, "method", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "A C++ method name." }, + { lldb::eArgTypeName, "name", CommandCompletions::eTypeCategoryNameCompletion, {}, { nullptr, false }, "Help text goes here." }, + { lldb::eArgTypeNewPathPrefix, "new-path-prefix", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "Help text goes here." }, + { lldb::eArgTypeNumLines, "num-lines", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "The number of lines to use." }, + { lldb::eArgTypeNumberPerLine, "number-per-line", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "The number of items per line to display." }, + { lldb::eArgTypeOffset, "offset", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "Help text goes here." }, + { lldb::eArgTypeOldPathPrefix, "old-path-prefix", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "Help text goes here." }, + { lldb::eArgTypeOneLiner, "one-line-command", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "A command that is entered as a single line of text." }, + { lldb::eArgTypePath, "path", CommandCompletions::eDiskFileCompletion, {}, { nullptr, false }, "Path." }, + { lldb::eArgTypePermissionsNumber, "perms-numeric", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "Permissions given as an octal number (e.g. 755)." }, + { lldb::eArgTypePermissionsString, "perms=string", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "Permissions given as a string value (e.g. rw-r-xr--)." }, + { lldb::eArgTypePid, "pid", CommandCompletions::eProcessIDCompletion, {}, { nullptr, false }, "The process ID number." }, + { lldb::eArgTypePlugin, "plugin", CommandCompletions::eProcessPluginCompletion, {}, { nullptr, false }, "Help text goes here." }, + { lldb::eArgTypeProcessName, "process-name", CommandCompletions::eProcessNameCompletion, {}, { nullptr, false }, "The name of the process." }, + { lldb::eArgTypePythonClass, "python-class", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "The name of a Python class." }, + { lldb::eArgTypePythonFunction, "python-function", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "The name of a Python function." }, + { lldb::eArgTypePythonScript, "python-script", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "Source code written in Python." 
}, + { lldb::eArgTypeQueueName, "queue-name", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "The name of the thread queue." }, + { lldb::eArgTypeRegisterName, "register-name", CommandCompletions::eNoCompletion, {}, { RegisterNameHelpTextCallback, true }, nullptr }, + { lldb::eArgTypeRegularExpression, "regular-expression", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "A POSIX-compliant extended regular expression." }, + { lldb::eArgTypeRunArgs, "run-args", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "Arguments to be passed to the target program when it starts executing." }, + { lldb::eArgTypeRunMode, "run-mode", CommandCompletions::eNoCompletion, g_running_mode, { nullptr, false }, "Help text goes here." }, + { lldb::eArgTypeScriptedCommandSynchronicity, "script-cmd-synchronicity", CommandCompletions::eNoCompletion, g_script_synchro_type, { nullptr, false }, "The synchronicity to use to run scripted commands with regard to LLDB event system." }, + { lldb::eArgTypeScriptLang, "script-language", CommandCompletions::eNoCompletion, g_script_option_enumeration, { nullptr, false }, "The scripting language to be used for script-based commands. Supported languages are python and lua." }, + { lldb::eArgTypeSearchWord, "search-word", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "Any word of interest for search purposes." }, + { lldb::eArgTypeSelector, "selector", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "An Objective-C selector name." }, + { lldb::eArgTypeSettingIndex, "setting-index", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "An index into a settings variable that is an array (try 'settings list' to see all the possible settings variables and their types)." }, + { lldb::eArgTypeSettingKey, "setting-key", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "A key into a settings variables that is a dictionary (try 'settings list' to see all the possible settings variables and their types)." }, + { lldb::eArgTypeSettingPrefix, "setting-prefix", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "The name of a settable internal debugger variable up to a dot ('.'), e.g. 'target.process.'" }, + { lldb::eArgTypeSettingVariableName, "setting-variable-name", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "The name of a settable internal debugger variable. Type 'settings list' to see a complete list of such variables." }, + { lldb::eArgTypeShlibName, "shlib-name", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "The name of a shared library." }, + { lldb::eArgTypeSourceFile, "source-file", CommandCompletions::eSourceFileCompletion, {}, { nullptr, false }, "The name of a source file.." }, + { lldb::eArgTypeSortOrder, "sort-order", CommandCompletions::eNoCompletion, g_sort_option_enumeration, { nullptr, false }, "Specify a sort order when dumping lists." }, + { lldb::eArgTypeStartAddress, "start-address", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "Help text goes here." }, + { lldb::eArgTypeSummaryString, "summary-string", CommandCompletions::eNoCompletion, {}, { SummaryStringHelpTextCallback, true }, nullptr }, + { lldb::eArgTypeSymbol, "symbol", CommandCompletions::eSymbolCompletion, {}, { nullptr, false }, "Any symbol name (function name, variable, argument, etc.)" }, + { lldb::eArgTypeThreadID, "thread-id", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "Thread ID number." 
}, + { lldb::eArgTypeThreadIndex, "thread-index", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "Index into the process' list of threads." }, + { lldb::eArgTypeThreadName, "thread-name", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "The thread's name." }, + { lldb::eArgTypeTypeName, "type-name", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "A type name." }, + { lldb::eArgTypeUnsignedInteger, "unsigned-integer", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "An unsigned integer." }, + { lldb::eArgTypeUnixSignal, "unix-signal", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "A valid Unix signal name or number (e.g. SIGKILL, KILL or 9)." }, + { lldb::eArgTypeVarName, "variable-name", CommandCompletions::eNoCompletion, {} ,{ nullptr, false }, "The name of a variable in your program." }, + { lldb::eArgTypeValue, "value", CommandCompletions::eNoCompletion, g_dependents_enumeration, { nullptr, false }, "A value could be anything, depending on where and how it is used." }, + { lldb::eArgTypeWidth, "width", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "Help text goes here." }, + { lldb::eArgTypeNone, "none", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "No help available for this." }, + { lldb::eArgTypePlatform, "platform-name", CommandCompletions::ePlatformPluginCompletion, {}, { nullptr, false }, "The name of an installed platform plug-in . Type 'platform list' to see a complete list of installed platforms." }, + { lldb::eArgTypeWatchpointID, "watchpt-id", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "Watchpoint IDs are positive integers." }, + { lldb::eArgTypeWatchpointIDRange, "watchpt-id-list", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "For example, '1-3' or '1 to 3'." }, + { lldb::eArgTypeWatchType, "watch-type", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "Specify the type for a watchpoint." }, + { lldb::eArgRawInput, "raw-input", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "Free-form text passed to a command without prior interpretation, allowing spaces without requiring quotes. To pass arguments and free form text put two dashes ' -- ' between the last argument and any raw input." }, + { lldb::eArgTypeCommand, "command", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "An LLDB Command line command element." }, + { lldb::eArgTypeColumnNum, "column", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "Column number in a source file." }, + { lldb::eArgTypeModuleUUID, "module-uuid", CommandCompletions::eModuleUUIDCompletion, {}, { nullptr, false }, "A module UUID value." }, + { lldb::eArgTypeSaveCoreStyle, "corefile-style", CommandCompletions::eNoCompletion, g_corefile_save_style, { nullptr, false }, "The type of corefile that lldb will try to create, dependant on this target's capabilities." }, + { lldb::eArgTypeLogHandler, "log-handler", CommandCompletions::eNoCompletion, g_log_handler_type ,{ nullptr, false }, "The log handle that will be used to write out log messages." }, + { lldb::eArgTypeSEDStylePair, "substitution-pair", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "A sed-style pattern and target pair." }, + { lldb::eArgTypeRecognizerID, "frame-recognizer-id", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "The ID for a stack frame recognizer." 
}, + { lldb::eArgTypeConnectURL, "process-connect-url", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "A URL-style specification for a remote connection." }, + { lldb::eArgTypeTargetID, "target-id", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "The index ID for an lldb Target." }, + { lldb::eArgTypeStopHookID, "stop-hook-id", CommandCompletions::eNoCompletion, {}, { nullptr, false }, "The ID you receive when you create a stop-hook." }, + { lldb::eArgTypeReproducerProvider, "reproducer-provider", CommandCompletions::eNoCompletion, g_reproducer_provider_type, { nullptr, false }, "The reproducer provider." }, + { lldb::eArgTypeReproducerSignal, "reproducer-signal", CommandCompletions::eNoCompletion, g_reproducer_signaltype, { nullptr, false }, "The signal used to emulate a reproducer crash." }, + // clang-format on +}; + +static_assert((sizeof(g_argument_table) / + sizeof(CommandObject::ArgumentTableEntry)) == + lldb::eArgTypeLastArg, + "number of elements in g_argument_table doesn't match " + "CommandArgumentType enumeration"); + +} // namespace lldb_private + +#endif // LLDB_INTERPRETER_COMMANDOPTIONARGUMENTTABLE_H diff --git a/lldb/include/lldb/Symbol/SymbolFile.h b/lldb/include/lldb/Symbol/SymbolFile.h index 1470b96f2491..ed0de1b5bce6 100644 --- a/lldb/include/lldb/Symbol/SymbolFile.h +++ b/lldb/include/lldb/Symbol/SymbolFile.h @@ -132,7 +132,7 @@ public: /// Specify debug info should be loaded. /// /// It will be no-op for most implementations except SymbolFileOnDemand. - virtual void SetLoadDebugInfoEnabled() { return; } + virtual void SetLoadDebugInfoEnabled() {} // Compile Unit function calls // Approach 1 - iterator diff --git a/lldb/include/lldb/Target/MemoryRegionInfo.h b/lldb/include/lldb/Target/MemoryRegionInfo.h index 3ef66b403e14..cf38b6ea3345 100644 --- a/lldb/include/lldb/Target/MemoryRegionInfo.h +++ b/lldb/include/lldb/Target/MemoryRegionInfo.h @@ -132,7 +132,7 @@ public: void SetDirtyPageList(std::vector<lldb::addr_t> pagelist) { if (m_dirty_pages) - m_dirty_pages.getValue().clear(); + m_dirty_pages.value().clear(); m_dirty_pages = std::move(pagelist); } diff --git a/lldb/include/lldb/Target/Target.h b/lldb/include/lldb/Target/Target.h index 93ea8504f8ba..294fd96bc313 100644 --- a/lldb/include/lldb/Target/Target.h +++ b/lldb/include/lldb/Target/Target.h @@ -59,12 +59,6 @@ enum LoadCWDlldbinitFile { eLoadCWDlldbinitWarn }; -enum LoadDependentFiles { - eLoadDependentsDefault, - eLoadDependentsYes, - eLoadDependentsNo, -}; - enum ImportStdModule { eImportStdModuleFalse, eImportStdModuleFallback, diff --git a/lldb/include/lldb/Utility/StringExtractorGDBRemote.h b/lldb/include/lldb/Utility/StringExtractorGDBRemote.h index c32ce0389116..d869950ab6dd 100644 --- a/lldb/include/lldb/Utility/StringExtractorGDBRemote.h +++ b/lldb/include/lldb/Utility/StringExtractorGDBRemote.h @@ -179,6 +179,7 @@ public: eServerPacketType_QNonStop, eServerPacketType_vStopped, eServerPacketType_vCtrlC, + eServerPacketType_vStdio, }; ServerPacketType GetServerPacketType() const; diff --git a/lldb/include/lldb/lldb-enumerations.h b/lldb/include/lldb/lldb-enumerations.h index ad03f7e43056..974cc293709e 100644 --- a/lldb/include/lldb/lldb-enumerations.h +++ b/lldb/include/lldb/lldb-enumerations.h @@ -608,6 +608,8 @@ enum CommandArgumentType { eArgTypeConnectURL, eArgTypeTargetID, eArgTypeStopHookID, + eArgTypeReproducerProvider, + eArgTypeReproducerSignal, eArgTypeLastArg // Always keep this entry as the last entry in this // enumeration!! 
}; diff --git a/lldb/include/lldb/lldb-private-enumerations.h b/lldb/include/lldb/lldb-private-enumerations.h index 2d13e6ef65e1..310058495882 100644 --- a/lldb/include/lldb/lldb-private-enumerations.h +++ b/lldb/include/lldb/lldb-private-enumerations.h @@ -231,6 +231,29 @@ enum LogHandlerKind { eLogHandlerDefault = eLogHandlerStream, }; +enum ReproducerProvider { + eReproducerProviderCommands, + eReproducerProviderFiles, + eReproducerProviderSymbolFiles, + eReproducerProviderGDB, + eReproducerProviderProcessInfo, + eReproducerProviderVersion, + eReproducerProviderWorkingDirectory, + eReproducerProviderHomeDirectory, + eReproducerProviderNone, +}; + +enum ReproducerCrashSignal { + eReproducerCrashSigill, + eReproducerCrashSigsegv, +}; + +enum LoadDependentFiles { + eLoadDependentsDefault, + eLoadDependentsYes, + eLoadDependentsNo, +}; + inline std::string GetStatDescription(lldb_private::StatisticKind K) { switch (K) { case StatisticKind::ExpressionSuccessful: diff --git a/lldb/include/lldb/lldb-private-types.h b/lldb/include/lldb/lldb-private-types.h index 3be7003cd0fb..1b0d263e2073 100644 --- a/lldb/include/lldb/lldb-private-types.h +++ b/lldb/include/lldb/lldb-private-types.h @@ -106,6 +106,12 @@ struct OptionValidator { typedef struct type128 { uint64_t x[2]; } type128; typedef struct type256 { uint64_t x[4]; } type256; +/// Functor that returns a ValueObjectSP for a variable given its name +/// and the StackFrame of interest. Used primarily in the Materializer +/// to refetch a ValueObject when the ExecutionContextScope changes. +using ValueObjectProviderTy = + std::function<lldb::ValueObjectSP(ConstString, StackFrame *)>; + } // namespace lldb_private #endif // #if defined(__cplusplus) diff --git a/lldb/source/API/SBBreakpoint.cpp b/lldb/source/API/SBBreakpoint.cpp index 5fe8f7fe0583..19b2a4376cf8 100644 --- a/lldb/source/API/SBBreakpoint.cpp +++ b/lldb/source/API/SBBreakpoint.cpp @@ -835,8 +835,7 @@ public: if (bkpt->GetTargetSP() != target_sp) return false; lldb::break_id_t bp_id = bkpt->GetID(); - if (find(m_break_ids.begin(), m_break_ids.end(), bp_id) == - m_break_ids.end()) + if (!llvm::is_contained(m_break_ids, bp_id)) return false; m_break_ids.push_back(bkpt->GetID()); diff --git a/lldb/source/API/SBMemoryRegionInfo.cpp b/lldb/source/API/SBMemoryRegionInfo.cpp index e811bf31c722..23d22fbe86c8 100644 --- a/lldb/source/API/SBMemoryRegionInfo.cpp +++ b/lldb/source/API/SBMemoryRegionInfo.cpp @@ -136,7 +136,7 @@ uint32_t SBMemoryRegionInfo::GetNumDirtyPages() { const llvm::Optional<std::vector<addr_t>> &dirty_page_list = m_opaque_up->GetDirtyPageList(); if (dirty_page_list) - num_dirty_pages = dirty_page_list.getValue().size(); + num_dirty_pages = dirty_page_list.value().size(); return num_dirty_pages; } @@ -147,8 +147,8 @@ addr_t SBMemoryRegionInfo::GetDirtyPageAddressAtIndex(uint32_t idx) { addr_t dirty_page_addr = LLDB_INVALID_ADDRESS; const llvm::Optional<std::vector<addr_t>> &dirty_page_list = m_opaque_up->GetDirtyPageList(); - if (dirty_page_list && idx < dirty_page_list.getValue().size()) - dirty_page_addr = dirty_page_list.getValue()[idx]; + if (dirty_page_list && idx < dirty_page_list.value().size()) + dirty_page_addr = dirty_page_list.value()[idx]; return dirty_page_addr; } diff --git a/lldb/source/Commands/CommandObjectBreakpoint.cpp b/lldb/source/Commands/CommandObjectBreakpoint.cpp index 755cf6c61f26..5d2141d767cb 100644 --- a/lldb/source/Commands/CommandObjectBreakpoint.cpp +++ b/lldb/source/Commands/CommandObjectBreakpoint.cpp @@ -13,6 +13,7 @@ #include 
"lldb/Breakpoint/BreakpointLocation.h" #include "lldb/Host/OptionParser.h" #include "lldb/Interpreter/CommandInterpreter.h" +#include "lldb/Interpreter/CommandOptionArgumentTable.h" #include "lldb/Interpreter/CommandReturnObject.h" #include "lldb/Interpreter/OptionArgParser.h" #include "lldb/Interpreter/OptionGroupPythonClassWithDict.h" diff --git a/lldb/source/Commands/CommandObjectBreakpointCommand.cpp b/lldb/source/Commands/CommandObjectBreakpointCommand.cpp index bf0cae51d733..499bc550af45 100644 --- a/lldb/source/Commands/CommandObjectBreakpointCommand.cpp +++ b/lldb/source/Commands/CommandObjectBreakpointCommand.cpp @@ -14,6 +14,7 @@ #include "lldb/Core/IOHandler.h" #include "lldb/Host/OptionParser.h" #include "lldb/Interpreter/CommandInterpreter.h" +#include "lldb/Interpreter/CommandOptionArgumentTable.h" #include "lldb/Interpreter/CommandReturnObject.h" #include "lldb/Interpreter/OptionArgParser.h" #include "lldb/Interpreter/OptionGroupPythonClassWithDict.h" @@ -22,36 +23,6 @@ using namespace lldb; using namespace lldb_private; -// FIXME: "script-type" needs to have its contents determined dynamically, so -// somebody can add a new scripting language to lldb and have it pickable here -// without having to change this enumeration by hand and rebuild lldb proper. -static constexpr OptionEnumValueElement g_script_option_enumeration[] = { - { - eScriptLanguageNone, - "command", - "Commands are in the lldb command interpreter language", - }, - { - eScriptLanguagePython, - "python", - "Commands are in the Python language.", - }, - { - eScriptLanguageLua, - "lua", - "Commands are in the Lua language.", - }, - { - eScriptLanguageDefault, - "default-script", - "Commands are in the default scripting language.", - }, -}; - -static constexpr OptionEnumValues ScriptOptionEnum() { - return OptionEnumValues(g_script_option_enumeration); -} - #define LLDB_OPTIONS_breakpoint_command_add #include "CommandOptions.inc" diff --git a/lldb/source/Commands/CommandObjectCommands.cpp b/lldb/source/Commands/CommandObjectCommands.cpp index 39c7207bbdbe..97a883314917 100644 --- a/lldb/source/Commands/CommandObjectCommands.cpp +++ b/lldb/source/Commands/CommandObjectCommands.cpp @@ -13,6 +13,7 @@ #include "lldb/Core/IOHandler.h" #include "lldb/Interpreter/CommandHistory.h" #include "lldb/Interpreter/CommandInterpreter.h" +#include "lldb/Interpreter/CommandOptionArgumentTable.h" #include "lldb/Interpreter/CommandReturnObject.h" #include "lldb/Interpreter/OptionArgParser.h" #include "lldb/Interpreter/OptionValueBoolean.h" @@ -1354,29 +1355,6 @@ protected: CommandOptions m_options; }; -// CommandObjectCommandsScriptAdd -static constexpr OptionEnumValueElement g_script_synchro_type[] = { - { - eScriptedCommandSynchronicitySynchronous, - "synchronous", - "Run synchronous", - }, - { - eScriptedCommandSynchronicityAsynchronous, - "asynchronous", - "Run asynchronous", - }, - { - eScriptedCommandSynchronicityCurrentValue, - "current", - "Do not alter current setting", - }, -}; - -static constexpr OptionEnumValues ScriptSynchroType() { - return OptionEnumValues(g_script_synchro_type); -} - #define LLDB_OPTIONS_script_add #include "CommandOptions.inc" diff --git a/lldb/source/Commands/CommandObjectDisassemble.cpp b/lldb/source/Commands/CommandObjectDisassemble.cpp index 6c33edc8a3a8..a11e2b719727 100644 --- a/lldb/source/Commands/CommandObjectDisassemble.cpp +++ b/lldb/source/Commands/CommandObjectDisassemble.cpp @@ -12,6 +12,7 @@ #include "lldb/Core/Module.h" #include "lldb/Host/OptionParser.h" #include 
"lldb/Interpreter/CommandInterpreter.h" +#include "lldb/Interpreter/CommandOptionArgumentTable.h" #include "lldb/Interpreter/CommandReturnObject.h" #include "lldb/Interpreter/OptionArgParser.h" #include "lldb/Interpreter/Options.h" diff --git a/lldb/source/Commands/CommandObjectExpression.cpp b/lldb/source/Commands/CommandObjectExpression.cpp index 59c2cdb0938c..0fb50420f70f 100644 --- a/lldb/source/Commands/CommandObjectExpression.cpp +++ b/lldb/source/Commands/CommandObjectExpression.cpp @@ -14,6 +14,7 @@ #include "lldb/Expression/UserExpression.h" #include "lldb/Host/OptionParser.h" #include "lldb/Interpreter/CommandInterpreter.h" +#include "lldb/Interpreter/CommandOptionArgumentTable.h" #include "lldb/Interpreter/CommandReturnObject.h" #include "lldb/Interpreter/OptionArgParser.h" #include "lldb/Target/Language.h" @@ -28,23 +29,6 @@ CommandObjectExpression::CommandOptions::CommandOptions() = default; CommandObjectExpression::CommandOptions::~CommandOptions() = default; -static constexpr OptionEnumValueElement g_description_verbosity_type[] = { - { - eLanguageRuntimeDescriptionDisplayVerbosityCompact, - "compact", - "Only show the description string", - }, - { - eLanguageRuntimeDescriptionDisplayVerbosityFull, - "full", - "Show the full output, including persistent variable's name and type", - }, -}; - -static constexpr OptionEnumValues DescriptionVerbosityTypes() { - return OptionEnumValues(g_description_verbosity_type); -} - #define LLDB_OPTIONS_expression #include "CommandOptions.inc" diff --git a/lldb/source/Commands/CommandObjectFrame.cpp b/lldb/source/Commands/CommandObjectFrame.cpp index 9f2c79d84ad4..23954dd3c9fd 100644 --- a/lldb/source/Commands/CommandObjectFrame.cpp +++ b/lldb/source/Commands/CommandObjectFrame.cpp @@ -13,6 +13,7 @@ #include "lldb/Host/Config.h" #include "lldb/Host/OptionParser.h" #include "lldb/Interpreter/CommandInterpreter.h" +#include "lldb/Interpreter/CommandOptionArgumentTable.h" #include "lldb/Interpreter/CommandReturnObject.h" #include "lldb/Interpreter/OptionArgParser.h" #include "lldb/Interpreter/OptionGroupFormat.h" @@ -143,10 +144,10 @@ protected: "`frame diagnose --address` is incompatible with other arguments."); return false; } - valobj_sp = frame_sp->GuessValueForAddress(m_options.address.getValue()); + valobj_sp = frame_sp->GuessValueForAddress(m_options.address.value()); } else if (m_options.reg) { valobj_sp = frame_sp->GuessValueForRegisterAndOffset( - m_options.reg.getValue(), m_options.offset.value_or(0)); + m_options.reg.value(), m_options.offset.value_or(0)); } else { StopInfoSP stop_info_sp = thread->GetStopInfo(); if (!stop_info_sp) { diff --git a/lldb/source/Commands/CommandObjectHelp.cpp b/lldb/source/Commands/CommandObjectHelp.cpp index a2f682049ae0..c7a3bce1408b 100644 --- a/lldb/source/Commands/CommandObjectHelp.cpp +++ b/lldb/source/Commands/CommandObjectHelp.cpp @@ -8,6 +8,7 @@ #include "CommandObjectHelp.h" #include "lldb/Interpreter/CommandInterpreter.h" +#include "lldb/Interpreter/CommandOptionArgumentTable.h" #include "lldb/Interpreter/CommandReturnObject.h" using namespace lldb; diff --git a/lldb/source/Commands/CommandObjectLog.cpp b/lldb/source/Commands/CommandObjectLog.cpp index 684cb35da1cc..89bc3e0f1b7d 100644 --- a/lldb/source/Commands/CommandObjectLog.cpp +++ b/lldb/source/Commands/CommandObjectLog.cpp @@ -9,6 +9,7 @@ #include "CommandObjectLog.h" #include "lldb/Core/Debugger.h" #include "lldb/Host/OptionParser.h" +#include "lldb/Interpreter/CommandOptionArgumentTable.h" #include 
"lldb/Interpreter/CommandReturnObject.h" #include "lldb/Interpreter/OptionArgParser.h" #include "lldb/Interpreter/OptionValueEnumeration.h" @@ -23,36 +24,6 @@ using namespace lldb; using namespace lldb_private; -static constexpr OptionEnumValueElement g_log_handler_type[] = { - { - eLogHandlerDefault, - "default", - "Use the default (stream) log handler", - }, - { - eLogHandlerStream, - "stream", - "Write log messages to the debugger output stream or to a file if one " - "is specified. A buffer size (in bytes) can be specified with -b. If " - "no buffer size is specified the output is unbuffered.", - }, - { - eLogHandlerCircular, - "circular", - "Write log messages to a fixed size circular buffer. A buffer size " - "(number of messages) must be specified with -b.", - }, - { - eLogHandlerSystem, - "os", - "Write log messages to the operating system log.", - }, -}; - -static constexpr OptionEnumValues LogHandlerType() { - return OptionEnumValues(g_log_handler_type); -} - #define LLDB_OPTIONS_log_enable #include "CommandOptions.inc" diff --git a/lldb/source/Commands/CommandObjectMemory.cpp b/lldb/source/Commands/CommandObjectMemory.cpp index 98fa38f25635..ca0384cf9453 100644 --- a/lldb/source/Commands/CommandObjectMemory.cpp +++ b/lldb/source/Commands/CommandObjectMemory.cpp @@ -13,6 +13,7 @@ #include "lldb/Core/ValueObjectMemory.h" #include "lldb/Expression/ExpressionVariable.h" #include "lldb/Host/OptionParser.h" +#include "lldb/Interpreter/CommandOptionArgumentTable.h" #include "lldb/Interpreter/CommandReturnObject.h" #include "lldb/Interpreter/OptionArgParser.h" #include "lldb/Interpreter/OptionGroupFormat.h" @@ -274,7 +275,7 @@ public: OptionValueUInt64 m_num_per_line; bool m_output_as_binary = false; OptionValueString m_view_as_type; - bool m_force; + bool m_force = false; OptionValueUInt64 m_offset; OptionValueLanguage m_language_for_type; }; @@ -1738,7 +1739,7 @@ protected: const llvm::Optional<std::vector<addr_t>> &dirty_page_list = range_info.GetDirtyPageList(); if (dirty_page_list) { - const size_t page_count = dirty_page_list.getValue().size(); + const size_t page_count = dirty_page_list.value().size(); result.AppendMessageWithFormat( "Modified memory (dirty) page list provided, %zu entries.\n", page_count); diff --git a/lldb/source/Commands/CommandObjectMemoryTag.cpp b/lldb/source/Commands/CommandObjectMemoryTag.cpp index 67461cac5a29..98ab6201711d 100644 --- a/lldb/source/Commands/CommandObjectMemoryTag.cpp +++ b/lldb/source/Commands/CommandObjectMemoryTag.cpp @@ -8,6 +8,7 @@ #include "CommandObjectMemoryTag.h" #include "lldb/Host/OptionParser.h" +#include "lldb/Interpreter/CommandOptionArgumentTable.h" #include "lldb/Interpreter/CommandReturnObject.h" #include "lldb/Interpreter/OptionArgParser.h" #include "lldb/Interpreter/OptionGroupFormat.h" diff --git a/lldb/source/Commands/CommandObjectPlatform.cpp b/lldb/source/Commands/CommandObjectPlatform.cpp index 42b19db5efc4..98c6a3b2dd30 100644 --- a/lldb/source/Commands/CommandObjectPlatform.cpp +++ b/lldb/source/Commands/CommandObjectPlatform.cpp @@ -13,6 +13,7 @@ #include "lldb/Core/PluginManager.h" #include "lldb/Host/OptionParser.h" #include "lldb/Interpreter/CommandInterpreter.h" +#include "lldb/Interpreter/CommandOptionArgumentTable.h" #include "lldb/Interpreter/CommandOptionValidators.h" #include "lldb/Interpreter/CommandReturnObject.h" #include "lldb/Interpreter/OptionGroupFile.h" diff --git a/lldb/source/Commands/CommandObjectProcess.cpp b/lldb/source/Commands/CommandObjectProcess.cpp index d36a574aba7d..28a99ea3d94a 
100644 --- a/lldb/source/Commands/CommandObjectProcess.cpp +++ b/lldb/source/Commands/CommandObjectProcess.cpp @@ -7,8 +7,8 @@ //===----------------------------------------------------------------------===// #include "CommandObjectProcess.h" -#include "CommandObjectTrace.h" #include "CommandObjectBreakpoint.h" +#include "CommandObjectTrace.h" #include "CommandOptionsProcessLaunch.h" #include "lldb/Breakpoint/Breakpoint.h" #include "lldb/Breakpoint/BreakpointIDList.h" @@ -19,6 +19,7 @@ #include "lldb/Core/PluginManager.h" #include "lldb/Host/OptionParser.h" #include "lldb/Interpreter/CommandInterpreter.h" +#include "lldb/Interpreter/CommandOptionArgumentTable.h" #include "lldb/Interpreter/CommandReturnObject.h" #include "lldb/Interpreter/OptionArgParser.h" #include "lldb/Interpreter/OptionGroupPythonClassWithDict.h" @@ -1332,20 +1333,6 @@ protected: } }; -// CommandObjectProcessSaveCore -#pragma mark CommandObjectProcessSaveCore - -static constexpr OptionEnumValueElement g_corefile_save_style[] = { - {eSaveCoreFull, "full", "Create a core file with all memory saved"}, - {eSaveCoreDirtyOnly, "modified-memory", - "Create a corefile with only modified memory saved"}, - {eSaveCoreStackOnly, "stack", - "Create a corefile with only stack memory saved"}}; - -static constexpr OptionEnumValues SaveCoreStyles() { - return OptionEnumValues(g_corefile_save_style); -} - #define LLDB_OPTIONS_process_save_core #include "CommandOptions.inc" diff --git a/lldb/source/Commands/CommandObjectRegister.cpp b/lldb/source/Commands/CommandObjectRegister.cpp index 933c243dedd5..56cbacbebec5 100644 --- a/lldb/source/Commands/CommandObjectRegister.cpp +++ b/lldb/source/Commands/CommandObjectRegister.cpp @@ -10,6 +10,7 @@ #include "lldb/Core/Debugger.h" #include "lldb/Core/DumpRegisterValue.h" #include "lldb/Host/OptionParser.h" +#include "lldb/Interpreter/CommandOptionArgumentTable.h" #include "lldb/Interpreter/CommandReturnObject.h" #include "lldb/Interpreter/OptionGroupFormat.h" #include "lldb/Interpreter/OptionValueArray.h" diff --git a/lldb/source/Commands/CommandObjectReproducer.cpp b/lldb/source/Commands/CommandObjectReproducer.cpp index 8d12decba974..6160a83b4d09 100644 --- a/lldb/source/Commands/CommandObjectReproducer.cpp +++ b/lldb/source/Commands/CommandObjectReproducer.cpp @@ -11,6 +11,7 @@ #include "lldb/Host/HostInfo.h" #include "lldb/Host/OptionParser.h" #include "lldb/Interpreter/CommandInterpreter.h" +#include "lldb/Interpreter/CommandOptionArgumentTable.h" #include "lldb/Interpreter/CommandReturnObject.h" #include "lldb/Interpreter/OptionArgParser.h" #include "lldb/Utility/GDBRemote.h" @@ -24,95 +25,9 @@ using namespace llvm; using namespace lldb_private; using namespace lldb_private::repro; -enum ReproducerProvider { - eReproducerProviderCommands, - eReproducerProviderFiles, - eReproducerProviderSymbolFiles, - eReproducerProviderGDB, - eReproducerProviderProcessInfo, - eReproducerProviderVersion, - eReproducerProviderWorkingDirectory, - eReproducerProviderHomeDirectory, - eReproducerProviderNone -}; - -static constexpr OptionEnumValueElement g_reproducer_provider_type[] = { - { - eReproducerProviderCommands, - "commands", - "Command Interpreter Commands", - }, - { - eReproducerProviderFiles, - "files", - "Files", - }, - { - eReproducerProviderSymbolFiles, - "symbol-files", - "Symbol Files", - }, - { - eReproducerProviderGDB, - "gdb", - "GDB Remote Packets", - }, - { - eReproducerProviderProcessInfo, - "processes", - "Process Info", - }, - { - eReproducerProviderVersion, - "version", - "Version", - }, 
- { - eReproducerProviderWorkingDirectory, - "cwd", - "Working Directory", - }, - { - eReproducerProviderHomeDirectory, - "home", - "Home Directory", - }, - { - eReproducerProviderNone, - "none", - "None", - }, -}; - -static constexpr OptionEnumValues ReproducerProviderType() { - return OptionEnumValues(g_reproducer_provider_type); -} - #define LLDB_OPTIONS_reproducer_dump #include "CommandOptions.inc" -enum ReproducerCrashSignal { - eReproducerCrashSigill, - eReproducerCrashSigsegv, -}; - -static constexpr OptionEnumValueElement g_reproducer_signaltype[] = { - { - eReproducerCrashSigill, - "SIGILL", - "Illegal instruction", - }, - { - eReproducerCrashSigsegv, - "SIGSEGV", - "Segmentation fault", - }, -}; - -static constexpr OptionEnumValues ReproducerSignalType() { - return OptionEnumValues(g_reproducer_signaltype); -} - #define LLDB_OPTIONS_reproducer_xcrash #include "CommandOptions.inc" diff --git a/lldb/source/Commands/CommandObjectScript.cpp b/lldb/source/Commands/CommandObjectScript.cpp index f53d6540bc04..2ed03e85b2d7 100644 --- a/lldb/source/Commands/CommandObjectScript.cpp +++ b/lldb/source/Commands/CommandObjectScript.cpp @@ -12,6 +12,7 @@ #include "lldb/Host/Config.h" #include "lldb/Host/OptionParser.h" #include "lldb/Interpreter/CommandInterpreter.h" +#include "lldb/Interpreter/CommandOptionArgumentTable.h" #include "lldb/Interpreter/CommandReturnObject.h" #include "lldb/Interpreter/OptionArgParser.h" #include "lldb/Interpreter/ScriptInterpreter.h" @@ -20,28 +21,6 @@ using namespace lldb; using namespace lldb_private; -static constexpr OptionEnumValueElement g_script_option_enumeration[] = { - { - eScriptLanguagePython, - "python", - "Python", - }, - { - eScriptLanguageLua, - "lua", - "Lua", - }, - { - eScriptLanguageNone, - "default", - "The default scripting language.", - }, -}; - -static constexpr OptionEnumValues ScriptOptionEnum() { - return OptionEnumValues(g_script_option_enumeration); -} - #define LLDB_OPTIONS_script #include "CommandOptions.inc" diff --git a/lldb/source/Commands/CommandObjectSession.cpp b/lldb/source/Commands/CommandObjectSession.cpp index c11839a48de0..f9ef1eb93443 100644 --- a/lldb/source/Commands/CommandObjectSession.cpp +++ b/lldb/source/Commands/CommandObjectSession.cpp @@ -1,6 +1,7 @@ #include "CommandObjectSession.h" #include "lldb/Host/OptionParser.h" #include "lldb/Interpreter/CommandInterpreter.h" +#include "lldb/Interpreter/CommandOptionArgumentTable.h" #include "lldb/Interpreter/CommandReturnObject.h" #include "lldb/Interpreter/OptionArgParser.h" #include "lldb/Interpreter/OptionValue.h" diff --git a/lldb/source/Commands/CommandObjectSettings.cpp b/lldb/source/Commands/CommandObjectSettings.cpp index e4162e56dc2c..86194664bf5d 100644 --- a/lldb/source/Commands/CommandObjectSettings.cpp +++ b/lldb/source/Commands/CommandObjectSettings.cpp @@ -13,6 +13,7 @@ #include "lldb/Host/OptionParser.h" #include "lldb/Interpreter/CommandCompletions.h" #include "lldb/Interpreter/CommandInterpreter.h" +#include "lldb/Interpreter/CommandOptionArgumentTable.h" #include "lldb/Interpreter/CommandReturnObject.h" #include "lldb/Interpreter/OptionValueProperties.h" diff --git a/lldb/source/Commands/CommandObjectSource.cpp b/lldb/source/Commands/CommandObjectSource.cpp index f87981859844..fd028d4f62f2 100644 --- a/lldb/source/Commands/CommandObjectSource.cpp +++ b/lldb/source/Commands/CommandObjectSource.cpp @@ -14,6 +14,7 @@ #include "lldb/Core/ModuleSpec.h" #include "lldb/Core/SourceManager.h" #include "lldb/Host/OptionParser.h" +#include 
"lldb/Interpreter/CommandOptionArgumentTable.h" #include "lldb/Interpreter/CommandReturnObject.h" #include "lldb/Interpreter/OptionArgParser.h" #include "lldb/Interpreter/OptionValueFileColonLine.h" diff --git a/lldb/source/Commands/CommandObjectStats.cpp b/lldb/source/Commands/CommandObjectStats.cpp index 63aa36b39f4d..c0f8ff4b0b58 100644 --- a/lldb/source/Commands/CommandObjectStats.cpp +++ b/lldb/source/Commands/CommandObjectStats.cpp @@ -9,6 +9,7 @@ #include "CommandObjectStats.h" #include "lldb/Core/Debugger.h" #include "lldb/Host/OptionParser.h" +#include "lldb/Interpreter/CommandOptionArgumentTable.h" #include "lldb/Interpreter/CommandReturnObject.h" #include "lldb/Target/Target.h" diff --git a/lldb/source/Commands/CommandObjectTarget.cpp b/lldb/source/Commands/CommandObjectTarget.cpp index 51978878c8b9..3dcb35557bfe 100644 --- a/lldb/source/Commands/CommandObjectTarget.cpp +++ b/lldb/source/Commands/CommandObjectTarget.cpp @@ -17,6 +17,7 @@ #include "lldb/DataFormatters/ValueObjectPrinter.h" #include "lldb/Host/OptionParser.h" #include "lldb/Interpreter/CommandInterpreter.h" +#include "lldb/Interpreter/CommandOptionArgumentTable.h" #include "lldb/Interpreter/CommandReturnObject.h" #include "lldb/Interpreter/OptionArgParser.h" #include "lldb/Interpreter/OptionGroupArchitecture.h" @@ -142,26 +143,6 @@ static uint32_t DumpTargetList(TargetList &target_list, return num_targets; } -// Note that the negation in the argument name causes a slightly confusing -// mapping of the enum values. -static constexpr OptionEnumValueElement g_dependents_enumeration[] = { - { - eLoadDependentsDefault, - "default", - "Only load dependents when the target is an executable.", - }, - { - eLoadDependentsNo, - "true", - "Don't load dependents, even if the target is an executable.", - }, - { - eLoadDependentsYes, - "false", - "Load dependents, even if the target is not an executable.", - }, -}; - #define LLDB_OPTIONS_target_dependents #include "CommandOptions.inc" @@ -1923,26 +1904,6 @@ protected: } }; -#pragma mark CommandObjectTargetModulesDumpSymtab - -static constexpr OptionEnumValueElement g_sort_option_enumeration[] = { - { - eSortOrderNone, - "none", - "No sorting, use the original symbol table order.", - }, - { - eSortOrderByAddress, - "address", - "Sort output by symbol address.", - }, - { - eSortOrderByName, - "name", - "Sort output by symbol name.", - }, -}; - #define LLDB_OPTIONS_target_modules_dump_symtab #include "CommandOptions.inc" @@ -4655,7 +4616,7 @@ public: m_class_name.clear(); m_function_name.clear(); m_line_start = 0; - m_line_end = UINT_MAX; + m_line_end = LLDB_INVALID_LINE_NUMBER; m_file_name.clear(); m_module_name.clear(); m_func_name_type_mask = eFunctionNameTypeAuto; @@ -4676,23 +4637,23 @@ public: std::string m_class_name; std::string m_function_name; uint32_t m_line_start = 0; - uint32_t m_line_end; + uint32_t m_line_end = LLDB_INVALID_LINE_NUMBER; std::string m_file_name; std::string m_module_name; uint32_t m_func_name_type_mask = eFunctionNameTypeAuto; // A pick from lldb::FunctionNameType. - lldb::tid_t m_thread_id; - uint32_t m_thread_index; + lldb::tid_t m_thread_id = LLDB_INVALID_THREAD_ID; + uint32_t m_thread_index = UINT32_MAX; std::string m_thread_name; std::string m_queue_name; bool m_sym_ctx_specified = false; - bool m_no_inlines; + bool m_no_inlines = false; bool m_thread_specified = false; // Instance variables to hold the values for one_liner options. 
bool m_use_one_liner = false; std::vector<std::string> m_one_liner; - bool m_auto_continue; + bool m_auto_continue = false; }; CommandObjectTargetStopHookAdd(CommandInterpreter &interpreter) diff --git a/lldb/source/Commands/CommandObjectThread.cpp b/lldb/source/Commands/CommandObjectThread.cpp index ad49d27bb9a7..1371b9dbda1e 100644 --- a/lldb/source/Commands/CommandObjectThread.cpp +++ b/lldb/source/Commands/CommandObjectThread.cpp @@ -17,6 +17,7 @@ #include "lldb/Core/ValueObject.h" #include "lldb/Host/OptionParser.h" #include "lldb/Interpreter/CommandInterpreter.h" +#include "lldb/Interpreter/CommandOptionArgumentTable.h" #include "lldb/Interpreter/CommandReturnObject.h" #include "lldb/Interpreter/OptionArgParser.h" #include "lldb/Interpreter/OptionGroupPythonClassWithDict.h" @@ -239,16 +240,6 @@ protected: enum StepScope { eStepScopeSource, eStepScopeInstruction }; -static constexpr OptionEnumValueElement g_tri_running_mode[] = { - {eOnlyThisThread, "this-thread", "Run only this thread"}, - {eAllThreads, "all-threads", "Run all threads"}, - {eOnlyDuringStepping, "while-stepping", - "Run only this thread while stepping"}}; - -static constexpr OptionEnumValues TriRunningModes() { - return OptionEnumValues(g_tri_running_mode); -} - #define LLDB_OPTIONS_thread_step_scope #include "CommandOptions.inc" @@ -813,14 +804,6 @@ public: // CommandObjectThreadUntil -static constexpr OptionEnumValueElement g_duo_running_mode[] = { - {eOnlyThisThread, "this-thread", "Run only this thread"}, - {eAllThreads, "all-threads", "Run all threads"}}; - -static constexpr OptionEnumValues DuoRunningModes() { - return OptionEnumValues(g_duo_running_mode); -} - #define LLDB_OPTIONS_thread_until #include "CommandOptions.inc" @@ -894,8 +877,8 @@ public: return llvm::makeArrayRef(g_thread_until_options); } - uint32_t m_step_thread_idx; - bool m_stop_others; + uint32_t m_step_thread_idx = LLDB_INVALID_THREAD_ID; + bool m_stop_others = false; std::vector<lldb::addr_t> m_until_addrs; // Instance variables to hold the values for command options. diff --git a/lldb/source/Commands/CommandObjectTrace.cpp b/lldb/source/Commands/CommandObjectTrace.cpp index 227de2de7065..eaf99a46b5c2 100644 --- a/lldb/source/Commands/CommandObjectTrace.cpp +++ b/lldb/source/Commands/CommandObjectTrace.cpp @@ -16,6 +16,7 @@ #include "lldb/Host/OptionParser.h" #include "lldb/Interpreter/CommandInterpreter.h" #include "lldb/Interpreter/CommandObject.h" +#include "lldb/Interpreter/CommandOptionArgumentTable.h" #include "lldb/Interpreter/CommandReturnObject.h" #include "lldb/Interpreter/OptionArgParser.h" #include "lldb/Interpreter/OptionGroupFormat.h" diff --git a/lldb/source/Commands/CommandObjectType.cpp b/lldb/source/Commands/CommandObjectType.cpp index 3ad3571b390c..11acbb5c627f 100644 --- a/lldb/source/Commands/CommandObjectType.cpp +++ b/lldb/source/Commands/CommandObjectType.cpp @@ -15,6 +15,7 @@ #include "lldb/Host/OptionParser.h" #include "lldb/Interpreter/CommandInterpreter.h" #include "lldb/Interpreter/CommandObject.h" +#include "lldb/Interpreter/CommandOptionArgumentTable.h" #include "lldb/Interpreter/CommandReturnObject.h" #include "lldb/Interpreter/OptionArgParser.h" #include "lldb/Interpreter/OptionGroupFormat.h" @@ -119,12 +120,12 @@ private: // Instance variables to hold the values for command options. 
TypeSummaryImpl::Flags m_flags; - bool m_regex; + bool m_regex = false; std::string m_format_string; ConstString m_name; std::string m_python_script; std::string m_python_function; - bool m_is_add_script; + bool m_is_add_script = false; std::string m_category; }; @@ -1054,6 +1055,15 @@ protected: return false; } + static bool ShouldListItem(llvm::StringRef s, RegularExpression *regex) { + // If we have a regex, it can match two kinds of results: + // - An item created with that same regex string (exact string match), so + // the user can list it using the same string it used at creation time. + // - Items that match the regex. + // No regex means list everything. + return regex == nullptr || s == regex->GetText() || regex->Execute(s); + } + bool DoExecute(Args &command, CommandReturnObject &result) override { const size_t argc = command.GetArgumentCount(); @@ -1095,24 +1105,13 @@ protected: .SetExact([&result, &formatter_regex, &any_printed]( const TypeMatcher &type_matcher, const FormatterSharedPointer &format_sp) -> bool { - if (formatter_regex) { - bool escape = true; - if (type_matcher.CreatedBySameMatchString( - ConstString(formatter_regex->GetText()))) { - escape = false; - } else if (formatter_regex->Execute( - type_matcher.GetMatchString().GetStringRef())) { - escape = false; - } - - if (escape) - return true; + if (ShouldListItem(type_matcher.GetMatchString().GetStringRef(), + formatter_regex.get())) { + any_printed = true; + result.GetOutputStream().Printf( + "%s: %s\n", type_matcher.GetMatchString().GetCString(), + format_sp->GetDescription().c_str()); } - - any_printed = true; - result.GetOutputStream().Printf( - "%s: %s\n", type_matcher.GetMatchString().GetCString(), - format_sp->GetDescription().c_str()); return true; }); @@ -1120,24 +1119,13 @@ protected: .SetWithRegex([&result, &formatter_regex, &any_printed]( const TypeMatcher &type_matcher, const FormatterSharedPointer &format_sp) -> bool { - if (formatter_regex) { - bool escape = true; - if (type_matcher.CreatedBySameMatchString( - ConstString(formatter_regex->GetText()))) { - escape = false; - } else if (formatter_regex->Execute( - type_matcher.GetMatchString().GetStringRef())) { - escape = false; - } - - if (escape) - return true; + if (ShouldListItem(type_matcher.GetMatchString().GetStringRef(), + formatter_regex.get())) { + any_printed = true; + result.GetOutputStream().Printf( + "%s: %s\n", type_matcher.GetMatchString().GetCString(), + format_sp->GetDescription().c_str()); } - - any_printed = true; - result.GetOutputStream().Printf( - "%s: %s\n", type_matcher.GetMatchString().GetCString(), - format_sp->GetDescription().c_str()); return true; }); @@ -1154,20 +1142,9 @@ protected: DataVisualization::Categories::ForEach( [&category_regex, &category_closure]( const lldb::TypeCategoryImplSP &category) -> bool { - if (category_regex) { - bool escape = true; - if (category->GetName() == category_regex->GetText()) { - escape = false; - } else if (category_regex->Execute(category->GetName())) { - escape = false; - } - - if (escape) - return true; + if (ShouldListItem(category->GetName(), category_regex.get())) { + category_closure(category); } - - category_closure(category); - return true; }); diff --git a/lldb/source/Commands/CommandObjectWatchpoint.cpp b/lldb/source/Commands/CommandObjectWatchpoint.cpp index 6805ff7f50ae..f87a171daca6 100644 --- a/lldb/source/Commands/CommandObjectWatchpoint.cpp +++ b/lldb/source/Commands/CommandObjectWatchpoint.cpp @@ -18,6 +18,7 @@ #include "lldb/Core/ValueObject.h" #include 
"lldb/Host/OptionParser.h" #include "lldb/Interpreter/CommandInterpreter.h" +#include "lldb/Interpreter/CommandOptionArgumentTable.h" #include "lldb/Interpreter/CommandReturnObject.h" #include "lldb/Symbol/Variable.h" #include "lldb/Symbol/VariableList.h" diff --git a/lldb/source/Commands/CommandObjectWatchpointCommand.cpp b/lldb/source/Commands/CommandObjectWatchpointCommand.cpp index 0cc83275d7ae..3264813b3d53 100644 --- a/lldb/source/Commands/CommandObjectWatchpointCommand.cpp +++ b/lldb/source/Commands/CommandObjectWatchpointCommand.cpp @@ -15,6 +15,7 @@ #include "lldb/Core/IOHandler.h" #include "lldb/Host/OptionParser.h" #include "lldb/Interpreter/CommandInterpreter.h" +#include "lldb/Interpreter/CommandOptionArgumentTable.h" #include "lldb/Interpreter/CommandReturnObject.h" #include "lldb/Interpreter/OptionArgParser.h" #include "lldb/Target/Target.h" @@ -22,36 +23,6 @@ using namespace lldb; using namespace lldb_private; -// FIXME: "script-type" needs to have its contents determined dynamically, so -// somebody can add a new scripting language to lldb and have it pickable here -// without having to change this enumeration by hand and rebuild lldb proper. -static constexpr OptionEnumValueElement g_script_option_enumeration[] = { - { - eScriptLanguageNone, - "command", - "Commands are in the lldb command interpreter language", - }, - { - eScriptLanguagePython, - "python", - "Commands are in the Python language.", - }, - { - eScriptLanguageLua, - "lua", - "Commands are in the Lua language.", - }, - { - eSortOrderByName, - "default-script", - "Commands are in the default scripting language.", - }, -}; - -static constexpr OptionEnumValues ScriptOptionEnum() { - return OptionEnumValues(g_script_option_enumeration); -} - #define LLDB_OPTIONS_watchpoint_command_add #include "CommandOptions.inc" diff --git a/lldb/source/Commands/CommandOptionArgumentTable.cpp b/lldb/source/Commands/CommandOptionArgumentTable.cpp new file mode 100644 index 000000000000..e8e9307b6291 --- /dev/null +++ b/lldb/source/Commands/CommandOptionArgumentTable.cpp @@ -0,0 +1,313 @@ +//===-- CommandOptionArgumentTable.cpp ------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/Interpreter/CommandOptionArgumentTable.h" +#include "lldb/DataFormatters/FormatManager.h" +#include "lldb/Target/Language.h" +#include "lldb/Utility/StreamString.h" + +using namespace lldb; +using namespace lldb_private; + +namespace lldb_private { +llvm::StringRef RegisterNameHelpTextCallback() { + return "Register names can be specified using the architecture specific " + "names. " + "They can also be specified using generic names. Not all generic " + "entities have " + "registers backing them on all architectures. 
When they don't the " + "generic name " + "will return an error.\n" + "The generic names defined in lldb are:\n" + "\n" + "pc - program counter register\n" + "ra - return address register\n" + "fp - frame pointer register\n" + "sp - stack pointer register\n" + "flags - the flags register\n" + "arg{1-6} - integer argument passing registers.\n"; +} + +llvm::StringRef BreakpointIDHelpTextCallback() { + return "Breakpoints are identified using major and minor numbers; the major " + "number corresponds to the single entity that was created with a " + "'breakpoint " + "set' command; the minor numbers correspond to all the locations that " + "were " + "actually found/set based on the major breakpoint. A full breakpoint " + "ID might " + "look like 3.14, meaning the 14th location set for the 3rd " + "breakpoint. You " + "can specify all the locations of a breakpoint by just indicating the " + "major " + "breakpoint number. A valid breakpoint ID consists either of just the " + "major " + "number, or the major number followed by a dot and the location " + "number (e.g. " + "3 or 3.2 could both be valid breakpoint IDs.)"; +} + +llvm::StringRef BreakpointIDRangeHelpTextCallback() { + return "A 'breakpoint ID list' is a manner of specifying multiple " + "breakpoints. " + "This can be done through several mechanisms. The easiest way is to " + "just " + "enter a space-separated list of breakpoint IDs. To specify all the " + "breakpoint locations under a major breakpoint, you can use the major " + "breakpoint number followed by '.*', eg. '5.*' means all the " + "locations under " + "breakpoint 5. You can also indicate a range of breakpoints by using " + "<start-bp-id> - <end-bp-id>. The start-bp-id and end-bp-id for a " + "range can " + "be any valid breakpoint IDs. It is not legal, however, to specify a " + "range " + "using specific locations that cross major breakpoint numbers. I.e. " + "3.2 - 3.7" + " is legal; 2 - 5 is legal; but 3.2 - 4.4 is not legal."; +} + +llvm::StringRef BreakpointNameHelpTextCallback() { + return "A name that can be added to a breakpoint when it is created, or " + "later " + "on with the \"breakpoint name add\" command. " + "Breakpoint names can be used to specify breakpoints in all the " + "places breakpoint IDs " + "and breakpoint ID ranges can be used. As such they provide a " + "convenient way to group breakpoints, " + "and to operate on breakpoints you create without having to track the " + "breakpoint number. " + "Note, the attributes you set when using a breakpoint name in a " + "breakpoint command don't " + "adhere to the name, but instead are set individually on all the " + "breakpoints currently tagged with that " + "name. Future breakpoints " + "tagged with that name will not pick up the attributes previously " + "given using that name. " + "In order to distinguish breakpoint names from breakpoint IDs and " + "ranges, " + "names must start with a letter from a-z or A-Z and cannot contain " + "spaces, \".\" or \"-\". " + "Also, breakpoint names can only be applied to breakpoints, not to " + "breakpoint locations."; +} + +llvm::StringRef GDBFormatHelpTextCallback() { + return "A GDB format consists of a repeat count, a format letter and a size " + "letter. " + "The repeat count is optional and defaults to 1. The format letter is " + "optional " + "and defaults to the previous format that was used. 
The size letter " + "is optional " + "and defaults to the previous size that was used.\n" + "\n" + "Format letters include:\n" + "o - octal\n" + "x - hexadecimal\n" + "d - decimal\n" + "u - unsigned decimal\n" + "t - binary\n" + "f - float\n" + "a - address\n" + "i - instruction\n" + "c - char\n" + "s - string\n" + "T - OSType\n" + "A - float as hex\n" + "\n" + "Size letters include:\n" + "b - 1 byte (byte)\n" + "h - 2 bytes (halfword)\n" + "w - 4 bytes (word)\n" + "g - 8 bytes (giant)\n" + "\n" + "Example formats:\n" + "32xb - show 32 1 byte hexadecimal integer values\n" + "16xh - show 16 2 byte hexadecimal integer values\n" + "64 - show 64 2 byte hexadecimal integer values (format and size " + "from the last format)\n" + "dw - show 1 4 byte decimal integer value\n"; +} + +llvm::StringRef FormatHelpTextCallback() { + static std::string help_text; + + if (!help_text.empty()) + return help_text; + + StreamString sstr; + sstr << "One of the format names (or one-character names) that can be used " + "to show a variable's value:\n"; + for (Format f = eFormatDefault; f < kNumFormats; f = Format(f + 1)) { + if (f != eFormatDefault) + sstr.PutChar('\n'); + + char format_char = FormatManager::GetFormatAsFormatChar(f); + if (format_char) + sstr.Printf("'%c' or ", format_char); + + sstr.Printf("\"%s\"", FormatManager::GetFormatAsCString(f)); + } + + sstr.Flush(); + + help_text = std::string(sstr.GetString()); + + return help_text; +} + +llvm::StringRef LanguageTypeHelpTextCallback() { + static std::string help_text; + + if (!help_text.empty()) + return help_text; + + StreamString sstr; + sstr << "One of the following languages:\n"; + + Language::PrintAllLanguages(sstr, " ", "\n"); + + sstr.Flush(); + + help_text = std::string(sstr.GetString()); + + return help_text; +} + +llvm::StringRef SummaryStringHelpTextCallback() { + return "A summary string is a way to extract information from variables in " + "order to present them using a summary.\n" + "Summary strings contain static text, variables, scopes and control " + "sequences:\n" + " - Static text can be any sequence of non-special characters, i.e. " + "anything but '{', '}', '$', or '\\'.\n" + " - Variables are sequences of characters beginning with ${, ending " + "with } and that contain symbols in the format described below.\n" + " - Scopes are any sequence of text between { and }. Anything " + "included in a scope will only appear in the output summary if there " + "were no errors.\n" + " - Control sequences are the usual C/C++ '\\a', '\\n', ..., plus " + "'\\$', '\\{' and '\\}'.\n" + "A summary string works by copying static text verbatim, turning " + "control sequences into their character counterpart, expanding " + "variables and trying to expand scopes.\n" + "A variable is expanded by giving it a value other than its textual " + "representation, and the way this is done depends on what comes after " + "the ${ marker.\n" + "The most common sequence if ${var followed by an expression path, " + "which is the text one would type to access a member of an aggregate " + "types, given a variable of that type" + " (e.g. if type T has a member named x, which has a member named y, " + "and if t is of type T, the expression path would be .x.y and the way " + "to fit that into a summary string would be" + " ${var.x.y}). You can also use ${*var followed by an expression path " + "and in that case the object referred by the path will be " + "dereferenced before being displayed." + " If the object is not a pointer, doing so will cause an error. 
For " + "additional details on expression paths, you can type 'help " + "expr-path'. \n" + "By default, summary strings attempt to display the summary for any " + "variable they reference, and if that fails the value. If neither can " + "be shown, nothing is displayed." + "In a summary string, you can also use an array index [n], or a " + "slice-like range [n-m]. This can have two different meanings " + "depending on what kind of object the expression" + " path refers to:\n" + " - if it is a scalar type (any basic type like int, float, ...) the " + "expression is a bitfield, i.e. the bits indicated by the indexing " + "operator are extracted out of the number" + " and displayed as an individual variable\n" + " - if it is an array or pointer the array items indicated by the " + "indexing operator are shown as the result of the variable. if the " + "expression is an array, real array items are" + " printed; if it is a pointer, the pointer-as-array syntax is used to " + "obtain the values (this means, the latter case can have no range " + "checking)\n" + "If you are trying to display an array for which the size is known, " + "you can also use [] instead of giving an exact range. This has the " + "effect of showing items 0 thru size - 1.\n" + "Additionally, a variable can contain an (optional) format code, as " + "in ${var.x.y%code}, where code can be any of the valid formats " + "described in 'help format', or one of the" + " special symbols only allowed as part of a variable:\n" + " %V: show the value of the object by default\n" + " %S: show the summary of the object by default\n" + " %@: show the runtime-provided object description (for " + "Objective-C, it calls NSPrintForDebugger; for C/C++ it does " + "nothing)\n" + " %L: show the location of the object (memory address or a " + "register name)\n" + " %#: show the number of children of the object\n" + " %T: show the type of the object\n" + "Another variable that you can use in summary strings is ${svar . " + "This sequence works exactly like ${var, including the fact that " + "${*svar is an allowed sequence, but uses" + " the object's synthetic children provider instead of the actual " + "objects. For instance, if you are using STL synthetic children " + "providers, the following summary string would" + " count the number of actual elements stored in an std::list:\n" + "type summary add -s \"${svar%#}\" -x \"std::list<\""; +} + +llvm::StringRef ExprPathHelpTextCallback() { + return "An expression path is the sequence of symbols that is used in C/C++ " + "to access a member variable of an aggregate object (class).\n" + "For instance, given a class:\n" + " class foo {\n" + " int a;\n" + " int b; .\n" + " foo* next;\n" + " };\n" + "the expression to read item b in the item pointed to by next for foo " + "aFoo would be aFoo.next->b.\n" + "Given that aFoo could just be any object of type foo, the string " + "'.next->b' is the expression path, because it can be attached to any " + "foo instance to achieve the effect.\n" + "Expression paths in LLDB include dot (.) and arrow (->) operators, " + "and most commands using expression paths have ways to also accept " + "the star (*) operator.\n" + "The meaning of these operators is the same as the usual one given to " + "them by the C/C++ standards.\n" + "LLDB also has support for indexing ([ ]) in expression paths, and " + "extends the traditional meaning of the square brackets operator to " + "allow bitfield extraction:\n" + "for objects of native types (int, float, char, ...) 
saying '[n-m]' " + "as an expression path (where n and m are any positive integers, e.g. " + "[3-5]) causes LLDB to extract" + " bits n thru m from the value of the variable. If n == m, [n] is " + "also allowed as a shortcut syntax. For arrays and pointers, " + "expression paths can only contain one index" + " and the meaning of the operation is the same as the one defined by " + "C/C++ (item extraction). Some commands extend bitfield-like syntax " + "for arrays and pointers with the" + " meaning of array slicing (taking elements n thru m inside the array " + "or pointed-to memory)."; +} + +llvm::StringRef arch_helper() { + static StreamString g_archs_help; + if (g_archs_help.Empty()) { + StringList archs; + + ArchSpec::ListSupportedArchNames(archs); + g_archs_help.Printf("These are the supported architecture names:\n"); + archs.Join("\n", g_archs_help); + } + return g_archs_help.GetString(); +} + +template <int I> struct TableValidator : TableValidator<I + 1> { + static_assert( + g_argument_table[I].arg_type == I, + "g_argument_table order doesn't match CommandArgumentType enumeration"); +}; + +template <> struct TableValidator<eArgTypeLastArg> {}; + +TableValidator<0> validator; + +} // namespace lldb_private diff --git a/lldb/source/Commands/CommandOptionsProcessLaunch.cpp b/lldb/source/Commands/CommandOptionsProcessLaunch.cpp index a618796156a6..3c7f4b6c7585 100644 --- a/lldb/source/Commands/CommandOptionsProcessLaunch.cpp +++ b/lldb/source/Commands/CommandOptionsProcessLaunch.cpp @@ -12,6 +12,8 @@ #include "lldb/Host/HostInfo.h" #include "lldb/Host/OptionParser.h" #include "lldb/Interpreter/CommandCompletions.h" +#include "lldb/Interpreter/CommandObject.h" +#include "lldb/Interpreter/CommandOptionArgumentTable.h" #include "lldb/Interpreter/OptionArgParser.h" #include "lldb/Target/ExecutionContext.h" #include "lldb/Target/Platform.h" diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td index 7981917fd8b5..78221db18e65 100644 --- a/lldb/source/Commands/Options.td +++ b/lldb/source/Commands/Options.td @@ -3,7 +3,7 @@ include "OptionsBase.td" let Command = "target modules dump symtab" in { def tm_sort : Option<"sort", "s">, Group<1>, Desc<"Supply a sort order when dumping the symbol table.">, - EnumArg<"SortOrder", "OptionEnumValues(g_sort_option_enumeration)">; + EnumArg<"SortOrder">; def tm_smn : Option<"show-mangled-names", "m">, Group<1>, Desc<"Do not demangle symbol names before showing them.">; } @@ -282,7 +282,7 @@ let Command = "breakpoint command add" in { Arg<"Boolean">, Desc<"Specify whether breakpoint command execution should " "terminate on error.">; def breakpoint_add_script_type : Option<"script-type", "s">, - EnumArg<"None", "ScriptOptionEnum()">, + EnumArg<"ScriptLang">, Desc<"Specify the language for the commands - if none is specified, the " "lldb command interpreter will be used.">; def breakpoint_add_dummy_breakpoints : Option<"dummy-breakpoints", "D">, @@ -370,7 +370,7 @@ let Command = "expression" in { "automatically applied to the expression.">; def expression_options_description_verbosity : Option<"description-verbosity", "v">, Group<1>, - OptionalEnumArg<"DescriptionVerbosity", "DescriptionVerbosityTypes()">, + OptionalEnumArg<"DescriptionVerbosity">, Desc<"How verbose should the output of this expression be, if the object " "description is asked for.">; def expression_options_top_level : Option<"top-level", "p">, Groups<[1,2]>, @@ -437,7 +437,7 @@ let Command = "log enable" in { def log_file : Option<"file", "f">, Group<1>, 
Arg<"Filename">, Desc<"Set the destination file to log to.">; def log_handler : Option<"log-handler", "h">, Group<1>, - EnumArg<"LogHandler", "LogHandlerType()">, Desc<"Specify a log handler which determines where log messages are written.">; + EnumArg<"LogHandler">, Desc<"Specify a log handler which determines where log messages are written.">; def log_buffer_size : Option<"buffer", "b">, Group<1>, Arg<"UnsignedInteger">, Desc<"Set the log to be buffered, using the specified buffer size, if supported by the log handler.">; def log_verbose : Option<"verbose", "v">, Group<1>, @@ -468,7 +468,7 @@ let Command = "log dump" in { let Command = "reproducer dump" in { def reproducer_provider : Option<"provider", "p">, Group<1>, - EnumArg<"None", "ReproducerProviderType()">, + EnumArg<"ReproducerProvider">, Required, Desc<"The reproducer provider to dump.">; def reproducer_file : Option<"file", "f">, Group<1>, Arg<"Filename">, Desc<"The reproducer path. If a reproducer is replayed and no path is " @@ -483,7 +483,7 @@ let Command = "reproducer verify" in { let Command = "reproducer xcrash" in { def reproducer_signal : Option<"signal", "s">, Group<1>, - EnumArg<"None", "ReproducerSignalType()">, + EnumArg<"ReproducerSignal">, Required, Desc<"The signal to crash the debugger.">; } @@ -781,7 +781,7 @@ let Command = "process status" in { let Command = "process save_core" in { def process_save_core_style : Option<"style", "s">, Group<1>, - EnumArg<"SaveCoreStyle", "SaveCoreStyles()">, Desc<"Request a specific style " + EnumArg<"SaveCoreStyle">, Desc<"Request a specific style " "of corefile to be saved.">; def process_save_core_plugin_name : Option<"plugin-name", "p">, OptionalArg<"Plugin">, Desc<"Specify a plugin name to create the core file." @@ -812,7 +812,7 @@ let Command = "script add" in { def script_add_overwrite : Option<"overwrite", "o">, Groups<[1,2]>, Desc<"Overwrite an existing command at this node.">; def script_add_synchronicity : Option<"synchronicity", "s">, - EnumArg<"ScriptedCommandSynchronicity", "ScriptSynchroType()">, + EnumArg<"ScriptedCommandSynchronicity">, Desc<"Set the synchronicity of this command's executions with regard to " "LLDB event system.">; } @@ -828,7 +828,7 @@ let Command = "container add" in { let Command = "script" in { def script_language : Option<"language", "l">, - EnumArg<"ScriptLang", "ScriptOptionEnum()">, Desc<"Specify the scripting " + EnumArg<"ScriptLang">, Desc<"Specify the scripting " " language. If none is specific the default scripting language is used.">; } @@ -881,7 +881,7 @@ let Command = "source list" in { let Command = "target dependents" in { def dependents_no_dependents : Option<"no-dependents", "d">, Group<1>, - OptionalEnumArg<"Value", "OptionEnumValues(g_dependents_enumeration)">, + OptionalEnumArg<"Value">, Desc<"Whether or not to load dependents when creating a target. If the " "option is not specified, the value is implicitly 'default'. If the " "option is specified but without a value, the value is implicitly " @@ -1054,7 +1054,7 @@ let Command = "thread step scope" in { " block. 
This is particularly use in conjunction with --step-target to" " step through a complex calling sequence.">; def thread_step_scope_run_mode : Option<"run-mode", "m">, Group<1>, - EnumArg<"RunMode", "TriRunningModes()">, Desc<"Determine how to run other " + EnumArg<"RunMode">, Desc<"Determine how to run other " "threads while stepping the current thread.">; def thread_step_scope_step_over_regexp : Option<"step-over-regexp", "r">, Group<1>, Arg<"RegularExpression">, Desc<"A regular expression that defines " @@ -1070,7 +1070,7 @@ let Command = "thread until" in { def thread_until_thread : Option<"thread", "t">, Group<1>, Arg<"ThreadIndex">, Desc<"Thread index for the thread for until operation">; def thread_until_run_mode : Option<"run-mode", "m">, Group<1>, - EnumArg<"RunMode", "DuoRunningModes()">, Desc<"Determine how to run other " + EnumArg<"RunMode">, Desc<"Determine how to run other " "threads while stepping this one">; def thread_until_address : Option<"address", "a">, Group<1>, Arg<"AddressOrExpression">, Desc<"Run until we reach the specified address, " @@ -1333,7 +1333,7 @@ let Command = "watchpoint command add" in { Arg<"Boolean">, Desc<"Specify whether watchpoint command execution should " "terminate on error.">; def watchpoint_command_add_script_type : Option<"script-type", "s">, - EnumArg<"None", "ScriptOptionEnum()">, Desc<"Specify the language for the" + EnumArg<"ScriptLang">, Desc<"Specify the language for the" " commands - if none is specified, the lldb command interpreter will be " "used.">; def watchpoint_command_add_python_function : Option<"python-function", "F">, diff --git a/lldb/source/Commands/OptionsBase.td b/lldb/source/Commands/OptionsBase.td index f6967f067bf4..9612cc130334 100644 --- a/lldb/source/Commands/OptionsBase.td +++ b/lldb/source/Commands/OptionsBase.td @@ -151,15 +151,13 @@ class Arg<string type> { } // Gives the option an required argument. -class EnumArg<string type, string enum> { +class EnumArg<string type> { string ArgType = type; - string ArgEnum = enum; } // Gives the option an required argument. 
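The EnumArg/OptionalEnumArg simplification in this hunk works because enumeration values are no longer attached to individual options; they are looked up from the argument type's entry in the new central table. A minimal C++ sketch of that association follows; the enumeration contents and the 'value' field are assumptions, while string_value/usage mirror their use in this patch's GetArgumentHelp() changes.

#include <cstdint>
#include "llvm/ADT/ArrayRef.h"

// Field names string_value/usage match their use elsewhere in this patch;
// the 'value' member is assumed from the usual LLDB definition.
struct OptionEnumValueElement {
  int64_t value;
  const char *string_value;
  const char *usage;
};
using OptionEnumValues = llvm::ArrayRef<OptionEnumValueElement>;

// Hypothetical contents for the eArgTypeSortOrder enumeration; the real
// values live next to the argument table.
static constexpr OptionEnumValueElement g_sort_option_enumeration[] = {
    {0, "none", "No sorting, use the original symbol table order."},
    {1, "address", "Sort output by symbol address."},
    {2, "name", "Sort output by symbol name."},
};

// Because the enumeration is keyed off the argument type, an option only
// names its ArgType (EnumArg<"SortOrder">) and shared code can fetch the
// allowed values from the central table.
OptionEnumValues GetSortOrderEnumValues() {
  return OptionEnumValues(g_sort_option_enumeration);
}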
-class OptionalEnumArg<string type, string enum> { +class OptionalEnumArg<string type> { string ArgType = type; - string ArgEnum = enum; bit OptionalArg = 1; } diff --git a/lldb/source/Core/DumpDataExtractor.cpp b/lldb/source/Core/DumpDataExtractor.cpp index dc96a3454b72..c48250b07d16 100644 --- a/lldb/source/Core/DumpDataExtractor.cpp +++ b/lldb/source/Core/DumpDataExtractor.cpp @@ -119,7 +119,7 @@ static lldb::offset_t DumpAPInt(Stream *s, const DataExtractor &data, bool is_signed, unsigned radix) { llvm::Optional<llvm::APInt> apint = GetAPInt(data, &offset, byte_size); if (apint) { - std::string apint_str = toString(apint.getValue(), radix, is_signed); + std::string apint_str = toString(apint.value(), radix, is_signed); switch (radix) { case 2: s->Write("0b", 2); @@ -672,7 +672,7 @@ lldb::offset_t lldb_private::DumpDataExtractor( llvm::Optional<llvm::APInt> apint = GetAPInt(DE, &offset, semantics_byte_size); if (apint) { - llvm::APFloat apfloat(semantics, apint.getValue()); + llvm::APFloat apfloat(semantics, apint.value()); apfloat.toString(sv, format_precision, format_max_padding); if (!sv.empty()) { s->Printf("%*.*s", (int)sv.size(), (int)sv.size(), sv.data()); diff --git a/lldb/source/Core/ValueObjectChild.cpp b/lldb/source/Core/ValueObjectChild.cpp index d61d2021c318..ec29c029c174 100644 --- a/lldb/source/Core/ValueObjectChild.cpp +++ b/lldb/source/Core/ValueObjectChild.cpp @@ -83,7 +83,7 @@ ConstString ValueObjectChild::GetDisplayTypeName() { LazyBool ValueObjectChild::CanUpdateWithInvalidExecutionContext() { if (m_can_update_with_invalid_exe_ctx) - return m_can_update_with_invalid_exe_ctx.getValue(); + return m_can_update_with_invalid_exe_ctx.value(); if (m_parent) { ValueObject *opinionated_parent = m_parent->FollowParentChain([](ValueObject *valobj) -> bool { @@ -93,11 +93,11 @@ LazyBool ValueObjectChild::CanUpdateWithInvalidExecutionContext() { if (opinionated_parent) return (m_can_update_with_invalid_exe_ctx = opinionated_parent->CanUpdateWithInvalidExecutionContext()) - .getValue(); + .value(); } return (m_can_update_with_invalid_exe_ctx = this->ValueObject::CanUpdateWithInvalidExecutionContext()) - .getValue(); + .value(); } bool ValueObjectChild::UpdateValue() { diff --git a/lldb/source/Expression/DWARFExpression.cpp b/lldb/source/Expression/DWARFExpression.cpp index 9e6b21fc25ea..25f6a46d805b 100644 --- a/lldb/source/Expression/DWARFExpression.cpp +++ b/lldb/source/Expression/DWARFExpression.cpp @@ -1655,11 +1655,16 @@ bool DWARFExpression::Evaluate( case DW_OP_skip: { int16_t skip_offset = (int16_t)opcodes.GetU16(&offset); lldb::offset_t new_offset = offset + skip_offset; - if (opcodes.ValidOffset(new_offset)) + // New offset can point at the end of the data, in this case we should + // terminate the DWARF expression evaluation (will happen in the loop + // condition). + if (new_offset <= opcodes.GetByteSize()) offset = new_offset; else { if (error_ptr) - error_ptr->SetErrorString("Invalid opcode offset in DW_OP_skip."); + error_ptr->SetErrorStringWithFormatv( + "Invalid opcode offset in DW_OP_skip: {0}+({1}) > {2}", offset, + skip_offset, opcodes.GetByteSize()); return false; } } break; @@ -1684,11 +1689,16 @@ bool DWARFExpression::Evaluate( Scalar zero(0); if (tmp.ResolveValue(exe_ctx) != zero) { lldb::offset_t new_offset = offset + bra_offset; - if (opcodes.ValidOffset(new_offset)) + // New offset can point at the end of the data, in this case we should + // terminate the DWARF expression evaluation (will happen in the loop + // condition). 
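The bounds check added below for DW_OP_bra (and the matching one for DW_OP_skip just above) deliberately accepts an offset equal to GetByteSize(): a branch to one-past-the-end is treated as normal termination by the surrounding evaluation loop rather than as an error. A small standalone sketch of the same logic, with all names invented for illustration:

#include <cstdint>
#include <cstdio>

// Hypothetical stand-in for the DataExtractor holding the DWARF expression.
struct OpcodeBuffer {
  uint64_t byte_size;
  uint64_t GetByteSize() const { return byte_size; }
};

// Apply a DW_OP_skip/DW_OP_bra style relative branch. An offset exactly at
// GetByteSize() is accepted and simply ends evaluation; anything beyond it
// is reported as an invalid opcode offset.
bool ApplyBranch(const OpcodeBuffer &opcodes, uint64_t &offset,
                 int16_t relative) {
  uint64_t new_offset = offset + relative; // wraps correctly for backward skips
  if (new_offset <= opcodes.GetByteSize()) {
    offset = new_offset;
    return true;
  }
  std::fprintf(stderr, "Invalid opcode offset: %llu+(%d) > %llu\n",
               (unsigned long long)offset, (int)relative,
               (unsigned long long)opcodes.GetByteSize());
  return false;
}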
+ if (new_offset <= opcodes.GetByteSize()) offset = new_offset; else { if (error_ptr) - error_ptr->SetErrorString("Invalid opcode offset in DW_OP_bra."); + error_ptr->SetErrorStringWithFormatv( + "Invalid opcode offset in DW_OP_bra: {0}+({1}) > {2}", offset, + bra_offset, opcodes.GetByteSize()); return false; } } diff --git a/lldb/source/Expression/Materializer.cpp b/lldb/source/Expression/Materializer.cpp index 965a96b7f909..946ae10d69c2 100644 --- a/lldb/source/Expression/Materializer.cpp +++ b/lldb/source/Expression/Materializer.cpp @@ -22,11 +22,20 @@ #include "lldb/Utility/LLDBLog.h" #include "lldb/Utility/Log.h" #include "lldb/Utility/RegisterValue.h" +#include "lldb/lldb-forward.h" #include <memory> using namespace lldb_private; +// FIXME: these should be retrieved from the target +// instead of being hard-coded. Currently we +// assume that persistent vars are materialized +// as references, and thus pick the size of a +// 64-bit pointer. +static constexpr uint32_t g_default_var_alignment = 8; +static constexpr uint32_t g_default_var_byte_size = 8; + uint32_t Materializer::AddStructMember(Entity &entity) { uint32_t size = entity.GetSize(); uint32_t alignment = entity.GetAlignment(); @@ -54,8 +63,8 @@ public: m_delegate(delegate) { // Hard-coding to maximum size of a pointer since persistent variables are // materialized by reference - m_size = 8; - m_alignment = 8; + m_size = g_default_var_byte_size; + m_alignment = g_default_var_alignment; } void MakeAllocation(IRMemoryMap &map, Status &err) { @@ -412,16 +421,19 @@ uint32_t Materializer::AddPersistentVariable( return ret; } -class EntityVariable : public Materializer::Entity { +/// Base class for materialization of Variables and ValueObjects. +/// +/// Subclasses specify how to obtain the Value which is to be +/// materialized. 
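The EntityVariableBase class introduced below factors the heavy Materialize/Dematerialize logic out of EntityVariable so that a second, ValueObject-backed entity can reuse it. A condensed sketch of the interface split, using simplified stand-in types rather than the real LLDB ones:

#include <memory>
#include <string>

// Simplified stand-ins for lldb::ValueObjectSP and ExecutionContextScope.
struct ValueObject {};
using ValueObjectSP = std::shared_ptr<ValueObject>;
struct ExecutionContextScope {};

// The shared (de)materialization machinery lives in the base class; a
// subclass only has to say what the entity is called and how to (re)build
// its ValueObject for the current execution context.
class EntityVariableBase {
public:
  virtual ~EntityVariableBase() = default;

  void Materialize(ExecutionContextScope *scope) {
    // Re-created on every (de)materialization so that, e.g., a conditional
    // breakpoint hit on different frames gets a fresh ValueObject each time.
    ValueObjectSP valobj = SetupValueObject(scope);
    if (!valobj)
      return; // the real code reports an error naming GetName()
    // ... common copy-to-target / temporary-allocation logic elided ...
  }

private:
  virtual std::string GetName() const = 0;
  virtual ValueObjectSP SetupValueObject(ExecutionContextScope *scope) = 0;
};

// One subclass wraps a VariableSP, the other defers to a user-supplied
// provider callback; both reuse Materialize() unchanged.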
+class EntityVariableBase : public Materializer::Entity { public: - EntityVariable(lldb::VariableSP &variable_sp) - : Entity(), m_variable_sp(variable_sp) { + virtual ~EntityVariableBase() = default; + + EntityVariableBase() { // Hard-coding to maximum size of a pointer since all variables are // materialized by reference - m_size = 8; - m_alignment = 8; - m_is_reference = - m_variable_sp->GetType()->GetForwardCompilerType().IsReferenceType(); + m_size = g_default_var_byte_size; + m_alignment = g_default_var_alignment; } void Materialize(lldb::StackFrameSP &frame_sp, IRMemoryMap &map, @@ -433,7 +445,7 @@ public: LLDB_LOGF(log, "EntityVariable::Materialize [address = 0x%" PRIx64 ", m_variable_sp = %s]", - (uint64_t)load_addr, m_variable_sp->GetName().AsCString()); + (uint64_t)load_addr, GetName().GetCString()); } ExecutionContextScope *scope = frame_sp.get(); @@ -441,13 +453,11 @@ public: if (!scope) scope = map.GetBestExecutionContextScope(); - lldb::ValueObjectSP valobj_sp = - ValueObjectVariable::Create(scope, m_variable_sp); + lldb::ValueObjectSP valobj_sp = SetupValueObject(scope); if (!valobj_sp) { err.SetErrorStringWithFormat( - "couldn't get a value object for variable %s", - m_variable_sp->GetName().AsCString()); + "couldn't get a value object for variable %s", GetName().AsCString()); return; } @@ -455,7 +465,7 @@ public: if (valobj_error.Fail()) { err.SetErrorStringWithFormat("couldn't get the value of variable %s: %s", - m_variable_sp->GetName().AsCString(), + GetName().AsCString(), valobj_error.AsCString()); return; } @@ -468,7 +478,7 @@ public: if (!extract_error.Success()) { err.SetErrorStringWithFormat( "couldn't read contents of reference variable %s: %s", - m_variable_sp->GetName().AsCString(), extract_error.AsCString()); + GetName().AsCString(), extract_error.AsCString()); return; } @@ -481,7 +491,7 @@ public: if (!write_error.Success()) { err.SetErrorStringWithFormat("couldn't write the contents of reference " "variable %s to memory: %s", - m_variable_sp->GetName().AsCString(), + GetName().AsCString(), write_error.AsCString()); return; } @@ -497,7 +507,7 @@ public: if (!write_error.Success()) { err.SetErrorStringWithFormat( "couldn't write the address of variable %s to memory: %s", - m_variable_sp->GetName().AsCString(), write_error.AsCString()); + GetName().AsCString(), write_error.AsCString()); return; } } else { @@ -506,7 +516,7 @@ public: valobj_sp->GetData(data, extract_error); if (!extract_error.Success()) { err.SetErrorStringWithFormat("couldn't get the value of %s: %s", - m_variable_sp->GetName().AsCString(), + GetName().AsCString(), extract_error.AsCString()); return; } @@ -514,32 +524,29 @@ public: if (m_temporary_allocation != LLDB_INVALID_ADDRESS) { err.SetErrorStringWithFormat( "trying to create a temporary region for %s but one exists", - m_variable_sp->GetName().AsCString()); + GetName().AsCString()); return; } - if (data.GetByteSize() < m_variable_sp->GetType()->GetByteSize(scope)) { - if (data.GetByteSize() == 0 && - !m_variable_sp->LocationExpressionList().IsValid()) { + if (data.GetByteSize() < GetByteSize(scope)) { + if (data.GetByteSize() == 0 && !LocationExpressionIsValid()) { err.SetErrorStringWithFormat("the variable '%s' has no location, " "it may have been optimized out", - m_variable_sp->GetName().AsCString()); + GetName().AsCString()); } else { err.SetErrorStringWithFormat( "size of variable %s (%" PRIu64 ") is larger than the ValueObject's size (%" PRIu64 ")", - m_variable_sp->GetName().AsCString(), - 
m_variable_sp->GetType()->GetByteSize(scope).value_or(0), + GetName().AsCString(), GetByteSize(scope).value_or(0), data.GetByteSize()); } return; } - llvm::Optional<size_t> opt_bit_align = - m_variable_sp->GetType()->GetLayoutCompilerType().GetTypeBitAlign(scope); + llvm::Optional<size_t> opt_bit_align = GetTypeBitAlign(scope); if (!opt_bit_align) { err.SetErrorStringWithFormat("can't get the type alignment for %s", - m_variable_sp->GetName().AsCString()); + GetName().AsCString()); return; } @@ -561,7 +568,7 @@ public: if (!alloc_error.Success()) { err.SetErrorStringWithFormat( "couldn't allocate a temporary region for %s: %s", - m_variable_sp->GetName().AsCString(), alloc_error.AsCString()); + GetName().AsCString(), alloc_error.AsCString()); return; } @@ -573,7 +580,7 @@ public: if (!write_error.Success()) { err.SetErrorStringWithFormat( "couldn't write to the temporary region for %s: %s", - m_variable_sp->GetName().AsCString(), write_error.AsCString()); + GetName().AsCString(), write_error.AsCString()); return; } @@ -585,8 +592,7 @@ public: if (!pointer_write_error.Success()) { err.SetErrorStringWithFormat( "couldn't write the address of the temporary region for %s: %s", - m_variable_sp->GetName().AsCString(), - pointer_write_error.AsCString()); + GetName().AsCString(), pointer_write_error.AsCString()); } } } @@ -602,7 +608,7 @@ public: LLDB_LOGF(log, "EntityVariable::Dematerialize [address = 0x%" PRIx64 ", m_variable_sp = %s]", - (uint64_t)load_addr, m_variable_sp->GetName().AsCString()); + (uint64_t)load_addr, GetName().AsCString()); } if (m_temporary_allocation != LLDB_INVALID_ADDRESS) { @@ -611,13 +617,12 @@ public: if (!scope) scope = map.GetBestExecutionContextScope(); - lldb::ValueObjectSP valobj_sp = - ValueObjectVariable::Create(scope, m_variable_sp); + lldb::ValueObjectSP valobj_sp = SetupValueObject(scope); if (!valobj_sp) { err.SetErrorStringWithFormat( "couldn't get a value object for variable %s", - m_variable_sp->GetName().AsCString()); + GetName().AsCString()); return; } @@ -630,7 +635,7 @@ public: if (!extract_error.Success()) { err.SetErrorStringWithFormat("couldn't get the data for variable %s", - m_variable_sp->GetName().AsCString()); + GetName().AsCString()); return; } @@ -652,7 +657,7 @@ public: if (!set_error.Success()) { err.SetErrorStringWithFormat( "couldn't write the new contents of %s back into the variable", - m_variable_sp->GetName().AsCString()); + GetName().AsCString()); return; } } @@ -664,7 +669,7 @@ public: if (!free_error.Success()) { err.SetErrorStringWithFormat( "couldn't free the temporary region for %s: %s", - m_variable_sp->GetName().AsCString(), free_error.AsCString()); + GetName().AsCString(), free_error.AsCString()); return; } @@ -748,13 +753,140 @@ public: } private: - lldb::VariableSP m_variable_sp; + virtual ConstString GetName() const = 0; + + /// Creates and returns ValueObject tied to this variable + /// and prepares Entity for materialization. + /// + /// Called each time the Materializer (de)materializes a + /// variable. We re-create the ValueObject based on the + /// current ExecutionContextScope since clients such as + /// conditional breakpoints may materialize the same + /// EntityVariable multiple times with different frames. + /// + /// Each subsequent use of the EntityVariableBase interface + /// will query the newly created ValueObject until this + /// function is called again. 
+ virtual lldb::ValueObjectSP + SetupValueObject(ExecutionContextScope *scope) = 0; + + /// Returns size in bytes of the type associated with this variable + /// + /// \returns On success, returns byte size of the type associated + /// with this variable. Returns NoneType otherwise. + virtual llvm::Optional<uint64_t> + GetByteSize(ExecutionContextScope *scope) const = 0; + + /// Returns 'true' if the location expression associated with this variable + /// is valid. + virtual bool LocationExpressionIsValid() const = 0; + + /// Returns alignment of the type associated with this variable in bits. + /// + /// \returns On success, returns alignment in bits for the type associated + /// with this variable. Returns NoneType otherwise. + virtual llvm::Optional<size_t> + GetTypeBitAlign(ExecutionContextScope *scope) const = 0; + +protected: bool m_is_reference = false; lldb::addr_t m_temporary_allocation = LLDB_INVALID_ADDRESS; size_t m_temporary_allocation_size = 0; lldb::DataBufferSP m_original_data; }; +/// Represents an Entity constructed from a VariableSP. +/// +/// This class is used for materialization of variables for which +/// the user has a VariableSP on hand. The ValueObject is then +/// derived from the associated DWARF location expression when needed +/// by the Materializer. +class EntityVariable : public EntityVariableBase { +public: + EntityVariable(lldb::VariableSP &variable_sp) : m_variable_sp(variable_sp) { + m_is_reference = + m_variable_sp->GetType()->GetForwardCompilerType().IsReferenceType(); + } + + ConstString GetName() const override { return m_variable_sp->GetName(); } + + lldb::ValueObjectSP SetupValueObject(ExecutionContextScope *scope) override { + assert(m_variable_sp != nullptr); + return ValueObjectVariable::Create(scope, m_variable_sp); + } + + llvm::Optional<uint64_t> + GetByteSize(ExecutionContextScope *scope) const override { + return m_variable_sp->GetType()->GetByteSize(scope); + } + + bool LocationExpressionIsValid() const override { + return m_variable_sp->LocationExpressionList().IsValid(); + } + + llvm::Optional<size_t> + GetTypeBitAlign(ExecutionContextScope *scope) const override { + return m_variable_sp->GetType()->GetLayoutCompilerType().GetTypeBitAlign( + scope); + } + +private: + lldb::VariableSP m_variable_sp; ///< Variable that this entity is based on. +}; + +/// Represents an Entity constructed from a VariableSP. +/// +/// This class is used for materialization of variables for +/// which the user does not have a VariableSP available (e.g., +/// when materializing ivars). 
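EntityValueObject, defined next, is driven by a caller-supplied callback instead of a VariableSP. A hypothetical registration through the new Materializer::AddValueObject is sketched below; the ValueObjectProviderTy signature (ConstString plus StackFrame*, returning a ValueObjectSP) is inferred from how the provider is invoked in SetupValueObject, and the entity name and lookup strategy are invented for illustration.

// Assumes lldb/Expression/Materializer.h and lldb/Target/StackFrame.h are
// included and that we are inside the lldb_private namespace.
uint32_t AddCountIvar(lldb_private::Materializer &materializer,
                      lldb_private::Status &error) {
  using namespace lldb_private;
  return materializer.AddValueObject(
      ConstString("self->_count"), // hypothetical entity name
      [](ConstString name, StackFrame *frame) -> lldb::ValueObjectSP {
        if (!frame)
          return {};
        lldb::VariableSP var_sp;
        Status err;
        // Resolve the expression path against the current frame each time
        // the entity is (de)materialized.
        return frame->GetValueForVariableExpressionPath(
            name.GetStringRef(), lldb::eNoDynamicValues, /*options=*/0,
            var_sp, err);
      },
      error);
}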
+class EntityValueObject : public EntityVariableBase { +public: + EntityValueObject(ConstString name, ValueObjectProviderTy provider) + : m_name(name), m_valobj_provider(std::move(provider)) { + assert(m_valobj_provider); + } + + ConstString GetName() const override { return m_name; } + + lldb::ValueObjectSP SetupValueObject(ExecutionContextScope *scope) override { + m_valobj_sp = + m_valobj_provider(GetName(), scope->CalculateStackFrame().get()); + + if (m_valobj_sp) + m_is_reference = m_valobj_sp->GetCompilerType().IsReferenceType(); + + return m_valobj_sp; + } + + llvm::Optional<uint64_t> + GetByteSize(ExecutionContextScope *scope) const override { + if (m_valobj_sp) + return m_valobj_sp->GetCompilerType().GetByteSize(scope); + + return {}; + } + + bool LocationExpressionIsValid() const override { + if (m_valobj_sp) + return m_valobj_sp->GetError().Success(); + + return false; + } + + llvm::Optional<size_t> + GetTypeBitAlign(ExecutionContextScope *scope) const override { + if (m_valobj_sp) + return m_valobj_sp->GetCompilerType().GetTypeBitAlign(scope); + + return {}; + } + +private: + ConstString m_name; + lldb::ValueObjectSP m_valobj_sp; + ValueObjectProviderTy m_valobj_provider; +}; + uint32_t Materializer::AddVariable(lldb::VariableSP &variable_sp, Status &err) { EntityVector::iterator iter = m_entities.insert(m_entities.end(), EntityUP()); *iter = std::make_unique<EntityVariable>(variable_sp); @@ -763,6 +895,17 @@ uint32_t Materializer::AddVariable(lldb::VariableSP &variable_sp, Status &err) { return ret; } +uint32_t Materializer::AddValueObject(ConstString name, + ValueObjectProviderTy valobj_provider, + Status &err) { + assert(valobj_provider); + EntityVector::iterator iter = m_entities.insert(m_entities.end(), EntityUP()); + *iter = std::make_unique<EntityValueObject>(name, std::move(valobj_provider)); + uint32_t ret = AddStructMember(**iter); + (*iter)->SetOffset(ret); + return ret; +} + class EntityResultVariable : public Materializer::Entity { public: EntityResultVariable(const CompilerType &type, bool is_program_reference, @@ -772,8 +915,8 @@ public: m_keep_in_memory(keep_in_memory), m_delegate(delegate) { // Hard-coding to maximum size of a pointer since all results are // materialized by reference - m_size = 8; - m_alignment = 8; + m_size = g_default_var_byte_size; + m_alignment = g_default_var_alignment; } void Materialize(lldb::StackFrameSP &frame_sp, IRMemoryMap &map, @@ -1050,8 +1193,8 @@ class EntitySymbol : public Materializer::Entity { public: EntitySymbol(const Symbol &symbol) : Entity(), m_symbol(symbol) { // Hard-coding to maximum size of a symbol - m_size = 8; - m_alignment = 8; + m_size = g_default_var_byte_size; + m_alignment = g_default_var_alignment; } void Materialize(lldb::StackFrameSP &frame_sp, IRMemoryMap &map, diff --git a/lldb/source/Expression/UserExpression.cpp b/lldb/source/Expression/UserExpression.cpp index f821603f03e5..186e414e6879 100644 --- a/lldb/source/Expression/UserExpression.cpp +++ b/lldb/source/Expression/UserExpression.cpp @@ -98,28 +98,34 @@ bool UserExpression::MatchesContext(ExecutionContext &exe_ctx) { return LockAndCheckContext(exe_ctx, target_sp, process_sp, frame_sp); } -lldb::addr_t UserExpression::GetObjectPointer(lldb::StackFrameSP frame_sp, - ConstString &object_name, - Status &err) { +lldb::ValueObjectSP UserExpression::GetObjectPointerValueObject( + lldb::StackFrameSP frame_sp, ConstString const &object_name, Status &err) { err.Clear(); if (!frame_sp) { err.SetErrorStringWithFormat( "Couldn't load '%s' because the context 
is incomplete", object_name.AsCString()); - return LLDB_INVALID_ADDRESS; + return {}; } lldb::VariableSP var_sp; lldb::ValueObjectSP valobj_sp; - valobj_sp = frame_sp->GetValueForVariableExpressionPath( + return frame_sp->GetValueForVariableExpressionPath( object_name.GetStringRef(), lldb::eNoDynamicValues, StackFrame::eExpressionPathOptionCheckPtrVsMember | StackFrame::eExpressionPathOptionsNoFragileObjcIvar | StackFrame::eExpressionPathOptionsNoSyntheticChildren | StackFrame::eExpressionPathOptionsNoSyntheticArrayRange, var_sp, err); +} + +lldb::addr_t UserExpression::GetObjectPointer(lldb::StackFrameSP frame_sp, + ConstString &object_name, + Status &err) { + auto valobj_sp = + GetObjectPointerValueObject(std::move(frame_sp), object_name, err); if (!err.Success() || !valobj_sp.get()) return LLDB_INVALID_ADDRESS; diff --git a/lldb/source/Host/common/File.cpp b/lldb/source/Host/common/File.cpp index 760fb98fb496..ce5283a61375 100644 --- a/lldb/source/Host/common/File.cpp +++ b/lldb/source/Host/common/File.cpp @@ -833,22 +833,20 @@ SerialPort::Create(int fd, OpenOptions options, Options serial_options, if (llvm::Error error = term.SetRaw()) return std::move(error); if (serial_options.BaudRate) { - if (llvm::Error error = - term.SetBaudRate(serial_options.BaudRate.getValue())) + if (llvm::Error error = term.SetBaudRate(serial_options.BaudRate.value())) return std::move(error); } if (serial_options.Parity) { - if (llvm::Error error = term.SetParity(serial_options.Parity.getValue())) + if (llvm::Error error = term.SetParity(serial_options.Parity.value())) return std::move(error); } if (serial_options.ParityCheck) { if (llvm::Error error = - term.SetParityCheck(serial_options.ParityCheck.getValue())) + term.SetParityCheck(serial_options.ParityCheck.value())) return std::move(error); } if (serial_options.StopBits) { - if (llvm::Error error = - term.SetStopBits(serial_options.StopBits.getValue())) + if (llvm::Error error = term.SetStopBits(serial_options.StopBits.value())) return std::move(error); } diff --git a/lldb/source/Host/common/Terminal.cpp b/lldb/source/Host/common/Terminal.cpp index 831e9dff4eb1..5da5ee8f1468 100644 --- a/lldb/source/Host/common/Terminal.cpp +++ b/lldb/source/Host/common/Terminal.cpp @@ -281,11 +281,11 @@ llvm::Error Terminal::SetBaudRate(unsigned int baud_rate) { return llvm::createStringError(llvm::inconvertibleErrorCode(), "baud rate %d unsupported by the platform", baud_rate); - if (::cfsetispeed(&fd_termios, val.getValue()) != 0) + if (::cfsetispeed(&fd_termios, val.value()) != 0) return llvm::createStringError( std::error_code(errno, std::generic_category()), "setting input baud rate failed"); - if (::cfsetospeed(&fd_termios, val.getValue()) != 0) + if (::cfsetospeed(&fd_termios, val.value()) != 0) return llvm::createStringError( std::error_code(errno, std::generic_category()), "setting output baud rate failed"); diff --git a/lldb/source/Interpreter/CommandObject.cpp b/lldb/source/Interpreter/CommandObject.cpp index 910d740625e9..719cfbc9e802 100644 --- a/lldb/source/Interpreter/CommandObject.cpp +++ b/lldb/source/Interpreter/CommandObject.cpp @@ -16,6 +16,7 @@ #include <cstdlib> #include "lldb/Core/Address.h" +#include "lldb/Interpreter/CommandOptionArgumentTable.h" #include "lldb/Interpreter/Options.h" #include "lldb/Utility/ArchSpec.h" #include "llvm/ADT/ScopeExit.h" @@ -31,6 +32,7 @@ #include "lldb/Target/Language.h" #include "lldb/Interpreter/CommandInterpreter.h" +#include "lldb/Interpreter/CommandOptionArgumentTable.h" #include 
"lldb/Interpreter/CommandReturnObject.h" using namespace lldb; @@ -365,19 +367,16 @@ CommandObject::GetArgumentEntryAtIndex(int idx) { const CommandObject::ArgumentTableEntry * CommandObject::FindArgumentDataByType(CommandArgumentType arg_type) { - const ArgumentTableEntry *table = CommandObject::GetArgumentTable(); - for (int i = 0; i < eArgTypeLastArg; ++i) - if (table[i].arg_type == arg_type) - return &(table[i]); + if (g_argument_table[i].arg_type == arg_type) + return &(g_argument_table[i]); return nullptr; } void CommandObject::GetArgumentHelp(Stream &str, CommandArgumentType arg_type, CommandInterpreter &interpreter) { - const ArgumentTableEntry *table = CommandObject::GetArgumentTable(); - const ArgumentTableEntry *entry = &(table[arg_type]); + const ArgumentTableEntry *entry = &(g_argument_table[arg_type]); // The table is *supposed* to be kept in arg_type order, but someone *could* // have messed it up... @@ -400,14 +399,32 @@ void CommandObject::GetArgumentHelp(Stream &str, CommandArgumentType arg_type, interpreter.OutputHelpText(str, name_str.GetString(), "--", help_text, name_str.GetSize()); } - } else + } else { interpreter.OutputFormattedHelpText(str, name_str.GetString(), "--", entry->help_text, name_str.GetSize()); + + // Print enum values and their description if any. + OptionEnumValues enum_values = g_argument_table[arg_type].enum_values; + if (!enum_values.empty()) { + str.EOL(); + size_t longest = 0; + for (const OptionEnumValueElement &element : enum_values) + longest = + std::max(longest, llvm::StringRef(element.string_value).size()); + str.IndentMore(5); + for (const OptionEnumValueElement &element : enum_values) { + str.Indent(); + interpreter.OutputHelpText(str, element.string_value, ":", + element.usage, longest); + } + str.IndentLess(5); + str.EOL(); + } + } } const char *CommandObject::GetArgumentName(CommandArgumentType arg_type) { - const ArgumentTableEntry *entry = - &(CommandObject::GetArgumentTable()[arg_type]); + const ArgumentTableEntry *entry = &(g_argument_table[arg_type]); // The table is *supposed* to be kept in arg_type order, but someone *could* // have messed it up... @@ -544,287 +561,13 @@ CommandObject::LookupArgumentName(llvm::StringRef arg_name) { arg_name = arg_name.ltrim('<').rtrim('>'); - const ArgumentTableEntry *table = GetArgumentTable(); for (int i = 0; i < eArgTypeLastArg; ++i) - if (arg_name == table[i].arg_name) - return_type = GetArgumentTable()[i].arg_type; + if (arg_name == g_argument_table[i].arg_name) + return_type = g_argument_table[i].arg_type; return return_type; } -static llvm::StringRef RegisterNameHelpTextCallback() { - return "Register names can be specified using the architecture specific " - "names. " - "They can also be specified using generic names. Not all generic " - "entities have " - "registers backing them on all architectures. 
When they don't the " - "generic name " - "will return an error.\n" - "The generic names defined in lldb are:\n" - "\n" - "pc - program counter register\n" - "ra - return address register\n" - "fp - frame pointer register\n" - "sp - stack pointer register\n" - "flags - the flags register\n" - "arg{1-6} - integer argument passing registers.\n"; -} - -static llvm::StringRef BreakpointIDHelpTextCallback() { - return "Breakpoints are identified using major and minor numbers; the major " - "number corresponds to the single entity that was created with a " - "'breakpoint " - "set' command; the minor numbers correspond to all the locations that " - "were " - "actually found/set based on the major breakpoint. A full breakpoint " - "ID might " - "look like 3.14, meaning the 14th location set for the 3rd " - "breakpoint. You " - "can specify all the locations of a breakpoint by just indicating the " - "major " - "breakpoint number. A valid breakpoint ID consists either of just the " - "major " - "number, or the major number followed by a dot and the location " - "number (e.g. " - "3 or 3.2 could both be valid breakpoint IDs.)"; -} - -static llvm::StringRef BreakpointIDRangeHelpTextCallback() { - return "A 'breakpoint ID list' is a manner of specifying multiple " - "breakpoints. " - "This can be done through several mechanisms. The easiest way is to " - "just " - "enter a space-separated list of breakpoint IDs. To specify all the " - "breakpoint locations under a major breakpoint, you can use the major " - "breakpoint number followed by '.*', eg. '5.*' means all the " - "locations under " - "breakpoint 5. You can also indicate a range of breakpoints by using " - "<start-bp-id> - <end-bp-id>. The start-bp-id and end-bp-id for a " - "range can " - "be any valid breakpoint IDs. It is not legal, however, to specify a " - "range " - "using specific locations that cross major breakpoint numbers. I.e. " - "3.2 - 3.7" - " is legal; 2 - 5 is legal; but 3.2 - 4.4 is not legal."; -} - -static llvm::StringRef BreakpointNameHelpTextCallback() { - return "A name that can be added to a breakpoint when it is created, or " - "later " - "on with the \"breakpoint name add\" command. " - "Breakpoint names can be used to specify breakpoints in all the " - "places breakpoint IDs " - "and breakpoint ID ranges can be used. As such they provide a " - "convenient way to group breakpoints, " - "and to operate on breakpoints you create without having to track the " - "breakpoint number. " - "Note, the attributes you set when using a breakpoint name in a " - "breakpoint command don't " - "adhere to the name, but instead are set individually on all the " - "breakpoints currently tagged with that " - "name. Future breakpoints " - "tagged with that name will not pick up the attributes previously " - "given using that name. " - "In order to distinguish breakpoint names from breakpoint IDs and " - "ranges, " - "names must start with a letter from a-z or A-Z and cannot contain " - "spaces, \".\" or \"-\". " - "Also, breakpoint names can only be applied to breakpoints, not to " - "breakpoint locations."; -} - -static llvm::StringRef GDBFormatHelpTextCallback() { - return "A GDB format consists of a repeat count, a format letter and a size " - "letter. " - "The repeat count is optional and defaults to 1. The format letter is " - "optional " - "and defaults to the previous format that was used. 
The size letter " - "is optional " - "and defaults to the previous size that was used.\n" - "\n" - "Format letters include:\n" - "o - octal\n" - "x - hexadecimal\n" - "d - decimal\n" - "u - unsigned decimal\n" - "t - binary\n" - "f - float\n" - "a - address\n" - "i - instruction\n" - "c - char\n" - "s - string\n" - "T - OSType\n" - "A - float as hex\n" - "\n" - "Size letters include:\n" - "b - 1 byte (byte)\n" - "h - 2 bytes (halfword)\n" - "w - 4 bytes (word)\n" - "g - 8 bytes (giant)\n" - "\n" - "Example formats:\n" - "32xb - show 32 1 byte hexadecimal integer values\n" - "16xh - show 16 2 byte hexadecimal integer values\n" - "64 - show 64 2 byte hexadecimal integer values (format and size " - "from the last format)\n" - "dw - show 1 4 byte decimal integer value\n"; -} - -static llvm::StringRef FormatHelpTextCallback() { - static std::string help_text; - - if (!help_text.empty()) - return help_text; - - StreamString sstr; - sstr << "One of the format names (or one-character names) that can be used " - "to show a variable's value:\n"; - for (Format f = eFormatDefault; f < kNumFormats; f = Format(f + 1)) { - if (f != eFormatDefault) - sstr.PutChar('\n'); - - char format_char = FormatManager::GetFormatAsFormatChar(f); - if (format_char) - sstr.Printf("'%c' or ", format_char); - - sstr.Printf("\"%s\"", FormatManager::GetFormatAsCString(f)); - } - - sstr.Flush(); - - help_text = std::string(sstr.GetString()); - - return help_text; -} - -static llvm::StringRef LanguageTypeHelpTextCallback() { - static std::string help_text; - - if (!help_text.empty()) - return help_text; - - StreamString sstr; - sstr << "One of the following languages:\n"; - - Language::PrintAllLanguages(sstr, " ", "\n"); - - sstr.Flush(); - - help_text = std::string(sstr.GetString()); - - return help_text; -} - -static llvm::StringRef SummaryStringHelpTextCallback() { - return "A summary string is a way to extract information from variables in " - "order to present them using a summary.\n" - "Summary strings contain static text, variables, scopes and control " - "sequences:\n" - " - Static text can be any sequence of non-special characters, i.e. " - "anything but '{', '}', '$', or '\\'.\n" - " - Variables are sequences of characters beginning with ${, ending " - "with } and that contain symbols in the format described below.\n" - " - Scopes are any sequence of text between { and }. Anything " - "included in a scope will only appear in the output summary if there " - "were no errors.\n" - " - Control sequences are the usual C/C++ '\\a', '\\n', ..., plus " - "'\\$', '\\{' and '\\}'.\n" - "A summary string works by copying static text verbatim, turning " - "control sequences into their character counterpart, expanding " - "variables and trying to expand scopes.\n" - "A variable is expanded by giving it a value other than its textual " - "representation, and the way this is done depends on what comes after " - "the ${ marker.\n" - "The most common sequence if ${var followed by an expression path, " - "which is the text one would type to access a member of an aggregate " - "types, given a variable of that type" - " (e.g. if type T has a member named x, which has a member named y, " - "and if t is of type T, the expression path would be .x.y and the way " - "to fit that into a summary string would be" - " ${var.x.y}). You can also use ${*var followed by an expression path " - "and in that case the object referred by the path will be " - "dereferenced before being displayed." 
- " If the object is not a pointer, doing so will cause an error. For " - "additional details on expression paths, you can type 'help " - "expr-path'. \n" - "By default, summary strings attempt to display the summary for any " - "variable they reference, and if that fails the value. If neither can " - "be shown, nothing is displayed." - "In a summary string, you can also use an array index [n], or a " - "slice-like range [n-m]. This can have two different meanings " - "depending on what kind of object the expression" - " path refers to:\n" - " - if it is a scalar type (any basic type like int, float, ...) the " - "expression is a bitfield, i.e. the bits indicated by the indexing " - "operator are extracted out of the number" - " and displayed as an individual variable\n" - " - if it is an array or pointer the array items indicated by the " - "indexing operator are shown as the result of the variable. if the " - "expression is an array, real array items are" - " printed; if it is a pointer, the pointer-as-array syntax is used to " - "obtain the values (this means, the latter case can have no range " - "checking)\n" - "If you are trying to display an array for which the size is known, " - "you can also use [] instead of giving an exact range. This has the " - "effect of showing items 0 thru size - 1.\n" - "Additionally, a variable can contain an (optional) format code, as " - "in ${var.x.y%code}, where code can be any of the valid formats " - "described in 'help format', or one of the" - " special symbols only allowed as part of a variable:\n" - " %V: show the value of the object by default\n" - " %S: show the summary of the object by default\n" - " %@: show the runtime-provided object description (for " - "Objective-C, it calls NSPrintForDebugger; for C/C++ it does " - "nothing)\n" - " %L: show the location of the object (memory address or a " - "register name)\n" - " %#: show the number of children of the object\n" - " %T: show the type of the object\n" - "Another variable that you can use in summary strings is ${svar . " - "This sequence works exactly like ${var, including the fact that " - "${*svar is an allowed sequence, but uses" - " the object's synthetic children provider instead of the actual " - "objects. For instance, if you are using STL synthetic children " - "providers, the following summary string would" - " count the number of actual elements stored in an std::list:\n" - "type summary add -s \"${svar%#}\" -x \"std::list<\""; -} - -static llvm::StringRef ExprPathHelpTextCallback() { - return "An expression path is the sequence of symbols that is used in C/C++ " - "to access a member variable of an aggregate object (class).\n" - "For instance, given a class:\n" - " class foo {\n" - " int a;\n" - " int b; .\n" - " foo* next;\n" - " };\n" - "the expression to read item b in the item pointed to by next for foo " - "aFoo would be aFoo.next->b.\n" - "Given that aFoo could just be any object of type foo, the string " - "'.next->b' is the expression path, because it can be attached to any " - "foo instance to achieve the effect.\n" - "Expression paths in LLDB include dot (.) 
and arrow (->) operators, " - "and most commands using expression paths have ways to also accept " - "the star (*) operator.\n" - "The meaning of these operators is the same as the usual one given to " - "them by the C/C++ standards.\n" - "LLDB also has support for indexing ([ ]) in expression paths, and " - "extends the traditional meaning of the square brackets operator to " - "allow bitfield extraction:\n" - "for objects of native types (int, float, char, ...) saying '[n-m]' " - "as an expression path (where n and m are any positive integers, e.g. " - "[3-5]) causes LLDB to extract" - " bits n thru m from the value of the variable. If n == m, [n] is " - "also allowed as a shortcut syntax. For arrays and pointers, " - "expression paths can only contain one index" - " and the meaning of the operation is the same as the one defined by " - "C/C++ (item extraction). Some commands extend bitfield-like syntax " - "for arrays and pointers with the" - " meaning of array slicing (taking elements n thru m inside the array " - "or pointed-to memory)."; -} - void CommandObject::FormatLongHelpText(Stream &output_strm, llvm::StringRef long_help) { CommandInterpreter &interpreter = GetCommandInterpreter(); @@ -924,14 +667,14 @@ const char *CommandObject::GetArgumentTypeAsCString( const lldb::CommandArgumentType arg_type) { assert(arg_type < eArgTypeLastArg && "Invalid argument type passed to GetArgumentTypeAsCString"); - return GetArgumentTable()[arg_type].arg_name; + return g_argument_table[arg_type].arg_name; } const char *CommandObject::GetArgumentDescriptionAsCString( const lldb::CommandArgumentType arg_type) { assert(arg_type < eArgTypeLastArg && "Invalid argument type passed to GetArgumentDescriptionAsCString"); - return GetArgumentTable()[arg_type].help_text; + return g_argument_table[arg_type].help_text; } Target &CommandObject::GetDummyTarget() { @@ -1028,124 +771,3 @@ bool CommandObjectRaw::Execute(const char *args_string, } return handled; } - -static llvm::StringRef arch_helper() { - static StreamString g_archs_help; - if (g_archs_help.Empty()) { - StringList archs; - - ArchSpec::ListSupportedArchNames(archs); - g_archs_help.Printf("These are the supported architecture names:\n"); - archs.Join("\n", g_archs_help); - } - return g_archs_help.GetString(); -} - -static constexpr CommandObject::ArgumentTableEntry g_arguments_data[] = { - // clang-format off - { eArgTypeAddress, "address", CommandCompletions::eNoCompletion, { nullptr, false }, "A valid address in the target program's execution space." }, - { eArgTypeAddressOrExpression, "address-expression", CommandCompletions::eNoCompletion, { nullptr, false }, "An expression that resolves to an address." }, - { eArgTypeAliasName, "alias-name", CommandCompletions::eNoCompletion, { nullptr, false }, "The name of an abbreviation (alias) for a debugger command." }, - { eArgTypeAliasOptions, "options-for-aliased-command", CommandCompletions::eNoCompletion, { nullptr, false }, "Command options to be used as part of an alias (abbreviation) definition. (See 'help commands alias' for more information.)" }, - { eArgTypeArchitecture, "arch", CommandCompletions::eArchitectureCompletion, { arch_helper, true }, "The architecture name, e.g. i386 or x86_64." 
}, - { eArgTypeBoolean, "boolean", CommandCompletions::eNoCompletion, { nullptr, false }, "A Boolean value: 'true' or 'false'" }, - { eArgTypeBreakpointID, "breakpt-id", CommandCompletions::eNoCompletion, { BreakpointIDHelpTextCallback, false }, nullptr }, - { eArgTypeBreakpointIDRange, "breakpt-id-list", CommandCompletions::eNoCompletion, { BreakpointIDRangeHelpTextCallback, false }, nullptr }, - { eArgTypeBreakpointName, "breakpoint-name", CommandCompletions::eBreakpointNameCompletion, { BreakpointNameHelpTextCallback, false }, nullptr }, - { eArgTypeByteSize, "byte-size", CommandCompletions::eNoCompletion, { nullptr, false }, "Number of bytes to use." }, - { eArgTypeClassName, "class-name", CommandCompletions::eNoCompletion, { nullptr, false }, "Then name of a class from the debug information in the program." }, - { eArgTypeCommandName, "cmd-name", CommandCompletions::eNoCompletion, { nullptr, false }, "A debugger command (may be multiple words), without any options or arguments." }, - { eArgTypeCount, "count", CommandCompletions::eNoCompletion, { nullptr, false }, "An unsigned integer." }, - { eArgTypeDirectoryName, "directory", CommandCompletions::eDiskDirectoryCompletion, { nullptr, false }, "A directory name." }, - { eArgTypeDisassemblyFlavor, "disassembly-flavor", CommandCompletions::eDisassemblyFlavorCompletion, { nullptr, false }, "A disassembly flavor recognized by your disassembly plugin. Currently the only valid options are \"att\" and \"intel\" for Intel targets" }, - { eArgTypeDescriptionVerbosity, "description-verbosity", CommandCompletions::eNoCompletion, { nullptr, false }, "How verbose the output of 'po' should be." }, - { eArgTypeEndAddress, "end-address", CommandCompletions::eNoCompletion, { nullptr, false }, "Help text goes here." }, - { eArgTypeExpression, "expr", CommandCompletions::eNoCompletion, { nullptr, false }, "Help text goes here." }, - { eArgTypeExpressionPath, "expr-path", CommandCompletions::eNoCompletion, { ExprPathHelpTextCallback, true }, nullptr }, - { eArgTypeExprFormat, "expression-format", CommandCompletions::eNoCompletion, { nullptr, false }, "[ [bool|b] | [bin] | [char|c] | [oct|o] | [dec|i|d|u] | [hex|x] | [float|f] | [cstr|s] ]" }, - { eArgTypeFilename, "filename", CommandCompletions::eDiskFileCompletion, { nullptr, false }, "The name of a file (can include path)." }, - { eArgTypeFormat, "format", CommandCompletions::eNoCompletion, { FormatHelpTextCallback, true }, nullptr }, - { eArgTypeFrameIndex, "frame-index", CommandCompletions::eFrameIndexCompletion, { nullptr, false }, "Index into a thread's list of frames." }, - { eArgTypeFullName, "fullname", CommandCompletions::eNoCompletion, { nullptr, false }, "Help text goes here." }, - { eArgTypeFunctionName, "function-name", CommandCompletions::eNoCompletion, { nullptr, false }, "The name of a function." }, - { eArgTypeFunctionOrSymbol, "function-or-symbol", CommandCompletions::eNoCompletion, { nullptr, false }, "The name of a function or symbol." }, - { eArgTypeGDBFormat, "gdb-format", CommandCompletions::eNoCompletion, { GDBFormatHelpTextCallback, true }, nullptr }, - { eArgTypeHelpText, "help-text", CommandCompletions::eNoCompletion, { nullptr, false }, "Text to be used as help for some other entity in LLDB" }, - { eArgTypeIndex, "index", CommandCompletions::eNoCompletion, { nullptr, false }, "An index into a list." 
}, - { eArgTypeLanguage, "source-language", CommandCompletions::eTypeLanguageCompletion, { LanguageTypeHelpTextCallback, true }, nullptr }, - { eArgTypeLineNum, "linenum", CommandCompletions::eNoCompletion, { nullptr, false }, "Line number in a source file." }, - { eArgTypeFileLineColumn, "linespec", CommandCompletions::eNoCompletion, { nullptr, false }, "A source specifier in the form file:line[:column]" }, - { eArgTypeLogCategory, "log-category", CommandCompletions::eNoCompletion, { nullptr, false }, "The name of a category within a log channel, e.g. all (try \"log list\" to see a list of all channels and their categories." }, - { eArgTypeLogChannel, "log-channel", CommandCompletions::eNoCompletion, { nullptr, false }, "The name of a log channel, e.g. process.gdb-remote (try \"log list\" to see a list of all channels and their categories)." }, - { eArgTypeMethod, "method", CommandCompletions::eNoCompletion, { nullptr, false }, "A C++ method name." }, - { eArgTypeName, "name", CommandCompletions::eTypeCategoryNameCompletion, { nullptr, false }, "Help text goes here." }, - { eArgTypeNewPathPrefix, "new-path-prefix", CommandCompletions::eNoCompletion, { nullptr, false }, "Help text goes here." }, - { eArgTypeNumLines, "num-lines", CommandCompletions::eNoCompletion, { nullptr, false }, "The number of lines to use." }, - { eArgTypeNumberPerLine, "number-per-line", CommandCompletions::eNoCompletion, { nullptr, false }, "The number of items per line to display." }, - { eArgTypeOffset, "offset", CommandCompletions::eNoCompletion, { nullptr, false }, "Help text goes here." }, - { eArgTypeOldPathPrefix, "old-path-prefix", CommandCompletions::eNoCompletion, { nullptr, false }, "Help text goes here." }, - { eArgTypeOneLiner, "one-line-command", CommandCompletions::eNoCompletion, { nullptr, false }, "A command that is entered as a single line of text." }, - { eArgTypePath, "path", CommandCompletions::eDiskFileCompletion, { nullptr, false }, "Path." }, - { eArgTypePermissionsNumber, "perms-numeric", CommandCompletions::eNoCompletion, { nullptr, false }, "Permissions given as an octal number (e.g. 755)." }, - { eArgTypePermissionsString, "perms=string", CommandCompletions::eNoCompletion, { nullptr, false }, "Permissions given as a string value (e.g. rw-r-xr--)." }, - { eArgTypePid, "pid", CommandCompletions::eProcessIDCompletion, { nullptr, false }, "The process ID number." }, - { eArgTypePlugin, "plugin", CommandCompletions::eProcessPluginCompletion, { nullptr, false }, "Help text goes here." }, - { eArgTypeProcessName, "process-name", CommandCompletions::eProcessNameCompletion, { nullptr, false }, "The name of the process." }, - { eArgTypePythonClass, "python-class", CommandCompletions::eNoCompletion, { nullptr, false }, "The name of a Python class." }, - { eArgTypePythonFunction, "python-function", CommandCompletions::eNoCompletion, { nullptr, false }, "The name of a Python function." }, - { eArgTypePythonScript, "python-script", CommandCompletions::eNoCompletion, { nullptr, false }, "Source code written in Python." }, - { eArgTypeQueueName, "queue-name", CommandCompletions::eNoCompletion, { nullptr, false }, "The name of the thread queue." }, - { eArgTypeRegisterName, "register-name", CommandCompletions::eNoCompletion, { RegisterNameHelpTextCallback, true }, nullptr }, - { eArgTypeRegularExpression, "regular-expression", CommandCompletions::eNoCompletion, { nullptr, false }, "A POSIX-compliant extended regular expression." 
}, - { eArgTypeRunArgs, "run-args", CommandCompletions::eNoCompletion, { nullptr, false }, "Arguments to be passed to the target program when it starts executing." }, - { eArgTypeRunMode, "run-mode", CommandCompletions::eNoCompletion, { nullptr, false }, "Help text goes here." }, - { eArgTypeScriptedCommandSynchronicity, "script-cmd-synchronicity", CommandCompletions::eNoCompletion, { nullptr, false }, "The synchronicity to use to run scripted commands with regard to LLDB event system." }, - { eArgTypeScriptLang, "script-language", CommandCompletions::eNoCompletion, { nullptr, false }, "The scripting language to be used for script-based commands. Supported languages are python and lua." }, - { eArgTypeSearchWord, "search-word", CommandCompletions::eNoCompletion, { nullptr, false }, "Any word of interest for search purposes." }, - { eArgTypeSelector, "selector", CommandCompletions::eNoCompletion, { nullptr, false }, "An Objective-C selector name." }, - { eArgTypeSettingIndex, "setting-index", CommandCompletions::eNoCompletion, { nullptr, false }, "An index into a settings variable that is an array (try 'settings list' to see all the possible settings variables and their types)." }, - { eArgTypeSettingKey, "setting-key", CommandCompletions::eNoCompletion, { nullptr, false }, "A key into a settings variables that is a dictionary (try 'settings list' to see all the possible settings variables and their types)." }, - { eArgTypeSettingPrefix, "setting-prefix", CommandCompletions::eNoCompletion, { nullptr, false }, "The name of a settable internal debugger variable up to a dot ('.'), e.g. 'target.process.'" }, - { eArgTypeSettingVariableName, "setting-variable-name", CommandCompletions::eNoCompletion, { nullptr, false }, "The name of a settable internal debugger variable. Type 'settings list' to see a complete list of such variables." }, - { eArgTypeShlibName, "shlib-name", CommandCompletions::eNoCompletion, { nullptr, false }, "The name of a shared library." }, - { eArgTypeSourceFile, "source-file", CommandCompletions::eSourceFileCompletion, { nullptr, false }, "The name of a source file.." }, - { eArgTypeSortOrder, "sort-order", CommandCompletions::eNoCompletion, { nullptr, false }, "Specify a sort order when dumping lists." }, - { eArgTypeStartAddress, "start-address", CommandCompletions::eNoCompletion, { nullptr, false }, "Help text goes here." }, - { eArgTypeSummaryString, "summary-string", CommandCompletions::eNoCompletion, { SummaryStringHelpTextCallback, true }, nullptr }, - { eArgTypeSymbol, "symbol", CommandCompletions::eSymbolCompletion, { nullptr, false }, "Any symbol name (function name, variable, argument, etc.)" }, - { eArgTypeThreadID, "thread-id", CommandCompletions::eNoCompletion, { nullptr, false }, "Thread ID number." }, - { eArgTypeThreadIndex, "thread-index", CommandCompletions::eNoCompletion, { nullptr, false }, "Index into the process' list of threads." }, - { eArgTypeThreadName, "thread-name", CommandCompletions::eNoCompletion, { nullptr, false }, "The thread's name." }, - { eArgTypeTypeName, "type-name", CommandCompletions::eNoCompletion, { nullptr, false }, "A type name." }, - { eArgTypeUnsignedInteger, "unsigned-integer", CommandCompletions::eNoCompletion, { nullptr, false }, "An unsigned integer." }, - { eArgTypeUnixSignal, "unix-signal", CommandCompletions::eNoCompletion, { nullptr, false }, "A valid Unix signal name or number (e.g. SIGKILL, KILL or 9)." 
}, - { eArgTypeVarName, "variable-name", CommandCompletions::eNoCompletion, { nullptr, false }, "The name of a variable in your program." }, - { eArgTypeValue, "value", CommandCompletions::eNoCompletion, { nullptr, false }, "A value could be anything, depending on where and how it is used." }, - { eArgTypeWidth, "width", CommandCompletions::eNoCompletion, { nullptr, false }, "Help text goes here." }, - { eArgTypeNone, "none", CommandCompletions::eNoCompletion, { nullptr, false }, "No help available for this." }, - { eArgTypePlatform, "platform-name", CommandCompletions::ePlatformPluginCompletion, { nullptr, false }, "The name of an installed platform plug-in . Type 'platform list' to see a complete list of installed platforms." }, - { eArgTypeWatchpointID, "watchpt-id", CommandCompletions::eNoCompletion, { nullptr, false }, "Watchpoint IDs are positive integers." }, - { eArgTypeWatchpointIDRange, "watchpt-id-list", CommandCompletions::eNoCompletion, { nullptr, false }, "For example, '1-3' or '1 to 3'." }, - { eArgTypeWatchType, "watch-type", CommandCompletions::eNoCompletion, { nullptr, false }, "Specify the type for a watchpoint." }, - { eArgRawInput, "raw-input", CommandCompletions::eNoCompletion, { nullptr, false }, "Free-form text passed to a command without prior interpretation, allowing spaces without requiring quotes. To pass arguments and free form text put two dashes ' -- ' between the last argument and any raw input." }, - { eArgTypeCommand, "command", CommandCompletions::eNoCompletion, { nullptr, false }, "An LLDB Command line command element." }, - { eArgTypeColumnNum, "column", CommandCompletions::eNoCompletion, { nullptr, false }, "Column number in a source file." }, - { eArgTypeModuleUUID, "module-uuid", CommandCompletions::eModuleUUIDCompletion, { nullptr, false }, "A module UUID value." }, - { eArgTypeSaveCoreStyle, "corefile-style", CommandCompletions::eNoCompletion, { nullptr, false }, "The type of corefile that lldb will try to create, dependant on this target's capabilities." }, - { eArgTypeLogHandler, "log-handler", CommandCompletions::eNoCompletion, { nullptr, false }, "The log handle that will be used to write out log messages." }, - { eArgTypeSEDStylePair, "substitution-pair", CommandCompletions::eNoCompletion, { nullptr, false }, "A sed-style pattern and target pair." }, - { eArgTypeRecognizerID, "frame-recognizer-id", CommandCompletions::eNoCompletion, { nullptr, false }, "The ID for a stack frame recognizer." }, - { eArgTypeConnectURL, "process-connect-url", CommandCompletions::eNoCompletion, { nullptr, false }, "A URL-style specification for a remote connection." }, - { eArgTypeTargetID, "target-id", CommandCompletions::eNoCompletion, { nullptr, false }, "The index ID for an lldb Target." }, - { eArgTypeStopHookID, "stop-hook-id", CommandCompletions::eNoCompletion, { nullptr, false }, "The ID you receive when you create a stop-hook." 
} - // clang-format on -}; - -static_assert( - (sizeof(g_arguments_data) / sizeof(CommandObject::ArgumentTableEntry)) == - eArgTypeLastArg, - "g_arguments_data out of sync with CommandArgumentType enumeration"); - -const CommandObject::ArgumentTableEntry *CommandObject::GetArgumentTable() { - return g_arguments_data; -} diff --git a/lldb/source/Interpreter/OptionValueArray.cpp b/lldb/source/Interpreter/OptionValueArray.cpp index 4468fe57702e..c202a188fe2a 100644 --- a/lldb/source/Interpreter/OptionValueArray.cpp +++ b/lldb/source/Interpreter/OptionValueArray.cpp @@ -218,7 +218,7 @@ Status OptionValueArray::SetArgs(const Args &args, VarSetOperationType op) { if (num_remove_indexes) { // Sort and then erase in reverse so indexes are always valid if (num_remove_indexes > 1) { - llvm::sort(remove_indexes.begin(), remove_indexes.end()); + llvm::sort(remove_indexes); for (std::vector<int>::const_reverse_iterator pos = remove_indexes.rbegin(), end = remove_indexes.rend(); diff --git a/lldb/source/Interpreter/OptionValueFileSpecList.cpp b/lldb/source/Interpreter/OptionValueFileSpecList.cpp index 6566eee09d73..9b4114e2ceb2 100644 --- a/lldb/source/Interpreter/OptionValueFileSpecList.cpp +++ b/lldb/source/Interpreter/OptionValueFileSpecList.cpp @@ -137,7 +137,7 @@ Status OptionValueFileSpecList::SetValueFromString(llvm::StringRef value, size_t num_remove_indexes = remove_indexes.size(); if (num_remove_indexes) { // Sort and then erase in reverse so indexes are always valid - llvm::sort(remove_indexes.begin(), remove_indexes.end()); + llvm::sort(remove_indexes); for (size_t j = num_remove_indexes - 1; j < num_remove_indexes; ++j) { m_current_value.Remove(j); } diff --git a/lldb/source/Interpreter/OptionValuePathMappings.cpp b/lldb/source/Interpreter/OptionValuePathMappings.cpp index 543b0e1b8ea8..6096f4564629 100644 --- a/lldb/source/Interpreter/OptionValuePathMappings.cpp +++ b/lldb/source/Interpreter/OptionValuePathMappings.cpp @@ -174,7 +174,7 @@ Status OptionValuePathMappings::SetValueFromString(llvm::StringRef value, } // Sort and then erase in reverse so indexes are always valid - llvm::sort(remove_indexes.begin(), remove_indexes.end()); + llvm::sort(remove_indexes); for (auto index : llvm::reverse(remove_indexes)) m_path_mappings.Remove(index, m_notify_changes); NotifyValueChanged(); diff --git a/lldb/source/Plugins/ABI/X86/ABIX86.cpp b/lldb/source/Plugins/ABI/X86/ABIX86.cpp index 2cd653fe2c5e..ee568310d387 100644 --- a/lldb/source/Plugins/ABI/X86/ABIX86.cpp +++ b/lldb/source/Plugins/ABI/X86/ABIX86.cpp @@ -100,8 +100,8 @@ addCombinedRegisters(std::vector<DynamicRegisterInfo::Register> ®s, if (regdata1->subreg_name != regdata2->subreg_name) continue; - uint32_t base_index1 = regdata1->base_index.getValue(); - uint32_t base_index2 = regdata2->base_index.getValue(); + uint32_t base_index1 = regdata1->base_index.value(); + uint32_t base_index2 = regdata2->base_index.value(); if (regs[base_index1].byte_size != base_size || regs[base_index2].byte_size != base_size) continue; diff --git a/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp b/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp index a774d5b61cfe..fb404e985f80 100644 --- a/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp +++ b/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp @@ -1379,13 +1379,13 @@ const char *DisassemblerLLVMC::SymbolLookup(uint64_t value, uint64_t *type_ptr, // then this is a pc-relative address calculation. 
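[Editorial illustration, not part of the imported patch.] The hunk that follows reassembles an AArch64 ADRP immediate from its split immlo (bits 30:29) and immhi (bits 23:5) fields, sign-extends it, and treats it as a page offset from the aligned PC. A minimal standalone sketch of that computation, with a hypothetical helper name and example encoding:

#include <cstdint>
#include <cstdio>

// Reassemble immhi:immlo, sign-extend from bit 20, and apply the result to the
// 4 KiB-aligned PC, mirroring the adrp_imm computation in the hunk below.
static uint64_t AdrpTargetPage(uint32_t insn, uint64_t pc) {
  uint64_t imm = ((insn & 0x00ffffe0) >> 3) | // immhi -> bits [20:2]
                 ((insn >> 29) & 0x3);        // immlo -> bits [1:0]
  if (imm & (1ULL << 20))                     // sign-extend a negative offset
    imm |= ~((1ULL << 21) - 1);
  return (pc & ~0xfffULL) + (imm << 12);      // page-aligned PC + imm * 4096
}

int main() {
  // 0xB0000010 encodes "adrp x16, <pc page + 1>"; at pc 0x4000 that is 0x5000.
  std::printf("0x%llx\n",
              (unsigned long long)AdrpTargetPage(0xB0000010u, 0x4000));
}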
if (*type_ptr == LLVMDisassembler_ReferenceType_In_ARM64_ADDXri && m_adrp_insn && m_adrp_address == pc - 4 && - (m_adrp_insn.getValue() & 0x1f) == ((value >> 5) & 0x1f)) { + (m_adrp_insn.value() & 0x1f) == ((value >> 5) & 0x1f)) { uint32_t addxri_inst; uint64_t adrp_imm, addxri_imm; // Get immlo and immhi bits, OR them together to get the ADRP imm // value. - adrp_imm = ((m_adrp_insn.getValue() & 0x00ffffe0) >> 3) | - ((m_adrp_insn.getValue() >> 29) & 0x3); + adrp_imm = ((m_adrp_insn.value() & 0x00ffffe0) >> 3) | + ((m_adrp_insn.value() >> 29) & 0x3); // if high bit of immhi after right-shifting set, sign extend if (adrp_imm & (1ULL << 20)) adrp_imm |= ~((1ULL << 21) - 1); diff --git a/lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/HexagonDYLDRendezvous.cpp b/lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/HexagonDYLDRendezvous.cpp index 5e2a866adb22..5dbbd209bd9b 100644 --- a/lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/HexagonDYLDRendezvous.cpp +++ b/lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/HexagonDYLDRendezvous.cpp @@ -49,6 +49,10 @@ HexagonDYLDRendezvous::HexagonDYLDRendezvous(Process *process) : m_process(process), m_rendezvous_addr(LLDB_INVALID_ADDRESS), m_current(), m_previous(), m_soentries(), m_added_soentries(), m_removed_soentries() { m_thread_info.valid = false; + m_thread_info.dtv_offset = 0; + m_thread_info.dtv_slot_size = 0; + m_thread_info.modid_offset = 0; + m_thread_info.tls_offset = 0; // Cache a copy of the executable path if (m_process) { diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ASTResultSynthesizer.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ASTResultSynthesizer.cpp index 7a1ac7870547..799ae29e2841 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ASTResultSynthesizer.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ASTResultSynthesizer.cpp @@ -197,6 +197,27 @@ bool ASTResultSynthesizer::SynthesizeObjCMethodResult( return ret; } +/// Returns true if LLDB can take the address of the given lvalue for the sake +/// of capturing the expression result. Returns false if LLDB should instead +/// store the expression result in a result variable. +static bool CanTakeAddressOfLValue(const Expr *lvalue_expr) { + assert(lvalue_expr->getValueKind() == VK_LValue && + "lvalue_expr not a lvalue"); + + QualType qt = lvalue_expr->getType(); + // If the lvalue has const-qualified non-volatile integral or enum type, then + // the underlying value might come from a const static data member as + // described in C++11 [class.static.data]p3. If that's the case, then the + // value might not have an address if the user didn't also define the member + // in a namespace scope. Taking the address would cause that LLDB later fails + // to link the expression, so those lvalues should be stored in a result + // variable. + if (qt->isIntegralOrEnumerationType() && qt.isConstQualified() && + !qt.isVolatileQualified()) + return false; + return true; +} + bool ASTResultSynthesizer::SynthesizeBodyResult(CompoundStmt *Body, DeclContext *DC) { Log *log = GetLog(LLDBLog::Expressions); @@ -265,6 +286,10 @@ bool ASTResultSynthesizer::SynthesizeBodyResult(CompoundStmt *Body, // - During dematerialization, $0 is marked up as a load address with value // equal to the contents of the structure entry. // + // - Note: if we cannot take an address of the resulting Lvalue (e.g. it's + // a static const member without an out-of-class definition), then we + // follow the Rvalue route. 
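[Editorial illustration, not part of the imported patch.] A minimal debuggee-side example of the case the note above describes, using hypothetical names: a const static data member initialized in-class but never defined at namespace scope has no linkable address, which is why such lvalues are routed to a result variable instead.

#include <cstdio>

struct Settings {
  // In-class initializer only; there is no out-of-class
  // 'const int Settings::kRetries;' definition anywhere.
  static const int kRetries = 3;
};

int main() {
  // Reading the value is fine (no odr-use), but taking &Settings::kRetries
  // would need a definition to link against. An expression ending in
  // 'Settings::kRetries' therefore follows the Rvalue route described below.
  std::printf("%d\n", Settings::kRetries);
}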
+ // // For Rvalues // // - In AST result synthesis the expression E is transformed into an @@ -304,7 +329,7 @@ bool ASTResultSynthesizer::SynthesizeBodyResult(CompoundStmt *Body, clang::VarDecl *result_decl = nullptr; - if (is_lvalue) { + if (is_lvalue && CanTakeAddressOfLValue(last_expr)) { IdentifierInfo *result_ptr_id; if (expr_type->isFunctionType()) diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp index 1bf29efb0bee..f8443d608ac3 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp @@ -1430,10 +1430,9 @@ static bool ImportOffsetMap(llvm::DenseMap<const D *, O> &destination_map, std::vector<PairType> sorted_items; sorted_items.reserve(source_map.size()); sorted_items.assign(source_map.begin(), source_map.end()); - llvm::sort(sorted_items.begin(), sorted_items.end(), - [](const PairType &lhs, const PairType &rhs) { - return lhs.second < rhs.second; - }); + llvm::sort(sorted_items, [](const PairType &lhs, const PairType &rhs) { + return lhs.second < rhs.second; + }); for (const auto &item : sorted_items) { DeclFromUser<D> user_decl(const_cast<D *>(item.first)); diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp index 4305a9982343..6ba03dad98d1 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp @@ -9,10 +9,13 @@ #include "ClangExpressionDeclMap.h" #include "ClangASTSource.h" +#include "ClangExpressionUtil.h" +#include "ClangExpressionVariable.h" #include "ClangModulesDeclVendor.h" #include "ClangPersistentVariables.h" #include "ClangUtil.h" +#include "NameSearchContext.h" #include "Plugins/TypeSystem/Clang/TypeSystemClang.h" #include "lldb/Core/Address.h" #include "lldb/Core/Module.h" @@ -44,6 +47,7 @@ #include "lldb/Utility/Log.h" #include "lldb/Utility/RegisterValue.h" #include "lldb/Utility/Status.h" +#include "lldb/lldb-private-types.h" #include "lldb/lldb-private.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTContext.h" @@ -62,6 +66,24 @@ using namespace clang; static const char *g_lldb_local_vars_namespace_cstr = "$__lldb_local_vars"; +namespace { +/// A lambda is represented by Clang as an artifical class whose +/// members are the lambda captures. If we capture a 'this' pointer, +/// the artifical class will contain a member variable named 'this'. +/// The function returns a ValueObject for the captured 'this' if such +/// member exists. If no 'this' was captured, return a nullptr. 
+lldb::ValueObjectSP GetCapturedThisValueObject(StackFrame *frame) { + assert(frame); + + if (auto thisValSP = frame->FindVariable(ConstString("this"))) + if (auto thisThisValSP = + thisValSP->GetChildMemberWithName(ConstString("this"), true)) + return thisThisValSP; + + return nullptr; +} +} // namespace + ClangExpressionDeclMap::ClangExpressionDeclMap( bool keep_result_in_memory, Materializer::PersistentVariableDelegate *result_delegate, @@ -394,6 +416,10 @@ bool ClangExpressionDeclMap::AddValueToStruct(const NamedDecl *decl, else if (parser_vars->m_lldb_var) offset = m_parser_vars->m_materializer->AddVariable( parser_vars->m_lldb_var, err); + else if (parser_vars->m_lldb_valobj_provider) { + offset = m_parser_vars->m_materializer->AddValueObject( + name, parser_vars->m_lldb_valobj_provider, err); + } } if (!err.Success()) @@ -795,6 +821,28 @@ void ClangExpressionDeclMap::LookUpLldbClass(NameSearchContext &context) { TypeSystemClang::DeclContextGetAsCXXMethodDecl(function_decl_ctx); if (method_decl) { + if (auto capturedThis = GetCapturedThisValueObject(frame)) { + // We're inside a lambda and we captured a 'this'. + // Import the outer class's AST instead of the + // (unnamed) lambda structure AST so unqualified + // member lookups are understood by the Clang parser. + // + // If we're in a lambda which didn't capture 'this', + // $__lldb_class will correspond to the lambda closure + // AST and references to captures will resolve like + // regular member varaiable accesses do. + TypeFromUser pointee_type = + capturedThis->GetCompilerType().GetPointeeType(); + + LLDB_LOG(log, + " CEDM::FEVD Adding captured type ({0} for" + " $__lldb_class: {1}", + capturedThis->GetTypeName(), capturedThis->GetName()); + + AddContextClassType(context, pointee_type); + return; + } + clang::CXXRecordDecl *class_decl = method_decl->getParent(); QualType class_qual_type(class_decl->getTypeForDecl(), 0); @@ -1053,6 +1101,30 @@ bool ClangExpressionDeclMap::LookupLocalVariable( context.m_found_variable = true; } } + + // We're in a local_var_lookup but haven't found any local variables + // so far. When performing a variable lookup from within the context of + // a lambda, we count the lambda captures as local variables. Thus, + // see if we captured any variables with the requested 'name'. 
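[Editorial illustration, not part of the imported patch.] Both of the comments above rely on how Clang lowers a lambda: the closure is an unnamed class whose members are the captures, and a captured 'this' appears as a member literally named 'this'. A small example with hypothetical names:

#include <cstdio>

struct Widget {
  int value = 7;
  void update(int scale) {
    // Roughly lowered to: struct <lambda> { Widget *this; int scale; ... };
    auto body = [this, scale] {
      // Stopped here, the frame's "this" is the closure object; the enclosing
      // Widget is its child member named "this", and 'scale' is another
      // member. That is the layout GetCapturedThisValueObject and the capture
      // lookup above traverse.
      std::printf("%d\n", value * scale);
    };
    body();
  }
};

int main() { Widget{}.update(3); }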
+ if (!variable_found) { + auto find_capture = [](ConstString varname, + StackFrame *frame) -> ValueObjectSP { + if (auto lambda = ClangExpressionUtil::GetLambdaValueObject(frame)) { + if (auto capture = lambda->GetChildMemberWithName(varname, true)) { + return capture; + } + } + + return nullptr; + }; + + if (auto capture = find_capture(name, frame)) { + variable_found = true; + context.m_found_variable = true; + AddOneVariable(context, std::move(capture), std::move(find_capture)); + } + } + return variable_found; } @@ -1493,25 +1565,15 @@ bool ClangExpressionDeclMap::GetVariableValue(VariableSP &var, return true; } -void ClangExpressionDeclMap::AddOneVariable(NameSearchContext &context, - VariableSP var, - ValueObjectSP valobj) { - assert(m_parser_vars.get()); - - Log *log = GetLog(LLDBLog::Expressions); - - TypeFromUser ut; - TypeFromParser pt; - Value var_location; - - if (!GetVariableValue(var, var_location, &ut, &pt)) - return; - +ClangExpressionVariable::ParserVars * +ClangExpressionDeclMap::AddExpressionVariable(NameSearchContext &context, + TypeFromParser const &pt, + ValueObjectSP valobj) { clang::QualType parser_opaque_type = QualType::getFromOpaquePtr(pt.GetOpaqueQualType()); if (parser_opaque_type.isNull()) - return; + return nullptr; if (const clang::Type *parser_type = parser_opaque_type.getTypePtr()) { if (const TagType *tag_type = dyn_cast<TagType>(parser_type)) @@ -1538,16 +1600,89 @@ void ClangExpressionDeclMap::AddOneVariable(NameSearchContext &context, entity->EnableParserVars(GetParserID()); ClangExpressionVariable::ParserVars *parser_vars = entity->GetParserVars(GetParserID()); + parser_vars->m_named_decl = var_decl; - parser_vars->m_llvm_value = nullptr; - parser_vars->m_lldb_value = var_location; - parser_vars->m_lldb_var = var; if (is_reference) entity->m_flags |= ClangExpressionVariable::EVTypeIsReference; + return parser_vars; +} + +void ClangExpressionDeclMap::AddOneVariable( + NameSearchContext &context, ValueObjectSP valobj, + ValueObjectProviderTy valobj_provider) { + assert(m_parser_vars.get()); + assert(valobj); + + Log *log = GetLog(LLDBLog::Expressions); + + Value var_location = valobj->GetValue(); + + TypeFromUser user_type = valobj->GetCompilerType(); + + TypeSystemClang *clang_ast = + llvm::dyn_cast_or_null<TypeSystemClang>(user_type.GetTypeSystem()); + + if (!clang_ast) { + LLDB_LOG(log, "Skipped a definition because it has no Clang AST"); + return; + } + + TypeFromParser parser_type = GuardedCopyType(user_type); + + if (!parser_type) { + LLDB_LOG(log, + "Couldn't copy a variable's type into the parser's AST context"); + + return; + } + + if (var_location.GetContextType() == Value::ContextType::Invalid) + var_location.SetCompilerType(parser_type); + + ClangExpressionVariable::ParserVars *parser_vars = + AddExpressionVariable(context, parser_type, valobj); + + if (!parser_vars) + return; + LLDB_LOG(log, " CEDM::FEVD Found variable {0}, returned\n{1} (original {2})", - decl_name, ClangUtil::DumpDecl(var_decl), ClangUtil::ToString(ut)); + context.m_decl_name, ClangUtil::DumpDecl(parser_vars->m_named_decl), + ClangUtil::ToString(user_type)); + + parser_vars->m_llvm_value = nullptr; + parser_vars->m_lldb_value = std::move(var_location); + parser_vars->m_lldb_valobj_provider = std::move(valobj_provider); +} + +void ClangExpressionDeclMap::AddOneVariable(NameSearchContext &context, + VariableSP var, + ValueObjectSP valobj) { + assert(m_parser_vars.get()); + + Log *log = GetLog(LLDBLog::Expressions); + + TypeFromUser ut; + TypeFromParser pt; + Value 
var_location; + + if (!GetVariableValue(var, var_location, &ut, &pt)) + return; + + ClangExpressionVariable::ParserVars *parser_vars = + AddExpressionVariable(context, pt, std::move(valobj)); + + if (!parser_vars) + return; + + LLDB_LOG(log, " CEDM::FEVD Found variable {0}, returned\n{1} (original {2})", + context.m_decl_name, ClangUtil::DumpDecl(parser_vars->m_named_decl), + ClangUtil::ToString(ut)); + + parser_vars->m_llvm_value = nullptr; + parser_vars->m_lldb_value = var_location; + parser_vars->m_lldb_var = var; } void ClangExpressionDeclMap::AddOneVariable(NameSearchContext &context, diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.h b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.h index f968f859cc72..bf7646ccaedf 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.h +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.h @@ -531,6 +531,23 @@ private: TypeFromParser *parser_type = nullptr); /// Use the NameSearchContext to generate a Decl for the given LLDB + /// ValueObject, and put it in the list of found entities. + /// + /// Helper function used by the other AddOneVariable APIs. + /// + /// \param[in,out] context + /// The NameSearchContext to use when constructing the Decl. + /// + /// \param[in] pt + /// The CompilerType of the variable we're adding a Decl for. + /// + /// \param[in] var + /// The LLDB ValueObject that needs a Decl. + ClangExpressionVariable::ParserVars * + AddExpressionVariable(NameSearchContext &context, TypeFromParser const &pt, + lldb::ValueObjectSP valobj); + + /// Use the NameSearchContext to generate a Decl for the given LLDB /// Variable, and put it in the Tuple list. /// /// \param[in] context @@ -544,6 +561,20 @@ private: void AddOneVariable(NameSearchContext &context, lldb::VariableSP var, lldb::ValueObjectSP valobj); + /// Use the NameSearchContext to generate a Decl for the given ValueObject + /// and put it in the list of found entities. + /// + /// \param[in,out] context + /// The NameSearchContext to use when constructing the Decl. + /// + /// \param[in] valobj + /// The ValueObject that needs a Decl. + /// + /// \param[in] valobj_provider Callback that fetches a ValueObjectSP + /// from the specified frame + void AddOneVariable(NameSearchContext &context, lldb::ValueObjectSP valobj, + ValueObjectProviderTy valobj_provider); + /// Use the NameSearchContext to generate a Decl for the given persistent /// variable, and put it in the list of found entities. 
/// diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionSourceCode.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionSourceCode.cpp index 5168f637c443..56c00b35ba11 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionSourceCode.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionSourceCode.cpp @@ -8,6 +8,8 @@ #include "ClangExpressionSourceCode.h" +#include "ClangExpressionUtil.h" + #include "clang/Basic/CharInfo.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/SourceManager.h" @@ -27,6 +29,7 @@ #include "lldb/Target/StackFrame.h" #include "lldb/Target/Target.h" #include "lldb/Utility/StreamString.h" +#include "lldb/lldb-forward.h" using namespace lldb_private; @@ -200,6 +203,34 @@ public: return m_tokens.find(token) != m_tokens.end(); } }; + +// If we're evaluating from inside a lambda that captures a 'this' pointer, +// add a "using" declaration to 'stream' for each capture used in the +// expression (tokenized by 'verifier'). +// +// If no 'this' capture exists, generate no using declarations. Instead +// capture lookups will get resolved by the same mechanism as class member +// variable lookup. That's because Clang generates an unnamed structure +// representing the lambda closure whose members are the captured variables. +void AddLambdaCaptureDecls(StreamString &stream, StackFrame *frame, + TokenVerifier const &verifier) { + assert(frame); + + if (auto thisValSP = ClangExpressionUtil::GetLambdaValueObject(frame)) { + uint32_t numChildren = thisValSP->GetNumChildren(); + for (uint32_t i = 0; i < numChildren; ++i) { + auto childVal = thisValSP->GetChildAtIndex(i, true); + ConstString childName(childVal ? childVal->GetName() : ConstString("")); + + if (!childName.IsEmpty() && verifier.hasToken(childName.GetStringRef()) && + childName != "this") { + stream.Printf("using $__lldb_local_vars::%s;\n", + childName.GetCString()); + } + } + } +} + } // namespace TokenVerifier::TokenVerifier(std::string body) { @@ -264,16 +295,24 @@ TokenVerifier::TokenVerifier(std::string body) { } } -void ClangExpressionSourceCode::AddLocalVariableDecls( - const lldb::VariableListSP &var_list_sp, StreamString &stream, - const std::string &expr) const { +void ClangExpressionSourceCode::AddLocalVariableDecls(StreamString &stream, + const std::string &expr, + StackFrame *frame) const { + assert(frame); TokenVerifier tokens(expr); + lldb::VariableListSP var_list_sp = frame->GetInScopeVariableList(false, true); + for (size_t i = 0; i < var_list_sp->GetSize(); i++) { lldb::VariableSP var_sp = var_list_sp->GetVariableAtIndex(i); ConstString var_name = var_sp->GetName(); + if (var_name == "this" && m_wrap_kind == WrapKind::CppMemberFunction) { + AddLambdaCaptureDecls(stream, frame, tokens); + + continue; + } // We can check for .block_descriptor w/o checking for langauge since this // is not a valid identifier in either C or C++. @@ -288,9 +327,6 @@ void ClangExpressionSourceCode::AddLocalVariableDecls( if ((var_name == "self" || var_name == "_cmd") && is_objc) continue; - if (var_name == "this" && m_wrap_kind == WrapKind::CppMemberFunction) - continue; - stream.Printf("using $__lldb_local_vars::%s;\n", var_name.AsCString()); } } @@ -376,10 +412,8 @@ bool ClangExpressionSourceCode::GetText( if (add_locals) if (target->GetInjectLocalVariables(&exe_ctx)) { - lldb::VariableListSP var_list_sp = - frame->GetInScopeVariableList(false, true); - AddLocalVariableDecls(var_list_sp, lldb_local_var_decls, - force_add_all_locals ? 
"" : m_body); + AddLocalVariableDecls(lldb_local_var_decls, + force_add_all_locals ? "" : m_body, frame); } } diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionSourceCode.h b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionSourceCode.h index 54ae837fb30f..f721bb2f319e 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionSourceCode.h +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionSourceCode.h @@ -78,9 +78,19 @@ protected: Wrapping wrap, WrapKind wrap_kind); private: - void AddLocalVariableDecls(const lldb::VariableListSP &var_list_sp, - StreamString &stream, - const std::string &expr) const; + /// Writes "using" declarations for local variables into the specified stream. + /// + /// Behaviour is undefined if 'frame == nullptr'. + /// + /// \param[out] stream Stream that this function generates "using" + /// declarations into. + /// + /// \param[in] expr Expression source that we're evaluating. + /// + /// \param[in] frame StackFrame which carries information about the local + /// variables that we're generating "using" declarations for. + void AddLocalVariableDecls(StreamString &stream, const std::string &expr, + StackFrame *frame) const; /// String marking the start of the user expression. std::string m_start_marker; diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionUtil.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionUtil.cpp new file mode 100644 index 000000000000..9b490e1c036e --- /dev/null +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionUtil.cpp @@ -0,0 +1,27 @@ +//===-- ClangExpressionUtil.cpp -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ClangExpressionUtil.h" + +#include "lldb/Core/ValueObject.h" +#include "lldb/Target/StackFrame.h" +#include "lldb/Utility/ConstString.h" + +namespace lldb_private { +namespace ClangExpressionUtil { +lldb::ValueObjectSP GetLambdaValueObject(StackFrame *frame) { + assert(frame); + + if (auto this_val_sp = frame->FindVariable(ConstString("this"))) + if (this_val_sp->GetChildMemberWithName(ConstString("this"), true)) + return this_val_sp; + + return nullptr; +} +} // namespace ClangExpressionUtil +} // namespace lldb_private diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionUtil.h b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionUtil.h new file mode 100644 index 000000000000..fb8b857256c0 --- /dev/null +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionUtil.h @@ -0,0 +1,30 @@ +//===-- ClangExpressionUtil.h -----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_SOURCE_PLUGINS_EXPRESSIONPARSER_CLANG_CLANGEXPRESSIONUTIL_H +#define LLDB_SOURCE_PLUGINS_EXPRESSIONPARSER_CLANG_CLANGEXPRESSIONUTIL_H + +#include "lldb/lldb-private.h" + +namespace lldb_private { +namespace ClangExpressionUtil { +/// Returns a ValueObject for the lambda class in the current frame +/// +/// To represent a lambda, Clang generates an artificial class +/// whose members are the captures and whose operator() is the +/// lambda implementation. If we capture a 'this' pointer, +/// the artifical class will contain a member variable named 'this'. +/// +/// This method returns the 'this' pointer to the artificial lambda +/// class if a real 'this' was captured. Otherwise, returns nullptr. +lldb::ValueObjectSP GetLambdaValueObject(StackFrame *frame); + +} // namespace ClangExpressionUtil +} // namespace lldb_private + +#endif // LLDB_SOURCE_PLUGINS_EXPRESSIONPARSER_CLANG_CLANGEXPRESSIONHELPER_H diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionVariable.h b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionVariable.h index 7bb68e78373f..c7d9e05269fa 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionVariable.h +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionVariable.h @@ -116,7 +116,7 @@ public: /// The following values should not live beyond parsing class ParserVars { public: - ParserVars() : m_lldb_value(), m_lldb_var() {} + ParserVars() = default; const clang::NamedDecl *m_named_decl = nullptr; ///< The Decl corresponding to this variable @@ -129,6 +129,12 @@ public: const lldb_private::Symbol *m_lldb_sym = nullptr; ///< The original symbol for this /// variable, if it was a symbol + + /// Callback that provides a ValueObject for the + /// specified frame. Used by the materializer for + /// re-fetching ValueObjects when materializing + /// ivars. + ValueObjectProviderTy m_lldb_valobj_provider; }; private: diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.cpp index 78b8bf11220a..7145e7804e68 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.cpp @@ -872,6 +872,34 @@ bool ClangUserExpression::Complete(ExecutionContext &exe_ctx, return true; } +lldb::addr_t ClangUserExpression::GetCppObjectPointer( + lldb::StackFrameSP frame_sp, ConstString &object_name, Status &err) { + auto valobj_sp = + GetObjectPointerValueObject(std::move(frame_sp), object_name, err); + + // We're inside a C++ class method. This could potentially be an unnamed + // lambda structure. If the lambda captured a "this", that should be + // the object pointer. 
+ if (auto thisChildSP = + valobj_sp->GetChildMemberWithName(ConstString("this"), true)) { + valobj_sp = thisChildSP; + } + + if (!err.Success() || !valobj_sp.get()) + return LLDB_INVALID_ADDRESS; + + lldb::addr_t ret = valobj_sp->GetValueAsUnsigned(LLDB_INVALID_ADDRESS); + + if (ret == LLDB_INVALID_ADDRESS) { + err.SetErrorStringWithFormat( + "Couldn't load '%s' because its value couldn't be evaluated", + object_name.AsCString()); + return LLDB_INVALID_ADDRESS; + } + + return ret; +} + bool ClangUserExpression::AddArguments(ExecutionContext &exe_ctx, std::vector<lldb::addr_t> &args, lldb::addr_t struct_address, @@ -906,8 +934,14 @@ bool ClangUserExpression::AddArguments(ExecutionContext &exe_ctx, address_type != eAddressTypeLoad) object_ptr_error.SetErrorString("Can't get context object's " "debuggee address"); - } else - object_ptr = GetObjectPointer(frame_sp, object_name, object_ptr_error); + } else { + if (m_in_cplusplus_method) { + object_ptr = + GetCppObjectPointer(frame_sp, object_name, object_ptr_error); + } else { + object_ptr = GetObjectPointer(frame_sp, object_name, object_ptr_error); + } + } if (!object_ptr_error.Success()) { exe_ctx.GetTargetRef().GetDebugger().GetAsyncOutputStream()->Printf( diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.h b/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.h index 30cdd2f3e990..4d5458f1807d 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.h +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.h @@ -198,6 +198,10 @@ private: ExecutionContext &exe_ctx, std::vector<std::string> modules_to_import, bool for_completion); + + lldb::addr_t GetCppObjectPointer(lldb::StackFrameSP frame, + ConstString &object_name, Status &err); + /// Defines how the current expression should be wrapped. 
ClangExpressionSourceCode::WrapKind GetWrapKind() const; bool SetupPersistentState(DiagnosticManager &diagnostic_manager, diff --git a/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.h b/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.h index 8b167dd347ad..c877724a9d30 100644 --- a/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.h +++ b/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.h @@ -91,7 +91,8 @@ public: EmulateInstructionARM(const ArchSpec &arch) : EmulateInstruction(arch), m_arm_isa(0), m_opcode_mode(eModeInvalid), - m_opcode_cpsr(0), m_it_session(), m_ignore_conditions(false) { + m_opcode_cpsr(0), m_new_inst_cpsr(0), m_it_session(), + m_ignore_conditions(false) { SetArchitecture(arch); } diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp index 89bee3e000c0..23ce1654fb83 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp @@ -218,10 +218,10 @@ void CPlusPlusLanguage::MethodName::Parse() { } else { CPlusPlusNameParser parser(m_full.GetStringRef()); if (auto function = parser.ParseAsFunctionDefinition()) { - m_basename = function.getValue().name.basename; - m_context = function.getValue().name.context; - m_arguments = function.getValue().arguments; - m_qualifiers = function.getValue().qualifiers; + m_basename = function.value().name.basename; + m_context = function.value().name.context; + m_arguments = function.value().arguments; + m_qualifiers = function.value().qualifiers; m_parse_error = false; } else { m_parse_error = true; @@ -329,8 +329,8 @@ bool CPlusPlusLanguage::ExtractContextAndIdentifier( CPlusPlusNameParser parser(name); if (auto full_name = parser.ParseAsFullName()) { - identifier = full_name.getValue().basename; - context = full_name.getValue().context; + identifier = full_name.value().basename; + context = full_name.value().context; return true; } return false; @@ -409,12 +409,12 @@ protected: private: /// Input character until which we have constructed the respective output /// already. - const char *Written; + const char *Written = ""; llvm::SmallString<128> Result; /// Whether we have performed any substitutions. 
- bool Substituted; + bool Substituted = false; const char *currentParserPos() const { return this->First; } @@ -909,7 +909,7 @@ static void LoadLibCxxFormatters(lldb::TypeCategoryImplSP cpp_category_sp) { cpp_category_sp, lldb_private::formatters::LibCxxMapIteratorSyntheticFrontEndCreator, "std::map iterator synthetic children", - ConstString("^std::__[[:alnum:]]+::__map_iterator<.+>$"), stl_synth_flags, + ConstString("^std::__[[:alnum:]]+::__map_(const_)?iterator<.+>$"), stl_synth_flags, true); AddCXXSynthetic( diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.cpp b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.cpp index eca36fff18f8..a9a4e98775d9 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.cpp @@ -55,8 +55,8 @@ Optional<ParsedName> CPlusPlusNameParser::ParseAsFullName() { if (HasMoreTokens()) return None; ParsedName result; - result.basename = GetTextForRange(name_ranges.getValue().basename_range); - result.context = GetTextForRange(name_ranges.getValue().context_range); + result.basename = GetTextForRange(name_ranges.value().basename_range); + result.context = GetTextForRange(name_ranges.value().context_range); return result; } @@ -125,8 +125,8 @@ CPlusPlusNameParser::ParseFunctionImpl(bool expect_return_type) { size_t end_position = GetCurrentPosition(); ParsedFunction result; - result.name.basename = GetTextForRange(maybe_name.getValue().basename_range); - result.name.context = GetTextForRange(maybe_name.getValue().context_range); + result.name.basename = GetTextForRange(maybe_name.value().basename_range); + result.name.context = GetTextForRange(maybe_name.value().context_range); result.arguments = GetTextForRange(Range(argument_start, qualifiers_start)); result.qualifiers = GetTextForRange(Range(qualifiers_start, end_position)); start_position.Remove(); @@ -617,9 +617,9 @@ CPlusPlusNameParser::ParseFullNameImpl() { ParsedNameRanges result; if (last_coloncolon_position) { result.context_range = Range(start_position.GetSavedPosition(), - last_coloncolon_position.getValue()); + last_coloncolon_position.value()); result.basename_range = - Range(last_coloncolon_position.getValue() + 1, GetCurrentPosition()); + Range(last_coloncolon_position.value() + 1, GetCurrentPosition()); } else { result.basename_range = Range(start_position.GetSavedPosition(), GetCurrentPosition()); diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxList.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxList.cpp index 9f5624de4e63..a1953a1c7a22 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxList.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxList.cpp @@ -119,16 +119,16 @@ protected: AbstractListFrontEnd(ValueObject &valobj) : SyntheticChildrenFrontEnd(valobj) {} - size_t m_count; - ValueObject *m_head; + size_t m_count = 0; + ValueObject *m_head = nullptr; static constexpr bool g_use_loop_detect = true; - size_t m_loop_detected; // The number of elements that have had loop detection - // run over them. + size_t m_loop_detected = 0; // The number of elements that have had loop + // detection run over them. 
ListEntry m_slow_runner; // Used for loop detection ListEntry m_fast_runner; // Used for loop detection - size_t m_list_capping_size; + size_t m_list_capping_size = 0; CompilerType m_element_type; std::map<size_t, ListIterator> m_iterators; diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxMap.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxMap.cpp index 4aae524e3701..ca6f92d003ad 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxMap.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxMap.cpp @@ -165,8 +165,8 @@ private: } MapEntry m_entry; - size_t m_max_depth; - bool m_error; + size_t m_max_depth = 0; + bool m_error = false; }; namespace lldb_private { diff --git a/lldb/source/Plugins/Language/ObjC/CFBasicHash.h b/lldb/source/Plugins/Language/ObjC/CFBasicHash.h index fd30f5f7845f..f850c50342a3 100644 --- a/lldb/source/Plugins/Language/ObjC/CFBasicHash.h +++ b/lldb/source/Plugins/Language/ObjC/CFBasicHash.h @@ -68,7 +68,7 @@ private: ExecutionContextRef m_exe_ctx_ref; bool m_mutable = true; bool m_multi = false; - HashType m_type; + HashType m_type = HashType::set; }; } // namespace lldb_private diff --git a/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp b/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp index 2c6998451ad6..e5e62b534560 100644 --- a/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp +++ b/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp @@ -137,7 +137,7 @@ private: lldb::ByteOrder m_order = lldb::eByteOrderInvalid; DataDescriptor_32 *m_data_32 = nullptr; DataDescriptor_64 *m_data_64 = nullptr; - lldb::addr_t m_data_ptr; + lldb::addr_t m_data_ptr = LLDB_INVALID_ADDRESS; CompilerType m_pair_type; std::vector<DictionaryItemDescriptor> m_children; }; diff --git a/lldb/source/Plugins/Language/ObjC/NSError.cpp b/lldb/source/Plugins/Language/ObjC/NSError.cpp index 937c7204d3c4..4f237824c4b0 100644 --- a/lldb/source/Plugins/Language/ObjC/NSError.cpp +++ b/lldb/source/Plugins/Language/ObjC/NSError.cpp @@ -177,7 +177,7 @@ private: // values to leak if the latter, then I need to store a SharedPointer to it - // so that it only goes away when everyone else in the cluster goes away oh // joy! 
- ValueObject *m_child_ptr; + ValueObject *m_child_ptr = nullptr; ValueObjectSP m_child_sp; }; diff --git a/lldb/source/Plugins/Language/ObjC/NSSet.cpp b/lldb/source/Plugins/Language/ObjC/NSSet.cpp index 0a6b445d97c8..b5c8e849abcc 100644 --- a/lldb/source/Plugins/Language/ObjC/NSSet.cpp +++ b/lldb/source/Plugins/Language/ObjC/NSSet.cpp @@ -76,7 +76,7 @@ private: uint8_t m_ptr_size = 8; DataDescriptor_32 *m_data_32 = nullptr; DataDescriptor_64 *m_data_64 = nullptr; - lldb::addr_t m_data_ptr; + lldb::addr_t m_data_ptr = LLDB_INVALID_ADDRESS; std::vector<SetItemDescriptor> m_children; }; diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.h b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.h index 7ba957940ae7..0c6eccab9f27 100644 --- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.h +++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.h @@ -347,12 +347,12 @@ public: private: ConstString m_name; - uint8_t m_pointer_size; - bool m_valid; - uint64_t m_info_bits; - uint64_t m_value_bits; - int64_t m_value_bits_signed; - uint64_t m_payload; + uint8_t m_pointer_size = 0; + bool m_valid = false; + uint64_t m_info_bits = 0; + uint64_t m_value_bits = 0; + int64_t m_value_bits_signed = 0; + uint64_t m_payload = 0; }; } // namespace lldb_private diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTrampolineHandler.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTrampolineHandler.cpp index f9ccaf0115a2..18e1d12ef980 100644 --- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTrampolineHandler.cpp +++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTrampolineHandler.cpp @@ -578,7 +578,8 @@ AppleObjCTrampolineHandler::AppleObjCTrampolineHandler( : m_process_wp(), m_objc_module_sp(objc_module_sp), m_impl_fn_addr(LLDB_INVALID_ADDRESS), m_impl_stret_fn_addr(LLDB_INVALID_ADDRESS), - m_msg_forward_addr(LLDB_INVALID_ADDRESS) { + m_msg_forward_addr(LLDB_INVALID_ADDRESS), + m_msg_forward_stret_addr(LLDB_INVALID_ADDRESS) { if (process_sp) m_process_wp = process_sp; // Look up the known resolution functions: @@ -780,10 +781,8 @@ AppleObjCTrampolineHandler::FindDispatchFunction(lldb::addr_t addr) { return nullptr; } -void -AppleObjCTrampolineHandler::ForEachDispatchFunction( - std::function<void(lldb::addr_t, - const DispatchFunction &)> callback) { +void AppleObjCTrampolineHandler::ForEachDispatchFunction( + std::function<void(lldb::addr_t, const DispatchFunction &)> callback) { for (auto elem : m_msgSend_map) { callback(elem.first, g_dispatch_functions[elem.second]); } diff --git a/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.cpp b/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.cpp index bc8e43764af6..0c032f8a7c88 100644 --- a/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.cpp +++ b/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.cpp @@ -4163,7 +4163,7 @@ public: int m_kernel_types = RSReduceBreakpointResolver::eKernelTypeAll; llvm::StringRef m_reduce_name; RSCoordinate m_coord; - bool m_have_coord; + bool m_have_coord = false; }; Options *GetOptions() override { return &m_options; } @@ -4268,7 +4268,7 @@ public: } RSCoordinate m_coord; - bool m_have_coord; + bool m_have_coord = false; }; bool 
DoExecute(Args &command, CommandReturnObject &result) override { diff --git a/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.h b/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.h index 4ddf996dedb2..bc460706fd29 100644 --- a/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.h +++ b/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.h @@ -171,7 +171,8 @@ struct RSReductionDescriptor { llvm::StringRef halter_name = ".") : m_module(module), m_reduce_name(name), m_init_name(init_name), m_accum_name(accum_name), m_comb_name(comb_name), - m_outc_name(outc_name), m_halter_name(halter_name) { + m_outc_name(outc_name), m_halter_name(halter_name), m_accum_sig(0), + m_accum_data_size(0), m_comb_name_generated(false) { // TODO Check whether the combiner is an autogenerated name, and track // this } diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp index f9fb36890d5a..122298d87bf8 100644 --- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp +++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp @@ -1607,7 +1607,7 @@ lldb::user_id_t ObjectFileELF::GetSectionIndexByName(const char *name) { } static SectionType GetSectionTypeFromName(llvm::StringRef Name) { - if (Name.consume_front(".debug_") || Name.consume_front(".zdebug_")) { + if (Name.consume_front(".debug_")) { return llvm::StringSwitch<SectionType>(Name) .Case("abbrev", eSectionTypeDWARFDebugAbbrev) .Case("abbrev.dwo", eSectionTypeDWARFDebugAbbrevDwo) @@ -3365,8 +3365,7 @@ size_t ObjectFileELF::ReadSectionData(Section *section, return section->GetObjectFile()->ReadSectionData(section, section_data); size_t result = ObjectFile::ReadSectionData(section, section_data); - if (result == 0 || !llvm::object::Decompressor::isCompressedELFSection( - section->Get(), section->GetName().GetStringRef())) + if (result == 0 || !(section->Get() & llvm::ELF::SHF_COMPRESSED)) return result; auto Decompressor = llvm::object::Decompressor::create( diff --git a/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp index 516bcb21b019..acb131b8a775 100644 --- a/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp +++ b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp @@ -349,6 +349,7 @@ llvm::support::ulittle64_t read_register_u64(RegisterContext *reg_ctx, lldb_private::minidump::MinidumpContext_x86_64 GetThreadContext_64(RegisterContext *reg_ctx) { lldb_private::minidump::MinidumpContext_x86_64 thread_context; + thread_context.p1_home = {}; thread_context.context_flags = static_cast<uint32_t>( lldb_private::minidump::MinidumpContext_x86_64_Flags::x86_64_Flag | lldb_private::minidump::MinidumpContext_x86_64_Flags::Control | diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_x86.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_x86.cpp index 67e03ff1ea39..40f70e8a0a75 100644 --- a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_x86.cpp +++ b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_x86.cpp @@ -444,6 +444,7 @@ RegisterContextPOSIX_x86::RegisterContextPOSIX_x86( } ::memset(&m_fpr, 0, sizeof(FPR)); + ::memset(&m_ymm_set, 0, sizeof(YMM)); m_fpr_type = eNotValid; } diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp 
b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp index 579ac6e36d0b..4db7abe603d4 100644 --- a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp +++ b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp @@ -396,15 +396,11 @@ bool RegisterInfoPOSIX_arm64::IsSVERegVG(unsigned reg) const { } bool RegisterInfoPOSIX_arm64::IsPAuthReg(unsigned reg) const { - return std::find(pauth_regnum_collection.begin(), - pauth_regnum_collection.end(), - reg) != pauth_regnum_collection.end(); + return llvm::is_contained(pauth_regnum_collection, reg); } bool RegisterInfoPOSIX_arm64::IsMTEReg(unsigned reg) const { - return std::find(m_mte_regnum_collection.begin(), - m_mte_regnum_collection.end(), - reg) != m_mte_regnum_collection.end(); + return llvm::is_contained(m_mte_regnum_collection, reg); } uint32_t RegisterInfoPOSIX_arm64::GetRegNumSVEZ0() const { return sve_z0; } diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp index e5461c1899ec..755b8220c49e 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp @@ -67,8 +67,8 @@ GDBRemoteCommunication::GDBRemoteCommunication(const char *comm_name, m_packet_timeout(1), #endif m_echo_number(0), m_supports_qEcho(eLazyBoolCalculate), m_history(512), - m_send_acks(true), m_compression_type(CompressionType::None), - m_listen_url() { + m_send_acks(true), m_is_platform(false), + m_compression_type(CompressionType::None), m_listen_url() { } // Destructor @@ -1266,7 +1266,7 @@ GDBRemoteCommunication::ConnectLocally(GDBRemoteCommunication &client, GDBRemoteCommunication::ScopedTimeout::ScopedTimeout( GDBRemoteCommunication &gdb_comm, std::chrono::seconds timeout) - : m_gdb_comm(gdb_comm), m_timeout_modified(false) { + : m_gdb_comm(gdb_comm), m_saved_timeout(0), m_timeout_modified(false) { auto curr_timeout = gdb_comm.GetPacketTimeout(); // Only update the timeout if the timeout is greater than the current // timeout. If the current timeout is larger, then just use that. 
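[Editorial note.] Several hunks above are mechanical migrations from iterator-pair calls to LLVM's range-based helpers. A minimal sketch of the forms being adopted, assuming an LLVM source tree to compile against:

#include "llvm/ADT/STLExtras.h" // llvm::sort, llvm::is_contained
#include <vector>

void Example(std::vector<int> &remove_indexes, unsigned reg,
             const std::vector<unsigned> &pauth_regs) {
  // Instead of llvm::sort(remove_indexes.begin(), remove_indexes.end()):
  llvm::sort(remove_indexes);
  // Instead of std::find(pauth_regs.begin(), pauth_regs.end(), reg) != end():
  bool is_pauth = llvm::is_contained(pauth_regs, reg);
  (void)is_pauth;
}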
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp index 63174ef55219..5804c13fe7b6 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp @@ -246,6 +246,9 @@ void GDBRemoteCommunicationServerLLGS::RegisterPacketHandlers() { StringExtractorGDBRemote::eServerPacketType_QNonStop, &GDBRemoteCommunicationServerLLGS::Handle_QNonStop); RegisterMemberFunctionHandler( + StringExtractorGDBRemote::eServerPacketType_vStdio, + &GDBRemoteCommunicationServerLLGS::Handle_vStdio); + RegisterMemberFunctionHandler( StringExtractorGDBRemote::eServerPacketType_vStopped, &GDBRemoteCommunicationServerLLGS::Handle_vStopped); RegisterMemberFunctionHandler( @@ -290,7 +293,9 @@ Status GDBRemoteCommunicationServerLLGS::LaunchProcess() { if (!process_or) return Status(process_or.takeError()); m_continue_process = m_current_process = process_or->get(); - m_debugged_processes[m_current_process->GetID()] = std::move(*process_or); + m_debugged_processes.emplace( + m_current_process->GetID(), + DebuggedProcess{std::move(*process_or), DebuggedProcess::Flag{}}); } SetEnabledExtensions(*m_current_process); @@ -361,7 +366,9 @@ Status GDBRemoteCommunicationServerLLGS::AttachToProcess(lldb::pid_t pid) { return status; } m_continue_process = m_current_process = process_or->get(); - m_debugged_processes[m_current_process->GetID()] = std::move(*process_or); + m_debugged_processes.emplace( + m_current_process->GetID(), + DebuggedProcess{std::move(*process_or), DebuggedProcess::Flag{}}); SetEnabledExtensions(*m_current_process); // Setup stdout/stderr mapping from inferior. @@ -489,12 +496,14 @@ GDBRemoteCommunicationServerLLGS::SendWResponse( *wait_status); // If the process was killed through vKill, return "OK". 
- if (m_vkilled_processes.find(process->GetID()) != m_vkilled_processes.end()) + if (bool(m_debugged_processes.at(process->GetID()).flags & + DebuggedProcess::Flag::vkilled)) return SendOKResponse(); StreamGDBRemote response; response.Format("{0:g}", *wait_status); - if (bool(m_extensions_supported & NativeProcessProtocol::Extension::multiprocess)) + if (bool(m_extensions_supported & + NativeProcessProtocol::Extension::multiprocess)) response.Format(";process:{0:x-}", process->GetID()); if (m_non_stop) return SendNotificationPacketNoLock("Stop", m_stop_notification_queue, @@ -1016,9 +1025,11 @@ void GDBRemoteCommunicationServerLLGS::EnqueueStopReplyPackets( return; for (NativeThreadProtocol &listed_thread : m_current_process->Threads()) { - if (listed_thread.GetID() != thread_to_skip) - m_stop_notification_queue.push_back( - PrepareStopReplyPacketForThread(listed_thread).GetString().str()); + if (listed_thread.GetID() != thread_to_skip) { + StreamString stop_reply = PrepareStopReplyPacketForThread(listed_thread); + if (!stop_reply.Empty()) + m_stop_notification_queue.push_back(stop_reply.GetString().str()); + } } } @@ -1045,14 +1056,14 @@ void GDBRemoteCommunicationServerLLGS::HandleInferiorState_Exited( lldb::pid_t pid = process->GetID(); m_mainloop.AddPendingCallback([this, pid](MainLoopBase &loop) { - m_debugged_processes.erase(pid); - auto vkill_it = m_vkilled_processes.find(pid); - if (vkill_it != m_vkilled_processes.end()) - m_vkilled_processes.erase(vkill_it); + auto find_it = m_debugged_processes.find(pid); + assert(find_it != m_debugged_processes.end()); + bool vkilled = bool(find_it->second.flags & DebuggedProcess::Flag::vkilled); + m_debugged_processes.erase(find_it); // Terminate the main loop only if vKill has not been used. // When running in non-stop mode, wait for the vStopped to clear // the notification queue. - else if (m_debugged_processes.empty() && !m_non_stop) { + if (m_debugged_processes.empty() && !m_non_stop && !vkilled) { // Close the pipe to the inferior terminal i/o if we launched it and set // one up. MaybeCloseInferiorTerminalConnection(); @@ -1071,23 +1082,13 @@ void GDBRemoteCommunicationServerLLGS::HandleInferiorState_Stopped( Log *log = GetLog(LLDBLog::Process); LLDB_LOGF(log, "GDBRemoteCommunicationServerLLGS::%s called", __FUNCTION__); - // Send the stop reason unless this is the stop after the launch or attach. - switch (m_inferior_prev_state) { - case eStateLaunching: - case eStateAttaching: - // Don't send anything per debugserver behavior. - break; - default: - // In all other cases, send the stop reason. - PacketResult result = SendStopReasonForState( - *process, StateType::eStateStopped, /*force_synchronous=*/false); - if (result != PacketResult::Success) { - LLDB_LOGF(log, - "GDBRemoteCommunicationServerLLGS::%s failed to send stop " - "notification for PID %" PRIu64 ", state: eStateExited", - __FUNCTION__, process->GetID()); - } - break; + PacketResult result = SendStopReasonForState( + *process, StateType::eStateStopped, /*force_synchronous=*/false); + if (result != PacketResult::Success) { + LLDB_LOGF(log, + "GDBRemoteCommunicationServerLLGS::%s failed to send stop " + "notification for PID %" PRIu64 ", state: eStateExited", + __FUNCTION__, process->GetID()); } } @@ -1112,14 +1113,16 @@ void GDBRemoteCommunicationServerLLGS::ProcessStateChanged( SendProcessOutput(); // Then stop the forwarding, so that any late output (see llvm.org/pr25652) // does not interfere with our protocol. 
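[Editorial sketch with hypothetical helper names, not part of the imported patch.] The non-stop changes above forward inferior output as an asynchronous "Stdio" notification rather than a plain O packet: the payload is still an 'O' followed by the raw bytes in hex, and queued output can then be fetched with the newly registered vStdio packet, much like vStopped drains stop notifications. A rough standalone sketch of how such a payload is formed:

#include <cstdio>
#include <string>

static std::string MakeStdioPayload(const char *buffer, size_t len) {
  static const char hex[] = "0123456789abcdef";
  std::string payload = "O"; // 'O' marks inferior stdio output
  for (size_t i = 0; i < len; ++i) {
    unsigned char byte = static_cast<unsigned char>(buffer[i]);
    payload += hex[byte >> 4]; // each byte becomes two hex digits
    payload += hex[byte & 0xf];
  }
  return payload;
}

int main() {
  std::string p = MakeStdioPayload("hi\n", 3);
  // Prints "%Stdio:O68690a" (notification framing and checksum omitted).
  std::printf("%%Stdio:%s\n", p.c_str());
}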
- StopSTDIOForwarding(); + if (!m_non_stop) + StopSTDIOForwarding(); HandleInferiorState_Stopped(process); break; case StateType::eStateExited: // Same as above SendProcessOutput(); - StopSTDIOForwarding(); + if (!m_non_stop) + StopSTDIOForwarding(); HandleInferiorState_Exited(process); break; @@ -1132,9 +1135,6 @@ void GDBRemoteCommunicationServerLLGS::ProcessStateChanged( } break; } - - // Remember the previous state reported to us. - m_inferior_prev_state = state; } void GDBRemoteCommunicationServerLLGS::DidExec(NativeProcessProtocol *process) { @@ -1147,7 +1147,9 @@ void GDBRemoteCommunicationServerLLGS::NewSubprocess( lldb::pid_t child_pid = child_process->GetID(); assert(child_pid != LLDB_INVALID_PROCESS_ID); assert(m_debugged_processes.find(child_pid) == m_debugged_processes.end()); - m_debugged_processes[child_pid] = std::move(child_process); + m_debugged_processes.emplace( + child_pid, + DebuggedProcess{std::move(child_process), DebuggedProcess::Flag{}}); } void GDBRemoteCommunicationServerLLGS::DataAvailableCallback() { @@ -1197,6 +1199,9 @@ GDBRemoteCommunicationServerLLGS::SendONotification(const char *buffer, response.PutChar('O'); response.PutBytesAsRawHex8(buffer, len); + if (m_non_stop) + return SendNotificationPacketNoLock("Stdio", m_stdio_notification_queue, + response.GetString()); return SendPacketNoLock(response.GetString()); } @@ -1422,7 +1427,8 @@ GDBRemoteCommunication::PacketResult GDBRemoteCommunicationServerLLGS::Handle_k(StringExtractorGDBRemote &packet) { Log *log = GetLog(LLDBLog::Process); - StopSTDIOForwarding(); + if (!m_non_stop) + StopSTDIOForwarding(); if (m_debugged_processes.empty()) { LLDB_LOG(log, "No debugged process found."); @@ -1432,7 +1438,7 @@ GDBRemoteCommunicationServerLLGS::Handle_k(StringExtractorGDBRemote &packet) { for (auto it = m_debugged_processes.begin(); it != m_debugged_processes.end(); ++it) { LLDB_LOG(log, "Killing process {0}", it->first); - Status error = it->second->Kill(); + Status error = it->second.process_up->Kill(); if (error.Fail()) LLDB_LOG(log, "Failed to kill debugged process {0}: {1}", it->first, error); @@ -1448,7 +1454,8 @@ GDBRemoteCommunicationServerLLGS::Handle_k(StringExtractorGDBRemote &packet) { GDBRemoteCommunication::PacketResult GDBRemoteCommunicationServerLLGS::Handle_vKill( StringExtractorGDBRemote &packet) { - StopSTDIOForwarding(); + if (!m_non_stop) + StopSTDIOForwarding(); packet.SetFilePos(6); // vKill; uint32_t pid = packet.GetU32(LLDB_INVALID_PROCESS_ID, 16); @@ -1460,12 +1467,12 @@ GDBRemoteCommunicationServerLLGS::Handle_vKill( if (it == m_debugged_processes.end()) return SendErrorResponse(42); - Status error = it->second->Kill(); + Status error = it->second.process_up->Kill(); if (error.Fail()) return SendErrorResponse(error.ToError()); // OK response is sent when the process dies. - m_vkilled_processes.insert(pid); + it->second.flags |= DebuggedProcess::Flag::vkilled; return PacketResult::Success; } @@ -1518,6 +1525,30 @@ GDBRemoteCommunicationServerLLGS::Handle_QListThreadsInStopReply( } GDBRemoteCommunication::PacketResult +GDBRemoteCommunicationServerLLGS::ResumeProcess( + NativeProcessProtocol &process, const ResumeActionList &actions) { + Log *log = GetLog(LLDBLog::Process | LLDBLog::Thread); + + // In non-stop protocol mode, the process could be running already. + // We do not support resuming threads independently, so just error out. 
+ if (!process.CanResume()) { + LLDB_LOG(log, "process {0} cannot be resumed (state={1})", process.GetID(), + process.GetState()); + return SendErrorResponse(0x37); + } + + Status error = process.Resume(actions); + if (error.Fail()) { + LLDB_LOG(log, "process {0} failed to resume: {1}", process.GetID(), error); + return SendErrorResponse(GDBRemoteServerError::eErrorResume); + } + + LLDB_LOG(log, "process {0} resumed", process.GetID()); + + return PacketResult::Success; +} + +GDBRemoteCommunication::PacketResult GDBRemoteCommunicationServerLLGS::Handle_C(StringExtractorGDBRemote &packet) { Log *log = GetLog(LLDBLog::Process | LLDBLog::Thread); LLDB_LOGF(log, "GDBRemoteCommunicationServerLLGS::%s called", __FUNCTION__); @@ -1552,6 +1583,14 @@ GDBRemoteCommunicationServerLLGS::Handle_C(StringExtractorGDBRemote &packet) { packet, "unexpected content after $C{signal-number}"); } + // In non-stop protocol mode, the process could be running already. + // We do not support resuming threads independently, so just error out. + if (!m_continue_process->CanResume()) { + LLDB_LOG(log, "process cannot be resumed (state={0})", + m_continue_process->GetState()); + return SendErrorResponse(0x37); + } + ResumeActionList resume_actions(StateType::eStateRunning, LLDB_INVALID_SIGNAL_NUMBER); Status error; @@ -1585,14 +1624,11 @@ GDBRemoteCommunicationServerLLGS::Handle_C(StringExtractorGDBRemote &packet) { } } - // Resume the threads. - error = m_continue_process->Resume(resume_actions); - if (error.Fail()) { - LLDB_LOG(log, "failed to resume threads for process {0}: {1}", - m_continue_process->GetID(), error); - - return SendErrorResponse(0x38); - } + // NB: this checks CanResume() twice but using a single code path for + // resuming still seems worth it. + PacketResult resume_res = ResumeProcess(*m_continue_process, resume_actions); + if (resume_res != PacketResult::Success) + return resume_res; // Don't send an "OK" packet, except in non-stop mode; // otherwise, the response is the stopped/exited message. @@ -1627,14 +1663,9 @@ GDBRemoteCommunicationServerLLGS::Handle_c(StringExtractorGDBRemote &packet) { ResumeActionList actions(StateType::eStateRunning, LLDB_INVALID_SIGNAL_NUMBER); - Status error = m_continue_process->Resume(actions); - if (error.Fail()) { - LLDB_LOG(log, "c failed for process {0}: {1}", m_continue_process->GetID(), - error); - return SendErrorResponse(GDBRemoteServerError::eErrorResume); - } - - LLDB_LOG(log, "continued process {0}", m_continue_process->GetID()); + PacketResult resume_res = ResumeProcess(*m_continue_process, actions); + if (resume_res != PacketResult::Success) + return resume_res; return SendContinueSuccessResponse(); } @@ -1648,6 +1679,18 @@ GDBRemoteCommunicationServerLLGS::Handle_vCont_actions( return SendPacketNoLock(response.GetString()); } +static bool ResumeActionListStopsAllThreads(ResumeActionList &actions) { + // We're doing a stop-all if and only if our only action is a "t" for all + // threads. + if (const ResumeAction *default_action = + actions.GetActionForThread(LLDB_INVALID_THREAD_ID, false)) { + if (default_action->state == eStateSuspended && actions.GetSize() == 1) + return true; + } + + return false; +} + GDBRemoteCommunication::PacketResult GDBRemoteCommunicationServerLLGS::Handle_vCont( StringExtractorGDBRemote &packet) { @@ -1669,9 +1712,6 @@ GDBRemoteCommunicationServerLLGS::Handle_vCont( // Move past the ';', then do a simple 's'. 
packet.SetFilePos(packet.GetFilePos() + 1); return Handle_s(packet); - } else if (m_non_stop && ::strcmp(packet.Peek(), ";t") == 0) { - // TODO: add full support for "t" action - return SendOKResponse(); } std::unordered_map<lldb::pid_t, ResumeActionList> thread_actions; @@ -1738,6 +1778,12 @@ GDBRemoteCommunicationServerLLGS::Handle_vCont( tid = pid_tid->second; } + if (thread_action.state == eStateSuspended && + tid != StringExtractorGDBRemote::AllThreads) { + return SendIllFormedResponse( + packet, "'t' action not supported for individual threads"); + } + if (pid == StringExtractorGDBRemote::AllProcesses) { if (m_debugged_processes.size() > 1) return SendIllFormedResponse( @@ -1770,13 +1816,43 @@ GDBRemoteCommunicationServerLLGS::Handle_vCont( return SendErrorResponse(GDBRemoteServerError::eErrorResume); } - Status error = process_it->second->Resume(x.second); - if (error.Fail()) { - LLDB_LOG(log, "vCont failed for process {0}: {1}", x.first, error); - return SendErrorResponse(GDBRemoteServerError::eErrorResume); - } + // There are four possible scenarios here. These are: + // 1. vCont on a stopped process that resumes at least one thread. + // In this case, we call Resume(). + // 2. vCont on a stopped process that leaves all threads suspended. + // A no-op. + // 3. vCont on a running process that requests suspending all + // running threads. In this case, we call Interrupt(). + // 4. vCont on a running process that requests suspending a subset + // of running threads or resuming a subset of suspended threads. + // Since we do not support full nonstop mode, this is unsupported + // and we return an error. + + assert(process_it->second.process_up); + if (ResumeActionListStopsAllThreads(x.second)) { + if (process_it->second.process_up->IsRunning()) { + assert(m_non_stop); + + Status error = process_it->second.process_up->Interrupt(); + if (error.Fail()) { + LLDB_LOG(log, "vCont failed to halt process {0}: {1}", x.first, + error); + return SendErrorResponse(GDBRemoteServerError::eErrorResume); + } + + LLDB_LOG(log, "halted process {0}", x.first); - LLDB_LOG(log, "continued process {0}", x.first); + // hack to avoid enabling stdio forwarding after stop + // TODO: remove this when we improve stdio forwarding for nonstop + assert(thread_actions.size() == 1); + return SendOKResponse(); + } + } else { + PacketResult resume_res = + ResumeProcess(*process_it->second.process_up, x.second); + if (resume_res != PacketResult::Success) + return resume_res; + } } return SendContinueSuccessResponse(); @@ -1815,9 +1891,11 @@ GDBRemoteCommunicationServerLLGS::Handle_stop_reason( // the current thread (for clients that don't actually support multiple // stop reasons). NativeThreadProtocol *thread = m_current_process->GetCurrentThread(); - if (thread) - m_stop_notification_queue.push_back( - PrepareStopReplyPacketForThread(*thread).GetString().str()); + if (thread) { + StreamString stop_reply = PrepareStopReplyPacketForThread(*thread); + if (!stop_reply.Empty()) + m_stop_notification_queue.push_back(stop_reply.GetString().str()); + } EnqueueStopReplyPackets(thread ? thread->GetID() : LLDB_INVALID_THREAD_ID); } @@ -1845,6 +1923,20 @@ GDBRemoteCommunicationServerLLGS::SendStopReasonForState( bool force_synchronous) { Log *log = GetLog(LLDBLog::Process); + if (m_disabling_non_stop) { + // Check if we are waiting for any more processes to stop. If we are, + // do not send the OK response yet. 
+ for (const auto &it : m_debugged_processes) { + if (it.second.process_up->IsRunning()) + return PacketResult::Success; + } + + // If all expected processes were stopped after a QNonStop:0 request, + // send the OK response. + m_disabling_non_stop = false; + return SendOKResponse(); + } + switch (process_state) { case eStateAttaching: case eStateLaunching: @@ -1998,7 +2090,7 @@ GDBRemoteCommunicationServerLLGS::Handle_qfThreadInfo( StreamGDBRemote response; for (auto &pid_ptr : m_debugged_processes) - AddProcessThreads(response, *pid_ptr.second, had_any); + AddProcessThreads(response, *pid_ptr.second.process_up, had_any); if (!had_any) return SendOKResponse(); @@ -2284,7 +2376,8 @@ GDBRemoteCommunicationServerLLGS::Handle_H(StringExtractorGDBRemote &packet) { // Ensure we have the given thread when not specifying -1 (all threads) or 0 // (any thread). if (tid != LLDB_INVALID_THREAD_ID && tid != 0) { - NativeThreadProtocol *thread = new_process_it->second->GetThreadByID(tid); + NativeThreadProtocol *thread = + new_process_it->second.process_up->GetThreadByID(tid); if (!thread) { LLDB_LOGF(log, "GDBRemoteCommunicationServerLLGS::%s failed, tid %" PRIu64 @@ -2297,12 +2390,12 @@ GDBRemoteCommunicationServerLLGS::Handle_H(StringExtractorGDBRemote &packet) { // Now switch the given process and thread type. switch (h_variant) { case 'g': - m_current_process = new_process_it->second.get(); + m_current_process = new_process_it->second.process_up.get(); SetCurrentThreadID(tid); break; case 'c': - m_continue_process = new_process_it->second.get(); + m_continue_process = new_process_it->second.process_up.get(); SetContinueThreadID(tid); break; @@ -2944,15 +3037,10 @@ GDBRemoteCommunicationServerLLGS::Handle_s(StringExtractorGDBRemote &packet) { // All other threads stop while we're single stepping a thread. actions.SetDefaultThreadActionIfNeeded(eStateStopped, 0); - Status error = m_continue_process->Resume(actions); - if (error.Fail()) { - LLDB_LOGF(log, - "GDBRemoteCommunicationServerLLGS::%s pid %" PRIu64 - " tid %" PRIu64 " Resume() failed with error: %s", - __FUNCTION__, m_continue_process->GetID(), tid, - error.AsCString()); - return SendErrorResponse(0x49); - } + + PacketResult resume_res = ResumeProcess(*m_continue_process, actions); + if (resume_res != PacketResult::Success) + return resume_res; // No response here, unless in non-stop mode. // Otherwise, the stop or exit will come from the resulting action. 
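For orientation, the resume-side exchange that these hunks implement might, in non-stop mode, look roughly like the following (an illustrative trace only; "->" is the client to lldb-server, "<-" is the reply, and the payload values are invented):

    -> QNonStop:1        <- OK                          non-stop enabled, stdio forwarding started
    -> vCont;c           <- OK                          SendContinueSuccessResponse() returns OK in non-stop mode
       (inferior stops)  <- %Stop:T05thread:1a2b;...    stop reply queued and sent as an async notification

The client then drains the notification queue with vStopped, as spelled out in the ack handling further below.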
@@ -3440,7 +3528,8 @@ GDBRemoteCommunicationServerLLGS::Handle_vRun( GDBRemoteCommunication::PacketResult GDBRemoteCommunicationServerLLGS::Handle_D(StringExtractorGDBRemote &packet) { Log *log = GetLog(LLDBLog::Process); - StopSTDIOForwarding(); + if (!m_non_stop) + StopSTDIOForwarding(); lldb::pid_t pid = LLDB_INVALID_PROCESS_ID; @@ -3466,12 +3555,12 @@ GDBRemoteCommunicationServerLLGS::Handle_D(StringExtractorGDBRemote &packet) { LLDB_LOGF(log, "GDBRemoteCommunicationServerLLGS::%s detaching %" PRId64, __FUNCTION__, it->first); - if (llvm::Error e = it->second->Detach().ToError()) + if (llvm::Error e = it->second.process_up->Detach().ToError()) detach_error = llvm::joinErrors(std::move(detach_error), std::move(e)); else { - if (it->second.get() == m_current_process) + if (it->second.process_up.get() == m_current_process) m_current_process = nullptr; - if (it->second.get() == m_continue_process) + if (it->second.process_up.get() == m_continue_process) m_continue_process = nullptr; it = m_debugged_processes.erase(it); detached = true; @@ -3833,13 +3922,38 @@ GDBRemoteCommunicationServerLLGS::Handle_qSaveCore( GDBRemoteCommunication::PacketResult GDBRemoteCommunicationServerLLGS::Handle_QNonStop( StringExtractorGDBRemote &packet) { + Log *log = GetLog(LLDBLog::Process); + StringRef packet_str{packet.GetStringRef()}; assert(packet_str.startswith("QNonStop:")); packet_str.consume_front("QNonStop:"); if (packet_str == "0") { + if (m_non_stop) + StopSTDIOForwarding(); + for (auto &process_it : m_debugged_processes) { + if (process_it.second.process_up->IsRunning()) { + assert(m_non_stop); + Status error = process_it.second.process_up->Interrupt(); + if (error.Fail()) { + LLDB_LOG(log, + "while disabling nonstop, failed to halt process {0}: {1}", + process_it.first, error); + return SendErrorResponse(0x41); + } + // we must not send stop reasons after QNonStop + m_disabling_non_stop = true; + } + } + m_stdio_notification_queue.clear(); + m_stop_notification_queue.clear(); m_non_stop = false; - // TODO: stop all threads + // If we are stopping anything, defer sending the OK response until we're + // done. + if (m_disabling_non_stop) + return PacketResult::Success; } else if (packet_str == "1") { + if (!m_non_stop) + StartSTDIOForwarding(); m_non_stop = true; } else return SendErrorResponse(Status("Invalid QNonStop packet")); @@ -3847,26 +3961,38 @@ GDBRemoteCommunicationServerLLGS::Handle_QNonStop( } GDBRemoteCommunication::PacketResult -GDBRemoteCommunicationServerLLGS::Handle_vStopped( - StringExtractorGDBRemote &packet) { +GDBRemoteCommunicationServerLLGS::HandleNotificationAck( + std::deque<std::string> &queue) { // Per the protocol, the first message put into the queue is sent - // immediately. However, it remains the queue until the client ACKs - // it via vStopped -- then we pop it and send the next message. - // The process repeats until the last message in the queue is ACK-ed, - // in which case the vStopped packet sends an OK response. - - if (m_stop_notification_queue.empty()) + // immediately. However, it remains the queue until the client ACKs it -- + // then we pop it and send the next message. The process repeats until + // the last message in the queue is ACK-ed, in which case the packet sends + // an OK response. 
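To spell the ack protocol out, an illustrative sequence (assuming two stop notifications A and B are queued):

    (A is queued first)   <- %Stop:A      sent immediately as an async notification
    -> vStopped           <- B            A popped, next queued message sent as the reply
    -> vStopped           <- OK           B popped, queue now empty
    -> vStopped           <- error        "No pending notification to ack"

The same drain logic now also backs vStdio through m_stdio_notification_queue.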
+ if (queue.empty()) return SendErrorResponse(Status("No pending notification to ack")); - m_stop_notification_queue.pop_front(); - if (!m_stop_notification_queue.empty()) - return SendPacketNoLock(m_stop_notification_queue.front()); + queue.pop_front(); + if (!queue.empty()) + return SendPacketNoLock(queue.front()); + return SendOKResponse(); +} + +GDBRemoteCommunication::PacketResult +GDBRemoteCommunicationServerLLGS::Handle_vStdio( + StringExtractorGDBRemote &packet) { + return HandleNotificationAck(m_stdio_notification_queue); +} + +GDBRemoteCommunication::PacketResult +GDBRemoteCommunicationServerLLGS::Handle_vStopped( + StringExtractorGDBRemote &packet) { + PacketResult ret = HandleNotificationAck(m_stop_notification_queue); // If this was the last notification and all the processes exited, // terminate the server. - if (m_debugged_processes.empty()) { + if (m_stop_notification_queue.empty() && m_debugged_processes.empty()) { m_exit_now = true; m_mainloop.RequestTermination(); } - return SendOKResponse(); + return ret; } GDBRemoteCommunication::PacketResult @@ -3907,7 +4033,7 @@ GDBRemoteCommunicationServerLLGS::Handle_T(StringExtractorGDBRemote &packet) { return SendErrorResponse(1); // Check the thread ID - if (!new_process_it->second->GetThreadByID(tid)) + if (!new_process_it->second.process_up->GetThreadByID(tid)) return SendErrorResponse(2); return SendOKResponse(); @@ -4108,7 +4234,7 @@ std::vector<std::string> GDBRemoteCommunicationServerLLGS::HandleFeatures( ret.push_back("vfork-events+"); for (auto &x : m_debugged_processes) - SetEnabledExtensions(*x.second); + SetEnabledExtensions(*x.second.process_up); return ret; } @@ -4121,9 +4247,10 @@ void GDBRemoteCommunicationServerLLGS::SetEnabledExtensions( GDBRemoteCommunication::PacketResult GDBRemoteCommunicationServerLLGS::SendContinueSuccessResponse() { - // TODO: how to handle forwarding in non-stop mode? + if (m_non_stop) + return SendOKResponse(); StartSTDIOForwarding(); - return m_non_stop ? 
SendOKResponse() : PacketResult::Success; + return PacketResult::Success; } void GDBRemoteCommunicationServerLLGS::AppendThreadIDToResponse( diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.h b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.h index 5187a953f957..1165b60ac762 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.h +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.h @@ -85,6 +85,17 @@ public: Status InitializeConnection(std::unique_ptr<Connection> connection); + struct DebuggedProcess { + enum class Flag { + vkilled = (1u << 0), + + LLVM_MARK_AS_BITMASK_ENUM(vkilled) + }; + + std::unique_ptr<NativeProcessProtocol> process_up; + Flag flags; + }; + protected: MainLoop &m_mainloop; MainLoop::ReadHandleUP m_network_handle_up; @@ -94,14 +105,11 @@ protected: NativeProcessProtocol *m_current_process; NativeProcessProtocol *m_continue_process; std::recursive_mutex m_debugged_process_mutex; - std::unordered_map<lldb::pid_t, std::unique_ptr<NativeProcessProtocol>> - m_debugged_processes; - std::unordered_set<lldb::pid_t> m_vkilled_processes; + std::unordered_map<lldb::pid_t, DebuggedProcess> m_debugged_processes; Communication m_stdio_communication; MainLoop::ReadHandleUP m_stdio_handle_up; - lldb::StateType m_inferior_prev_state = lldb::StateType::eStateInvalid; llvm::StringMap<std::unique_ptr<llvm::MemoryBuffer>> m_xfer_buffer_map; std::mutex m_saved_registers_mutex; std::unordered_map<uint32_t, lldb::DataBufferSP> m_saved_registers_map; @@ -109,6 +117,8 @@ protected: bool m_thread_suffix_supported = false; bool m_list_threads_in_stop_reply = false; bool m_non_stop = false; + bool m_disabling_non_stop = false; + std::deque<std::string> m_stdio_notification_queue; std::deque<std::string> m_stop_notification_queue; NativeProcessProtocol::Extension m_extensions_supported = {}; @@ -147,6 +157,9 @@ protected: PacketResult Handle_QListThreadsInStopReply(StringExtractorGDBRemote &packet); + PacketResult ResumeProcess(NativeProcessProtocol &process, + const ResumeActionList &actions); + PacketResult Handle_C(StringExtractorGDBRemote &packet); PacketResult Handle_c(StringExtractorGDBRemote &packet); @@ -236,6 +249,10 @@ protected: PacketResult Handle_QNonStop(StringExtractorGDBRemote &packet); + PacketResult HandleNotificationAck(std::deque<std::string> &queue); + + PacketResult Handle_vStdio(StringExtractorGDBRemote &packet); + PacketResult Handle_vStopped(StringExtractorGDBRemote &packet); PacketResult Handle_vCtrlC(StringExtractorGDBRemote &packet); diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp index fe6a3f9ed6c1..5f18706f67e5 100644 --- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp @@ -5307,8 +5307,11 @@ void ProcessGDBRemote::DidFork(lldb::pid_t child_pid, lldb::tid_t child_tid) { // Hardware breakpoints/watchpoints are not inherited implicitly, // so we need to readd them if we're following child. - if (GetFollowForkMode() == eFollowChild) + if (GetFollowForkMode() == eFollowChild) { DidForkSwitchHardwareTraps(true); + // Update our PID + SetID(child_pid); + } } void ProcessGDBRemote::DidVFork(lldb::pid_t child_pid, lldb::tid_t child_tid) { @@ -5361,6 +5364,11 @@ void ProcessGDBRemote::DidVFork(lldb::pid_t child_pid, lldb::tid_t child_tid) { error.AsCString() ? 
error.AsCString() : "<unknown error>"); return; } + + if (GetFollowForkMode() == eFollowChild) { + // Update our PID + SetID(child_pid); + } } void ProcessGDBRemote::DidVForkDone() { diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp index 53e2bcaccafb..a21adcfbdbd6 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp @@ -2637,7 +2637,7 @@ bool ScriptInterpreterPythonImpl::LoadScriptingModule( .SetSetLLDBGlobals(false); if (!pathname || !pathname[0]) { - error.SetErrorString("invalid pathname"); + error.SetErrorString("empty path"); return false; } @@ -2707,14 +2707,14 @@ bool ScriptInterpreterPythonImpl::LoadScriptingModule( // if not a valid file of any sort, check if it might be a filename still // dot can't be used but / and \ can, and if either is found, reject if (strchr(pathname, '\\') || strchr(pathname, '/')) { - error.SetErrorString("invalid pathname"); + error.SetErrorStringWithFormatv("invalid pathname '{0}'", pathname); return false; } // Not a filename, probably a package of some sort, let it go through. possible_package = true; } else if (is_directory(st) || is_regular_file(st)) { if (module_file.GetDirectory().IsEmpty()) { - error.SetErrorString("invalid directory name"); + error.SetErrorStringWithFormatv("invalid directory name '{0}'", pathname); return false; } if (llvm::Error e = diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 4b9354371bda..10dc8d1fb7c3 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -2390,6 +2390,7 @@ struct MemberAttributes { uint64_t data_bit_offset = UINT64_MAX; AccessType accessibility = eAccessNone; llvm::Optional<uint64_t> byte_size; + llvm::Optional<DWARFFormValue> const_value_form; DWARFFormValue encoding_form; /// Indicates the byte offset of the word from the base address of the /// structure. @@ -2436,6 +2437,9 @@ MemberAttributes::MemberAttributes(const DWARFDIE &die, case DW_AT_byte_size: byte_size = form_value.Unsigned(); break; + case DW_AT_const_value: + const_value_form = form_value; + break; case DW_AT_data_bit_offset: data_bit_offset = form_value.Unsigned(); break; @@ -2587,12 +2591,65 @@ void DWARFASTParserClang::ParseObjCProperty( propAttrs.prop_getter_name, propAttrs.prop_attributes, &metadata)); } +llvm::Expected<llvm::APInt> DWARFASTParserClang::ExtractIntFromFormValue( + const CompilerType &int_type, const DWARFFormValue &form_value) const { + clang::QualType qt = ClangUtil::GetQualType(int_type); + assert(qt->isIntegralOrEnumerationType()); + TypeSystemClang &ts = *llvm::cast<TypeSystemClang>(int_type.GetTypeSystem()); + clang::ASTContext &ast = ts.getASTContext(); + + const unsigned type_bits = ast.getIntWidth(qt); + const bool is_unsigned = qt->isUnsignedIntegerType(); + + // The maximum int size supported at the moment by this function. Limited + // by the uint64_t return type of DWARFFormValue::Signed/Unsigned. + constexpr std::size_t max_bit_size = 64; + + // For values bigger than 64 bit (e.g. __int128_t values), + // DWARFFormValue's Signed/Unsigned functions will return wrong results so + // emit an error for now. 
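  // (Editorial worked example, not part of the patch, with hypothetical input:
  //  for a signed 8-bit member whose DW_AT_const_value is 200, the APInt below
  //  is built from 200, getMinSignedBits() yields 9, which exceeds type_bits
  //  of 8, so the "Can't store" error fires; a value of 100 needs only 8 bits,
  //  is truncated to the 8-bit width, and is returned.)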
+ if (type_bits > max_bit_size) { + auto msg = llvm::formatv("Can only parse integers with up to {0} bits, but " + "given integer has {1} bits.", + max_bit_size, type_bits); + return llvm::createStringError(llvm::inconvertibleErrorCode(), msg.str()); + } + + // Construct an APInt with the maximum bit size and the given integer. + llvm::APInt result(max_bit_size, form_value.Unsigned(), !is_unsigned); + + // Calculate how many bits are required to represent the input value. + // For unsigned types, take the number of active bits in the APInt. + // For signed types, ask APInt how many bits are required to represent the + // signed integer. + const unsigned required_bits = + is_unsigned ? result.getActiveBits() : result.getMinSignedBits(); + + // If the input value doesn't fit into the integer type, return an error. + if (required_bits > type_bits) { + std::string value_as_str = is_unsigned + ? std::to_string(form_value.Unsigned()) + : std::to_string(form_value.Signed()); + auto msg = llvm::formatv("Can't store {0} value {1} in integer with {2} " + "bits.", + (is_unsigned ? "unsigned" : "signed"), + value_as_str, type_bits); + return llvm::createStringError(llvm::inconvertibleErrorCode(), msg.str()); + } + + // Trim the result to the bit width our the int type. + if (result.getBitWidth() > type_bits) + result = result.trunc(type_bits); + return result; +} + void DWARFASTParserClang::ParseSingleMember( const DWARFDIE &die, const DWARFDIE &parent_die, const lldb_private::CompilerType &class_clang_type, lldb::AccessType default_accessibility, lldb_private::ClangASTImporter::LayoutInfo &layout_info, FieldInfo &last_field_info) { + Log *log = GetLog(DWARFLog::TypeCompletion | DWARFLog::Lookups); // This function can only parse DW_TAG_member. assert(die.Tag() == DW_TAG_member); @@ -2623,9 +2680,27 @@ void DWARFASTParserClang::ParseSingleMember( if (var_type) { if (attrs.accessibility == eAccessNone) attrs.accessibility = eAccessPublic; - TypeSystemClang::AddVariableToRecordType( - class_clang_type, attrs.name, var_type->GetForwardCompilerType(), - attrs.accessibility); + CompilerType ct = var_type->GetForwardCompilerType(); + clang::VarDecl *v = TypeSystemClang::AddVariableToRecordType( + class_clang_type, attrs.name, ct, attrs.accessibility); + if (!v) { + LLDB_LOG(log, "Failed to add variable to the record type"); + return; + } + + bool unused; + // TODO: Support float/double static members as well. + if (!attrs.const_value_form || !ct.IsIntegerOrEnumerationType(unused)) + return; + llvm::Expected<llvm::APInt> const_value_or_err = + ExtractIntFromFormValue(ct, *attrs.const_value_form); + if (!const_value_or_err) { + LLDB_LOG_ERROR(log, const_value_or_err.takeError(), + "Failed to add const value to variable {1}: {0}", + v->getQualifiedNameAsString()); + return; + } + TypeSystemClang::SetIntegerInitializerForVariable(v, *const_value_or_err); } return; } diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h index f97c0c470ab0..733ffa230f1e 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h @@ -68,6 +68,22 @@ public: lldb_private::ClangASTImporter &GetClangASTImporter(); + /// Extracts an value for a given Clang integer type from a DWARFFormValue. + /// + /// \param int_type The Clang type that defines the bit size and signedness + /// of the integer that should be extracted. Has to be either + /// an integer type or an enum type. 
For enum types the + /// underlying integer type will be considered as the + /// expected integer type that should be extracted. + /// \param form_value The DWARFFormValue that contains the integer value. + /// \return An APInt containing the same integer value as the given + /// DWARFFormValue with the bit width of the given integer type. + /// Returns an error if the value in the DWARFFormValue does not fit + /// into the given integer type or the integer type isn't supported. + llvm::Expected<llvm::APInt> + ExtractIntFromFormValue(const lldb_private::CompilerType &int_type, + const DWARFFormValue &form_value) const; + protected: /// Protected typedefs and members. /// @{ diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index cbc24b1550c7..8ee709db9cdb 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -3475,10 +3475,9 @@ VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc, if (use_type_size_for_value && type_sp->GetType()) { DWARFExpression *location = location_list.GetMutableExpressionAtAddress(); - location->UpdateValue( - const_value_form.Unsigned(), - type_sp->GetType()->GetByteSize(nullptr).getValueOr(0), - die.GetCU()->GetAddressByteSize()); + location->UpdateValue(const_value_form.Unsigned(), + type_sp->GetType()->GetByteSize(nullptr).value_or(0), + die.GetCU()->GetAddressByteSize()); } return std::make_shared<Variable>( diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp index 6dcce738b79f..25425f914088 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp @@ -816,6 +816,40 @@ clang::QualType PdbAstBuilder::CreatePointerType(const PointerRecord &pointer) { clang::QualType class_type = GetOrCreateType(mpi.ContainingType); if (class_type.isNull()) return {}; + if (clang::TagDecl *tag = class_type->getAsTagDecl()) { + clang::MSInheritanceAttr::Spelling spelling; + switch (mpi.Representation) { + case llvm::codeview::PointerToMemberRepresentation::SingleInheritanceData: + case llvm::codeview::PointerToMemberRepresentation:: + SingleInheritanceFunction: + spelling = + clang::MSInheritanceAttr::Spelling::Keyword_single_inheritance; + break; + case llvm::codeview::PointerToMemberRepresentation:: + MultipleInheritanceData: + case llvm::codeview::PointerToMemberRepresentation:: + MultipleInheritanceFunction: + spelling = + clang::MSInheritanceAttr::Spelling::Keyword_multiple_inheritance; + break; + case llvm::codeview::PointerToMemberRepresentation:: + VirtualInheritanceData: + case llvm::codeview::PointerToMemberRepresentation:: + VirtualInheritanceFunction: + spelling = + clang::MSInheritanceAttr::Spelling::Keyword_virtual_inheritance; + break; + case llvm::codeview::PointerToMemberRepresentation::Unknown: + spelling = + clang::MSInheritanceAttr::Spelling::Keyword_unspecified_inheritance; + break; + default: + spelling = clang::MSInheritanceAttr::Spelling::SpellingNotCalculated; + break; + } + tag->addAttr(clang::MSInheritanceAttr::CreateImplicit( + m_clang.getASTContext(), spelling)); + } return m_clang.getASTContext().getMemberPointerType( pointee_type, class_type.getTypePtr()); } diff --git a/lldb/source/Plugins/Trace/intel-pt/CommandObjectTraceStartIntelPT.cpp b/lldb/source/Plugins/Trace/intel-pt/CommandObjectTraceStartIntelPT.cpp index 
91eafdaa11bc..dd28292c1daf 100644 --- a/lldb/source/Plugins/Trace/intel-pt/CommandObjectTraceStartIntelPT.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/CommandObjectTraceStartIntelPT.cpp @@ -11,6 +11,7 @@ #include "TraceIntelPT.h" #include "TraceIntelPTConstants.h" #include "lldb/Host/OptionParser.h" +#include "lldb/Interpreter/CommandOptionArgumentTable.h" #include "lldb/Target/Process.h" #include "lldb/Target/Trace.h" diff --git a/lldb/source/Plugins/TraceExporter/ctf/CommandObjectThreadTraceExportCTF.cpp b/lldb/source/Plugins/TraceExporter/ctf/CommandObjectThreadTraceExportCTF.cpp index 194cc7459027..384abd2166df 100644 --- a/lldb/source/Plugins/TraceExporter/ctf/CommandObjectThreadTraceExportCTF.cpp +++ b/lldb/source/Plugins/TraceExporter/ctf/CommandObjectThreadTraceExportCTF.cpp @@ -10,6 +10,7 @@ #include "../common/TraceHTR.h" #include "lldb/Host/OptionParser.h" +#include "lldb/Interpreter/CommandOptionArgumentTable.h" #include "lldb/Target/Process.h" #include "lldb/Target/Trace.h" diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index e0f646b15641..c6eb693bba6b 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -7538,7 +7538,7 @@ void TypeSystemClang::SetIntegerInitializerForVariable( "only integer or enum types supported"); // If the variable is an enum type, take the underlying integer type as // the type of the integer literal. - if (const EnumType *enum_type = llvm::dyn_cast<EnumType>(qt.getTypePtr())) { + if (const EnumType *enum_type = qt->getAs<EnumType>()) { const EnumDecl *enum_decl = enum_type->getDecl(); qt = enum_decl->getIntegerType(); } diff --git a/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.h b/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.h index 97cb04e51408..796e7e50a8b9 100644 --- a/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.h +++ b/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.h @@ -64,8 +64,8 @@ private: lldb_private::EmulateInstruction *inst_emulator) : UnwindAssembly(arch), m_inst_emulator_up(inst_emulator), m_range_ptr(nullptr), m_unwind_plan_ptr(nullptr), m_curr_row(), - m_cfa_reg_info(), m_fp_is_cfa(false), m_register_values(), - m_pushed_regs(), m_curr_row_modified(false), + m_initial_sp(0), m_cfa_reg_info(), m_fp_is_cfa(false), + m_register_values(), m_pushed_regs(), m_curr_row_modified(false), m_forward_branch_offset(0) { if (m_inst_emulator_up.get()) { m_inst_emulator_up->SetBaton(this); diff --git a/lldb/source/Plugins/UnwindAssembly/x86/x86AssemblyInspectionEngine.cpp b/lldb/source/Plugins/UnwindAssembly/x86/x86AssemblyInspectionEngine.cpp index 36e7b90cad24..92eec139e07c 100644 --- a/lldb/source/Plugins/UnwindAssembly/x86/x86AssemblyInspectionEngine.cpp +++ b/lldb/source/Plugins/UnwindAssembly/x86/x86AssemblyInspectionEngine.cpp @@ -24,11 +24,12 @@ x86AssemblyInspectionEngine::x86AssemblyInspectionEngine(const ArchSpec &arch) : m_cur_insn(nullptr), m_machine_ip_regnum(LLDB_INVALID_REGNUM), m_machine_sp_regnum(LLDB_INVALID_REGNUM), m_machine_fp_regnum(LLDB_INVALID_REGNUM), + m_machine_alt_fp_regnum(LLDB_INVALID_REGNUM), m_lldb_ip_regnum(LLDB_INVALID_REGNUM), m_lldb_sp_regnum(LLDB_INVALID_REGNUM), m_lldb_fp_regnum(LLDB_INVALID_REGNUM), - - m_reg_map(), m_arch(arch), m_cpu(k_cpu_unspecified), m_wordsize(-1), + m_lldb_alt_fp_regnum(LLDB_INVALID_REGNUM), m_reg_map(), 
m_arch(arch), + m_cpu(k_cpu_unspecified), m_wordsize(-1), m_register_map_initialized(false), m_disasm_context() { m_disasm_context = ::LLVMCreateDisasm(arch.GetTriple().getTriple().c_str(), nullptr, diff --git a/lldb/source/Symbol/ArmUnwindInfo.cpp b/lldb/source/Symbol/ArmUnwindInfo.cpp index 07852485f44e..ae6cddfbc463 100644 --- a/lldb/source/Symbol/ArmUnwindInfo.cpp +++ b/lldb/source/Symbol/ArmUnwindInfo.cpp @@ -65,7 +65,7 @@ ArmUnwindInfo::ArmUnwindInfo(ObjectFile &objfile, SectionSP &arm_exidx, // Sort the entries in the exidx section. The entries should be sorted inside // the section but some old compiler isn't sorted them. - llvm::sort(m_exidx_entries.begin(), m_exidx_entries.end()); + llvm::sort(m_exidx_entries); } ArmUnwindInfo::~ArmUnwindInfo() = default; diff --git a/lldb/source/Symbol/CompileUnit.cpp b/lldb/source/Symbol/CompileUnit.cpp index cacb78de2426..2bae08c8b930 100644 --- a/lldb/source/Symbol/CompileUnit.cpp +++ b/lldb/source/Symbol/CompileUnit.cpp @@ -63,7 +63,7 @@ void CompileUnit::ForeachFunction( sorted_functions.reserve(m_functions_by_uid.size()); for (auto &p : m_functions_by_uid) sorted_functions.push_back(p.second); - llvm::sort(sorted_functions.begin(), sorted_functions.end(), + llvm::sort(sorted_functions, [](const lldb::FunctionSP &a, const lldb::FunctionSP &b) { return a->GetID() < b->GetID(); }); diff --git a/lldb/source/Symbol/Symtab.cpp b/lldb/source/Symbol/Symtab.cpp index eb2447efbad1..936ee04ed492 100644 --- a/lldb/source/Symbol/Symtab.cpp +++ b/lldb/source/Symbol/Symtab.cpp @@ -1137,7 +1137,7 @@ void Symtab::FindFunctionSymbols(ConstString name, uint32_t name_type_mask, } if (!symbol_indexes.empty()) { - llvm::sort(symbol_indexes.begin(), symbol_indexes.end()); + llvm::sort(symbol_indexes); symbol_indexes.erase( std::unique(symbol_indexes.begin(), symbol_indexes.end()), symbol_indexes.end()); diff --git a/lldb/source/Target/DynamicRegisterInfo.cpp b/lldb/source/Target/DynamicRegisterInfo.cpp index e2962b02ed7f..14c3faae38df 100644 --- a/lldb/source/Target/DynamicRegisterInfo.cpp +++ b/lldb/source/Target/DynamicRegisterInfo.cpp @@ -483,7 +483,7 @@ void DynamicRegisterInfo::Finalize(const ArchSpec &arch) { end = m_invalidate_regs_map.end(); pos != end; ++pos) { if (pos->second.size() > 1) { - llvm::sort(pos->second.begin(), pos->second.end()); + llvm::sort(pos->second); reg_num_collection::iterator unique_end = std::unique(pos->second.begin(), pos->second.end()); if (unique_end != pos->second.end()) diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp index 65064ecf75b1..f16fc6b5da85 100644 --- a/lldb/source/Target/Target.cpp +++ b/lldb/source/Target/Target.cpp @@ -776,7 +776,7 @@ void Target::GetBreakpointNames(std::vector<std::string> &names) { for (auto bp_name : m_breakpoint_names) { names.push_back(bp_name.first.AsCString()); } - llvm::sort(names.begin(), names.end()); + llvm::sort(names); } bool Target::ProcessIsValid() { diff --git a/lldb/source/Target/TargetList.cpp b/lldb/source/Target/TargetList.cpp index 214e98ee91ed..829036976a21 100644 --- a/lldb/source/Target/TargetList.cpp +++ b/lldb/source/Target/TargetList.cpp @@ -509,8 +509,7 @@ uint32_t TargetList::GetIndexOfTarget(lldb::TargetSP target_sp) const { } void TargetList::AddTargetInternal(TargetSP target_sp, bool do_select) { - lldbassert(std::find(m_target_list.begin(), m_target_list.end(), target_sp) == - m_target_list.end() && + lldbassert(!llvm::is_contained(m_target_list, target_sp) && "target already exists it the list"); 
m_target_list.push_back(std::move(target_sp)); if (do_select) diff --git a/lldb/source/Target/Thread.cpp b/lldb/source/Target/Thread.cpp index 3803748be297..f63b57fd4e62 100644 --- a/lldb/source/Target/Thread.cpp +++ b/lldb/source/Target/Thread.cpp @@ -2022,7 +2022,8 @@ lldb::ValueObjectSP Thread::GetSiginfoValue() { llvm::Optional<uint64_t> type_size = type.GetByteSize(nullptr); assert(type_size); - llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>> data = GetSiginfo(type_size.getValue()); + llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>> data = + GetSiginfo(type_size.value()); if (!data) return ValueObjectConstResult::Create(&target, Status(data.takeError())); diff --git a/lldb/source/Target/UnixSignals.cpp b/lldb/source/Target/UnixSignals.cpp index 4ae848a98edd..3ece3ee24cbe 100644 --- a/lldb/source/Target/UnixSignals.cpp +++ b/lldb/source/Target/UnixSignals.cpp @@ -300,14 +300,13 @@ UnixSignals::GetFilteredSignals(llvm::Optional<bool> should_suppress, // If any of filtering conditions are not met, we move on to the next // signal. - if (should_suppress && - signal_suppress != should_suppress.getValue()) + if (should_suppress && signal_suppress != should_suppress.value()) continue; - if (should_stop && signal_stop != should_stop.getValue()) + if (should_stop && signal_stop != should_stop.value()) continue; - if (should_notify && signal_notify != should_notify.getValue()) + if (should_notify && signal_notify != should_notify.value()) continue; result.push_back(signo); diff --git a/lldb/source/Utility/ReproducerProvider.cpp b/lldb/source/Utility/ReproducerProvider.cpp index 0d1581abda64..44f24e44f38d 100644 --- a/lldb/source/Utility/ReproducerProvider.cpp +++ b/lldb/source/Utility/ReproducerProvider.cpp @@ -131,7 +131,7 @@ void SymbolFileProvider::Keep() { return; // Remove duplicates. - llvm::sort(m_symbol_files.begin(), m_symbol_files.end()); + llvm::sort(m_symbol_files); m_symbol_files.erase( std::unique(m_symbol_files.begin(), m_symbol_files.end()), m_symbol_files.end()); diff --git a/lldb/source/Utility/SelectHelper.cpp b/lldb/source/Utility/SelectHelper.cpp index a25bdfdaee8e..05dbd1cce09d 100644 --- a/lldb/source/Utility/SelectHelper.cpp +++ b/lldb/source/Utility/SelectHelper.cpp @@ -197,8 +197,8 @@ lldb_private::Status SelectHelper::Select() { // Setup out relative timeout based on the end time if we have one if (m_end_time) { tv_ptr = &tv; - const auto remaining_dur = duration_cast<microseconds>( - m_end_time.getValue() - steady_clock::now()); + const auto remaining_dur = + duration_cast<microseconds>(m_end_time.value() - steady_clock::now()); if (remaining_dur.count() > 0) { // Wait for a specific amount of time const auto dur_secs = duration_cast<seconds>(remaining_dur); diff --git a/lldb/source/Utility/StringExtractorGDBRemote.cpp b/lldb/source/Utility/StringExtractorGDBRemote.cpp index 07954408f6d0..fc740615dd05 100644 --- a/lldb/source/Utility/StringExtractorGDBRemote.cpp +++ b/lldb/source/Utility/StringExtractorGDBRemote.cpp @@ -380,6 +380,8 @@ StringExtractorGDBRemote::GetServerPacketType() const { return eServerPacketType_vStopped; if (PACKET_MATCHES("vCtrlC")) return eServerPacketType_vCtrlC; + if (PACKET_MATCHES("vStdio")) + return eServerPacketType_vStdio; break; } diff --git a/lldb/source/Utility/Timer.cpp b/lldb/source/Utility/Timer.cpp index b190f35007d5..477541d7bb3d 100644 --- a/lldb/source/Utility/Timer.cpp +++ b/lldb/source/Utility/Timer.cpp @@ -150,7 +150,7 @@ void Timer::DumpCategoryTimes(Stream *s) { return; // Later code will break without any elements. 
// Sort by time - llvm::sort(sorted.begin(), sorted.end(), CategoryMapIteratorSortCriterion); + llvm::sort(sorted, CategoryMapIteratorSortCriterion); for (const auto &stats : sorted) s->Printf("%.9f sec (total: %.3fs; child: %.3fs; count: %" PRIu64 diff --git a/lldb/utils/TableGen/LLDBOptionDefEmitter.cpp b/lldb/utils/TableGen/LLDBOptionDefEmitter.cpp index ccf48275f42c..d73f0a2914c1 100644 --- a/lldb/utils/TableGen/LLDBOptionDefEmitter.cpp +++ b/lldb/utils/TableGen/LLDBOptionDefEmitter.cpp @@ -31,7 +31,6 @@ struct CommandOption { std::string ArgType; bool OptionalArg = false; std::string Validator; - std::string ArgEnum; std::vector<StringRef> Completions; std::string Description; @@ -65,9 +64,6 @@ struct CommandOption { if (Option->getValue("Validator")) Validator = std::string(Option->getValueAsString("Validator")); - if (Option->getValue("ArgEnum")) - ArgEnum = std::string(Option->getValueAsString("ArgEnum")); - if (Option->getValue("Completions")) Completions = Option->getValueAsListOfStrings("Completions"); @@ -114,8 +110,8 @@ static void emitOption(const CommandOption &O, raw_ostream &OS) { OS << "nullptr"; OS << ", "; - if (!O.ArgEnum.empty()) - OS << O.ArgEnum; + if (!O.ArgType.empty()) + OS << "g_argument_table[eArgType" << O.ArgType << "].enum_values"; else OS << "{}"; OS << ", "; diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h index 4155cb260a2a..5bdc1541f630 100644 --- a/llvm/include/llvm/ADT/APInt.h +++ b/llvm/include/llvm/ADT/APInt.h @@ -147,7 +147,7 @@ public: APInt(unsigned numBits, StringRef str, uint8_t radix); /// Default constructor that creates an APInt with a 1-bit zero value. - explicit APInt() : BitWidth(1) { U.VAL = 0; } + explicit APInt() { U.VAL = 0; } /// Copy Constructor. APInt(const APInt &that) : BitWidth(that.BitWidth) { @@ -1824,7 +1824,7 @@ private: uint64_t *pVal; ///< Used to store the >64 bits integer value. } U; - unsigned BitWidth; ///< The number of bits in this APInt. + unsigned BitWidth = 1; ///< The number of bits in this APInt. friend struct DenseMapInfo<APInt, void>; friend class APSInt; diff --git a/llvm/include/llvm/ADT/APSInt.h b/llvm/include/llvm/ADT/APSInt.h index 7b6af436f577..727d95ed8c1c 100644 --- a/llvm/include/llvm/ADT/APSInt.h +++ b/llvm/include/llvm/ADT/APSInt.h @@ -21,11 +21,11 @@ namespace llvm { /// An arbitrary precision integer that knows its signedness. class LLVM_NODISCARD APSInt : public APInt { - bool IsUnsigned; + bool IsUnsigned = false; public: /// Default constructor that creates an uninitialized APInt. - explicit APSInt() : IsUnsigned(false) {} + explicit APSInt() = default; /// Create an APSInt with the specified width, default to unsigned. explicit APSInt(uint32_t BitWidth, bool isUnsigned = true) diff --git a/llvm/include/llvm/ADT/AddressRanges.h b/llvm/include/llvm/ADT/AddressRanges.h index 1953680d5222..c02844a095d1 100644 --- a/llvm/include/llvm/ADT/AddressRanges.h +++ b/llvm/include/llvm/ADT/AddressRanges.h @@ -10,9 +10,10 @@ #define LLVM_ADT_ADDRESSRANGES_H #include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include <cassert> #include <stdint.h> -#include <vector> namespace llvm { @@ -47,20 +48,29 @@ private: /// The AddressRanges class helps normalize address range collections. /// This class keeps a sorted vector of AddressRange objects and can perform /// insertions and searches efficiently. The address ranges are always sorted -/// and never contain any invalid or empty address ranges. 
Intersecting +/// and never contain any invalid or empty address ranges. +/// Intersecting([100,200), [150,300)) and adjacent([100,200), [200,300)) /// address ranges are combined during insertion. class AddressRanges { protected: - using Collection = std::vector<AddressRange>; + using Collection = SmallVector<AddressRange>; Collection Ranges; public: void clear() { Ranges.clear(); } bool empty() const { return Ranges.empty(); } - bool contains(uint64_t Addr) const; - bool contains(AddressRange Range) const; - Optional<AddressRange> getRangeThatContains(uint64_t Addr) const; - void insert(AddressRange Range); + bool contains(uint64_t Addr) const { return find(Addr) != Ranges.end(); } + bool contains(AddressRange Range) const { + return find(Range) != Ranges.end(); + } + Optional<AddressRange> getRangeThatContains(uint64_t Addr) const { + Collection::const_iterator It = find(Addr); + if (It == Ranges.end()) + return None; + + return *It; + } + Collection::const_iterator insert(AddressRange Range); void reserve(size_t Capacity) { Ranges.reserve(Capacity); } size_t size() const { return Ranges.size(); } bool operator==(const AddressRanges &RHS) const { @@ -72,6 +82,64 @@ public: } Collection::const_iterator begin() const { return Ranges.begin(); } Collection::const_iterator end() const { return Ranges.end(); } + +protected: + Collection::const_iterator find(uint64_t Addr) const; + Collection::const_iterator find(AddressRange Range) const; +}; + +/// AddressRangesMap class maps values to the address ranges. +/// It keeps address ranges and corresponding values. If ranges +/// are combined during insertion, then combined range keeps +/// newly inserted value. +template <typename T> class AddressRangesMap : protected AddressRanges { +public: + void clear() { + Ranges.clear(); + Values.clear(); + } + bool empty() const { return AddressRanges::empty(); } + bool contains(uint64_t Addr) const { return AddressRanges::contains(Addr); } + bool contains(AddressRange Range) const { + return AddressRanges::contains(Range); + } + void insert(AddressRange Range, T Value) { + size_t InputSize = Ranges.size(); + Collection::const_iterator RangesIt = AddressRanges::insert(Range); + if (RangesIt == Ranges.end()) + return; + + // make Values match to Ranges. + size_t Idx = RangesIt - Ranges.begin(); + typename ValuesCollection::iterator ValuesIt = Values.begin() + Idx; + if (InputSize < Ranges.size()) + Values.insert(ValuesIt, T()); + else if (InputSize > Ranges.size()) + Values.erase(ValuesIt, ValuesIt + InputSize - Ranges.size()); + assert(Ranges.size() == Values.size()); + + // set value to the inserted or combined range. + Values[Idx] = Value; + } + size_t size() const { + assert(Ranges.size() == Values.size()); + return AddressRanges::size(); + } + Optional<std::pair<AddressRange, T>> + getRangeValueThatContains(uint64_t Addr) const { + Collection::const_iterator It = find(Addr); + if (It == Ranges.end()) + return None; + + return std::make_pair(*It, Values[It - Ranges.begin()]); + } + std::pair<AddressRange, T> operator[](size_t Idx) const { + return std::make_pair(Ranges[Idx], Values[Idx]); + } + +protected: + using ValuesCollection = SmallVector<T>; + ValuesCollection Values; }; } // namespace llvm diff --git a/llvm/include/llvm/ADT/BitVector.h b/llvm/include/llvm/ADT/BitVector.h index 9540b3985963..2ba485777816 100644 --- a/llvm/include/llvm/ADT/BitVector.h +++ b/llvm/include/llvm/ADT/BitVector.h @@ -83,7 +83,7 @@ class BitVector { using Storage = SmallVector<BitWord>; Storage Bits; // Actual bits. 
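A minimal sketch of how the AddressRangesMap insert semantics shown above play out (hypothetical values, half-open [start, end) ranges):

    AddressRangesMap<int> M;
    M.insert(AddressRange(0x100, 0x200), 1); // one range: [0x100, 0x200) -> 1
    M.insert(AddressRange(0x180, 0x300), 2); // intersects; combined [0x100, 0x300) keeps the new value 2
    M.insert(AddressRange(0x400, 0x500), 3); // disjoint; M.size() == 2 now
    // M.getRangeValueThatContains(0x150) returns the pair {[0x100, 0x300), 2}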
- unsigned Size; // Size of bitvector in bits. + unsigned Size = 0; // Size of bitvector in bits. public: using size_type = unsigned; @@ -135,7 +135,7 @@ public: } /// BitVector default ctor - Creates an empty bitvector. - BitVector() : Size(0) {} + BitVector() = default; /// BitVector ctor - Creates a bitvector of specified number of bits. All /// bits are initialized to the specified value. diff --git a/llvm/include/llvm/ADT/EpochTracker.h b/llvm/include/llvm/ADT/EpochTracker.h index b46989bc5111..a639d1b5b3ec 100644 --- a/llvm/include/llvm/ADT/EpochTracker.h +++ b/llvm/include/llvm/ADT/EpochTracker.h @@ -56,11 +56,11 @@ public: /// make an iterator-invalidating modification. /// class HandleBase { - const uint64_t *EpochAddress; - uint64_t EpochAtCreation; + const uint64_t *EpochAddress = nullptr; + uint64_t EpochAtCreation = UINT64_MAX; public: - HandleBase() : EpochAddress(nullptr), EpochAtCreation(UINT64_MAX) {} + HandleBase() = default; explicit HandleBase(const DebugEpochBase *Parent) : EpochAddress(&Parent->Epoch), EpochAtCreation(Parent->Epoch) {} diff --git a/llvm/include/llvm/ADT/IntEqClasses.h b/llvm/include/llvm/ADT/IntEqClasses.h index 84bb58cb736c..9ee8a46be411 100644 --- a/llvm/include/llvm/ADT/IntEqClasses.h +++ b/llvm/include/llvm/ADT/IntEqClasses.h @@ -35,11 +35,11 @@ class IntEqClasses { /// NumClasses - The number of equivalence classes when compressed, or 0 when /// uncompressed. - unsigned NumClasses; + unsigned NumClasses = 0; public: /// IntEqClasses - Create an equivalence class mapping for 0 .. N-1. - IntEqClasses(unsigned N = 0) : NumClasses(0) { grow(N); } + IntEqClasses(unsigned N = 0) { grow(N); } /// grow - Increase capacity to hold 0 .. N-1, putting new integers in unique /// equivalence classes. diff --git a/llvm/include/llvm/ADT/Triple.h b/llvm/include/llvm/ADT/Triple.h index 9d85a28fbf04..ba4584dc60fa 100644 --- a/llvm/include/llvm/ADT/Triple.h +++ b/llvm/include/llvm/ADT/Triple.h @@ -283,22 +283,22 @@ private: std::string Data; /// The parsed arch type. - ArchType Arch; + ArchType Arch{}; /// The parsed subarchitecture type. - SubArchType SubArch; + SubArchType SubArch{}; /// The parsed vendor type. - VendorType Vendor; + VendorType Vendor{}; /// The parsed OS type. - OSType OS; + OSType OS{}; /// The parsed Environment type. - EnvironmentType Environment; + EnvironmentType Environment{}; /// The object format type. - ObjectFormatType ObjectFormat; + ObjectFormatType ObjectFormat{}; public: /// @name Constructors @@ -306,7 +306,7 @@ public: /// Default constructor is the same as an empty string and leaves all /// triple fields unknown. 
- Triple() : Arch(), SubArch(), Vendor(), OS(), Environment(), ObjectFormat() {} + Triple() = default; explicit Triple(const Twine &Str); Triple(const Twine &ArchStr, const Twine &VendorStr, const Twine &OSStr); diff --git a/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h b/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h index a0f5331fdba5..cd32979b9ea5 100644 --- a/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h +++ b/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h @@ -20,7 +20,6 @@ #include "llvm/IR/PassManager.h" namespace llvm { -class DominatorTree; class Function; class LoopInfo; diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h index 231d3bbf534b..a23b64ef20cc 100644 --- a/llvm/include/llvm/Analysis/IVDescriptors.h +++ b/llvm/include/llvm/Analysis/IVDescriptors.h @@ -215,9 +215,6 @@ public: /// Returns true if the recurrence kind is a floating point kind. static bool isFloatingPointRecurrenceKind(RecurKind Kind); - /// Returns true if the recurrence kind is an arithmetic kind. - static bool isArithmeticRecurrenceKind(RecurKind Kind); - /// Returns true if the recurrence kind is an integer min/max kind. static bool isIntMinMaxRecurrenceKind(RecurKind Kind) { return Kind == RecurKind::UMin || Kind == RecurKind::UMax || diff --git a/llvm/include/llvm/Analysis/LoopCacheAnalysis.h b/llvm/include/llvm/Analysis/LoopCacheAnalysis.h index 4c5083f3c980..a323cacdbcdc 100644 --- a/llvm/include/llvm/Analysis/LoopCacheAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopCacheAnalysis.h @@ -108,8 +108,9 @@ private: /// Return true if the indexed reference is 'consecutive' in loop \p L. /// An indexed reference is 'consecutive' if the only coefficient that uses /// the loop induction variable is the rightmost one, and the access stride is - /// smaller than the cache line size \p CLS. - bool isConsecutive(const Loop &L, unsigned CLS) const; + /// smaller than the cache line size \p CLS. Provide a valid \p Stride value + /// if the indexed reference is 'consecutive'. + bool isConsecutive(const Loop &L, const SCEV *&Stride, unsigned CLS) const; /// Retrieve the index of the subscript corresponding to the given loop \p /// L. Return a zero-based positive index if the subscript index is diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h index 9351b83ad747..5a4f8f143093 100644 --- a/llvm/include/llvm/Analysis/LoopInfo.h +++ b/llvm/include/llvm/Analysis/LoopInfo.h @@ -814,12 +814,15 @@ public: /// by one each time through the loop. bool isCanonical(ScalarEvolution &SE) const; - /// Return true if the Loop is in LCSSA form. - bool isLCSSAForm(const DominatorTree &DT) const; - - /// Return true if this Loop and all inner subloops are in LCSSA form. - bool isRecursivelyLCSSAForm(const DominatorTree &DT, - const LoopInfo &LI) const; + /// Return true if the Loop is in LCSSA form. If \p IgnoreTokens is set to + /// true, token values defined inside loop are allowed to violate LCSSA form. + bool isLCSSAForm(const DominatorTree &DT, bool IgnoreTokens = true) const; + + /// Return true if this Loop and all inner subloops are in LCSSA form. If \p + /// IgnoreTokens is set to true, token values defined inside loop are allowed + /// to violate LCSSA form. + bool isRecursivelyLCSSAForm(const DominatorTree &DT, const LoopInfo &LI, + bool IgnoreTokens = true) const; /// Return true if the Loop is in the form that the LoopSimplify form /// transforms loops to, which is sometimes called normal form. 
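As a usage sketch of the new IgnoreTokens parameter above (assuming a Loop *L, a DominatorTree DT and a LoopInfo LI are in scope), a caller that also wants token values checked can opt out of the default:

    bool Strict    = L->isLCSSAForm(DT, /*IgnoreTokens=*/false);
    bool StrictRec = L->isRecursivelyLCSSAForm(DT, LI, /*IgnoreTokens=*/false);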
diff --git a/llvm/include/llvm/Analysis/MemoryBuiltins.h b/llvm/include/llvm/Analysis/MemoryBuiltins.h index 7ad83612880f..422f63db749f 100644 --- a/llvm/include/llvm/Analysis/MemoryBuiltins.h +++ b/llvm/include/llvm/Analysis/MemoryBuiltins.h @@ -65,14 +65,13 @@ bool isMallocOrCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI); /// allocates memory (either malloc, calloc, or strdup like). bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI); -/// Tests if a value is a call or invoke to a library function that -/// reallocates memory (e.g., realloc). -bool isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI); - /// Tests if a function is a call or invoke to a library function that /// reallocates memory (e.g., realloc). bool isReallocLikeFn(const Function *F, const TargetLibraryInfo *TLI); +/// If this is a call to a realloc function, return the reallocated operand. +Value *getReallocatedOperand(const CallBase *CB, const TargetLibraryInfo *TLI); + //===----------------------------------------------------------------------===// // free Call Utility Functions. // @@ -80,26 +79,23 @@ bool isReallocLikeFn(const Function *F, const TargetLibraryInfo *TLI); /// isLibFreeFunction - Returns true if the function is a builtin free() bool isLibFreeFunction(const Function *F, const LibFunc TLIFn); -/// isFreeCall - Returns non-null if the value is a call to the builtin free() -const CallInst *isFreeCall(const Value *I, const TargetLibraryInfo *TLI); - -inline CallInst *isFreeCall(Value *I, const TargetLibraryInfo *TLI) { - return const_cast<CallInst*>(isFreeCall((const Value*)I, TLI)); -} +/// If this if a call to a free function, return the freed operand. +Value *getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI); //===----------------------------------------------------------------------===// // Properties of allocation functions // -/// Return false if the allocation can have side effects on the program state -/// we are required to preserve beyond the effect of allocating a new object. +/// Return true if this is a call to an allocation function that does not have +/// side effects that we are required to preserve beyond the effect of +/// allocating a new object. /// Ex: If our allocation routine has a counter for the number of objects /// allocated, and the program prints it on exit, can the value change due /// to optimization? Answer is highly language dependent. /// Note: *Removable* really does mean removable; it does not mean observable. /// A language (e.g. C++) can allow removing allocations without allowing /// insertion or speculative execution of allocation routines. -bool isAllocRemovable(const CallBase *V, const TargetLibraryInfo *TLI); +bool isRemovableAlloc(const CallBase *V, const TargetLibraryInfo *TLI); /// Gets the alignment argument for an aligned_alloc-like function, using either /// built-in knowledge based on fuction names/signatures or allocalign @@ -107,13 +103,16 @@ bool isAllocRemovable(const CallBase *V, const TargetLibraryInfo *TLI); /// the definition of the allocalign attribute. Value *getAllocAlignment(const CallBase *V, const TargetLibraryInfo *TLI); -/// Return the size of the requested allocation. With a trivial mapper, this is -/// identical to calling getObjectSize(..., Exact). A mapper function can be -/// used to replace one Value* (operand to the allocation) with another. This -/// is useful when doing abstract interpretation. 
-Optional<APInt> getAllocSize(const CallBase *CB, - const TargetLibraryInfo *TLI, - std::function<const Value*(const Value*)> Mapper); +/// Return the size of the requested allocation. With a trivial mapper, this is +/// similar to calling getObjectSize(..., Exact), but without looking through +/// calls that return their argument. A mapper function can be used to replace +/// one Value* (operand to the allocation) with another. This is useful when +/// doing abstract interpretation. +Optional<APInt> getAllocSize( + const CallBase *CB, const TargetLibraryInfo *TLI, + function_ref<const Value *(const Value *)> Mapper = [](const Value *V) { + return V; + }); /// If this is a call to an allocation function that initializes memory to a /// fixed value, return said value in the requested type. Otherwise, return diff --git a/llvm/include/llvm/Analysis/MemoryProfileInfo.h b/llvm/include/llvm/Analysis/MemoryProfileInfo.h new file mode 100644 index 000000000000..1b12e78eaeba --- /dev/null +++ b/llvm/include/llvm/Analysis/MemoryProfileInfo.h @@ -0,0 +1,112 @@ +//===- llvm/Analysis/MemoryProfileInfo.h - memory profile info ---*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains utilities to analyze memory profile information. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_MEMORYPROFILEINFO_H +#define LLVM_ANALYSIS_MEMORYPROFILEINFO_H + +#include "llvm/IR/Constants.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include <map> + +namespace llvm { +namespace memprof { + +// Allocation type assigned to an allocation reached by a given context. +// More can be added but initially this is just noncold and cold. +// Values should be powers of two so that they can be ORed, in particular to +// track allocations that have different behavior with different calling +// contexts. +enum class AllocationType : uint8_t { None = 0, NotCold = 1, Cold = 2 }; + +/// Return the allocation type for a given set of memory profile values. +AllocationType getAllocType(uint64_t MaxAccessCount, uint64_t MinSize, + uint64_t MinLifetime); + +/// Build callstack metadata from the provided list of call stack ids. Returns +/// the resulting metadata node. +MDNode *buildCallstackMetadata(ArrayRef<uint64_t> CallStack, LLVMContext &Ctx); + +/// Returns the stack node from an MIB metadata node. +MDNode *getMIBStackNode(const MDNode *MIB); + +/// Returns the allocation type from an MIB metadata node. +AllocationType getMIBAllocType(const MDNode *MIB); + +/// Class to build a trie of call stack contexts for a particular profiled +/// allocation call, along with their associated allocation types. +/// The allocation will be at the root of the trie, which is then used to +/// compute the minimum lists of context ids needed to associate a call context +/// with a single allocation type. +class CallStackTrie { +private: + struct CallStackTrieNode { + // Allocation types for call context sharing the context prefix at this + // node. + uint8_t AllocTypes; + // Map of caller stack id to the corresponding child Trie node. 
+ std::map<uint64_t, CallStackTrieNode *> Callers; + CallStackTrieNode(AllocationType Type) + : AllocTypes(static_cast<uint8_t>(Type)) {} + }; + + // The node for the allocation at the root. + CallStackTrieNode *Alloc; + // The allocation's leaf stack id. + uint64_t AllocStackId; + + void deleteTrieNode(CallStackTrieNode *Node) { + if (!Node) + return; + for (auto C : Node->Callers) + deleteTrieNode(C.second); + delete Node; + } + + // Recursive helper to trim contexts and create metadata nodes. + bool buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx, + std::vector<uint64_t> &MIBCallStack, + std::vector<Metadata *> &MIBNodes, + bool CalleeHasAmbiguousCallerContext); + +public: + CallStackTrie() : Alloc(nullptr), AllocStackId(0) {} + ~CallStackTrie() { deleteTrieNode(Alloc); } + + bool empty() const { return Alloc == nullptr; } + + /// Add a call stack context with the given allocation type to the Trie. + /// The context is represented by the list of stack ids (computed during + /// matching via a debug location hash), expected to be in order from the + /// allocation call down to the bottom of the call stack (i.e. callee to + /// caller order). + void addCallStack(AllocationType AllocType, ArrayRef<uint64_t> StackIds); + + /// Add the call stack context along with its allocation type from the MIB + /// metadata to the Trie. + void addCallStack(MDNode *MIB); + + /// Build and attach the minimal necessary MIB metadata. If the alloc has a + /// single allocation type, add a function attribute instead. The reason for + /// adding an attribute in this case is that it matches how the behavior for + /// allocation calls will be communicated to lib call simplification after + /// cloning or another optimization to distinguish the allocation types, + /// which is lower overhead and more direct than maintaining this metadata. + /// Returns true if memprof metadata attached, false if not (attribute added). + bool buildAndAttachMIBMetadata(CallBase *CI); +}; + +} // end namespace memprof +} // end namespace llvm + +#endif diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index 5b49ab14286b..fd00c744840b 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1374,11 +1374,11 @@ private: /// Expression indicating the least constant maximum backedge-taken count of /// the loop that is known, or a SCEVCouldNotCompute. This expression is /// only valid if the redicates associated with all loop exits are true. - const SCEV *ConstantMax; + const SCEV *ConstantMax = nullptr; /// Indicating if \c ExitNotTaken has an element for every exiting block in /// the loop. - bool IsComplete; + bool IsComplete = false; /// Expression indicating the least maximum backedge-taken count of the loop /// that is known, or a SCEVCouldNotCompute. Lazily computed on first query. 
@@ -1391,7 +1391,7 @@ private: const SCEV *getConstantMax() const { return ConstantMax; } public: - BackedgeTakenInfo() : ConstantMax(nullptr), IsComplete(false) {} + BackedgeTakenInfo() = default; BackedgeTakenInfo(BackedgeTakenInfo &&) = default; BackedgeTakenInfo &operator=(BackedgeTakenInfo &&) = default; diff --git a/llvm/include/llvm/Analysis/SparsePropagation.h b/llvm/include/llvm/Analysis/SparsePropagation.h index 428238c5fa0b..d5805a731475 100644 --- a/llvm/include/llvm/Analysis/SparsePropagation.h +++ b/llvm/include/llvm/Analysis/SparsePropagation.h @@ -331,8 +331,8 @@ void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::getFeasibleSuccessors( return; } - if (TI.isExceptionalTerminator() || - TI.isIndirectTerminator()) { + if (!isa<SwitchInst>(TI)) { + // Unknown terminator, assume all successors are feasible. Succs.assign(Succs.size(), true); return; } diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index c64cb51cc08e..da4410fcac14 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -52,6 +52,7 @@ class LoadInst; class LoopAccessInfo; class Loop; class LoopInfo; +class LoopVectorizationLegality; class ProfileSummaryInfo; class RecurrenceDescriptor; class SCEV; @@ -129,7 +130,8 @@ class IntrinsicCostAttributes { public: IntrinsicCostAttributes( Intrinsic::ID Id, const CallBase &CI, - InstructionCost ScalarCost = InstructionCost::getInvalid()); + InstructionCost ScalarCost = InstructionCost::getInvalid(), + bool TypeBasedOnly = false); IntrinsicCostAttributes( Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys, @@ -530,7 +532,7 @@ public: bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, - const LoopAccessInfo *LAI) const; + LoopVectorizationLegality *LVL) const; /// Query the target whether lowering of the llvm.get.active.lane.mask /// intrinsic is supported and how the mask should be used.
A return value @@ -1555,10 +1557,12 @@ public: AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) = 0; - virtual bool - preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, - AssumptionCache &AC, TargetLibraryInfo *TLI, - DominatorTree *DT, const LoopAccessInfo *LAI) = 0; + virtual bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, + ScalarEvolution &SE, + AssumptionCache &AC, + TargetLibraryInfo *TLI, + DominatorTree *DT, + LoopVectorizationLegality *LVL) = 0; virtual PredicationStyle emitGetActiveLaneMask() = 0; virtual Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) = 0; @@ -1935,8 +1939,8 @@ public: bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, - const LoopAccessInfo *LAI) override { - return Impl.preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI); + LoopVectorizationLegality *LVL) override { + return Impl.preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LVL); } PredicationStyle emitGetActiveLaneMask() override { return Impl.emitGetActiveLaneMask(); diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index af71fc9bffaf..1a75cb35549e 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -163,7 +163,7 @@ public: bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, - const LoopAccessInfo *LAI) const { + LoopVectorizationLegality *LVL) const { return false; } diff --git a/llvm/include/llvm/BinaryFormat/XCOFF.h b/llvm/include/llvm/BinaryFormat/XCOFF.h index 5d23ec5cd911..51e377293e95 100644 --- a/llvm/include/llvm/BinaryFormat/XCOFF.h +++ b/llvm/include/llvm/BinaryFormat/XCOFF.h @@ -31,6 +31,7 @@ constexpr size_t FileHeaderSize32 = 20; constexpr size_t FileHeaderSize64 = 24; constexpr size_t AuxFileHeaderSize32 = 72; constexpr size_t AuxFileHeaderSize64 = 110; +constexpr size_t AuxFileHeaderSizeShort = 28; constexpr size_t SectionHeaderSize32 = 40; constexpr size_t SectionHeaderSize64 = 72; constexpr size_t SymbolTableEntrySize = 18; diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionUtils.h b/llvm/include/llvm/CodeGen/BasicBlockSectionUtils.h index d8da3be0cd4c..d43f399b2c31 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionUtils.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionUtils.h @@ -25,6 +25,8 @@ using MachineBasicBlockComparator = void sortBasicBlocksAndUpdateBranches(MachineFunction &MF, MachineBasicBlockComparator MBBCmp); +void avoidZeroOffsetLandingPad(MachineFunction &MF); + } // end namespace llvm #endif // LLVM_CODEGEN_BASICBLOCKSECTIONUTILS_H diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h index 7ae1304cced9..557339548581 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ANALYSIS_BASICBLOCKSECTIONSINFO_H -#define LLVM_ANALYSIS_BASICBLOCKSECTIONSINFO_H +#ifndef LLVM_CODEGEN_BASICBLOCKSECTIONSPROFILEREADER_H +#define LLVM_CODEGEN_BASICBLOCKSECTIONSPROFILEREADER_H #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallSet.h" @@ -106,4 +106,4 @@ ImmutablePass * 
createBasicBlockSectionsProfileReaderPass(const MemoryBuffer *Buf); } // namespace llvm -#endif // LLVM_ANALYSIS_BASICBLOCKSECTIONSINFO_H +#endif // LLVM_CODEGEN_BASICBLOCKSECTIONSPROFILEREADER_H diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index b5b766ff03f1..c35a9e878613 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -603,8 +603,8 @@ public: bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, - const LoopAccessInfo *LAI) { - return BaseT::preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI); + LoopVectorizationLegality *LVL) { + return BaseT::preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LVL); } PredicationStyle emitGetActiveLaneMask() { diff --git a/llvm/include/llvm/CodeGen/CallingConvLower.h b/llvm/include/llvm/CodeGen/CallingConvLower.h index 90afbfc32a4e..5c3776e972c0 100644 --- a/llvm/include/llvm/CodeGen/CallingConvLower.h +++ b/llvm/include/llvm/CodeGen/CallingConvLower.h @@ -435,8 +435,8 @@ public: /// Note that, unlike AllocateReg, this shadows ALL of the shadow registers. unsigned AllocateStack(unsigned Size, Align Alignment, ArrayRef<MCPhysReg> ShadowRegs) { - for (unsigned i = 0; i < ShadowRegs.size(); ++i) - MarkAllocated(ShadowRegs[i]); + for (MCPhysReg Reg : ShadowRegs) + MarkAllocated(Reg); return AllocateStack(Size, Alignment); } diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h index 5e7428a5edc5..f7fafdc57401 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h @@ -569,6 +569,7 @@ private: /// Current optimization remark emitter. Used to report failures. std::unique_ptr<OptimizationRemarkEmitter> ORE; + AAResults *AA; FunctionLoweringInfo FuncInfo; // True when either the Target Machine specifies no optimizations or the diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h index 78f1b49da822..31f3d5d84186 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -48,7 +48,6 @@ class TargetRegisterInfo; class TargetRegisterClass; class ConstantFP; class APFloat; -class MachineIRBuilder; // Convenience macros for dealing with vector reduction opcodes. #define GISEL_VECREDUCE_CASES_ALL \ diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h index 14bbcd24d04d..e90730140406 100644 --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -1292,6 +1292,12 @@ enum NodeType { // Outputs: output chain, glue STACKMAP, + // The `llvm.experimental.patchpoint.*` intrinsic. + // Operands: input chain, [glue], reg-mask, <id>, <numShadowBytes>, callee, + // <numArgs>, cc, ... + // Outputs: [rv], output chain, glue + PATCHPOINT, + // Vector Predication #define BEGIN_REGISTER_VP_SDNODE(VPSDID, ...) 
VPSDID, #include "llvm/IR/VPIntrinsics.def" diff --git a/llvm/include/llvm/CodeGen/LiveIntervals.h b/llvm/include/llvm/CodeGen/LiveIntervals.h index b832eaa37305..b26aa773c9ea 100644 --- a/llvm/include/llvm/CodeGen/LiveIntervals.h +++ b/llvm/include/llvm/CodeGen/LiveIntervals.h @@ -55,8 +55,7 @@ class VirtRegMap; MachineFunction* MF; MachineRegisterInfo* MRI; const TargetRegisterInfo* TRI; - const TargetInstrInfo* TII; - AAResults *AA; + const TargetInstrInfo *TII; SlotIndexes* Indexes; MachineDominatorTree *DomTree = nullptr; LiveIntervalCalc *LICalc = nullptr; @@ -212,10 +211,6 @@ class VirtRegMap; return Indexes; } - AAResults *getAliasAnalysis() const { - return AA; - } - /// Returns true if the specified machine instr has been removed or was /// never entered in the map. bool isNotInMIMap(const MachineInstr &Instr) const { diff --git a/llvm/include/llvm/CodeGen/LiveRangeEdit.h b/llvm/include/llvm/CodeGen/LiveRangeEdit.h index c6efa7b30d71..3b61563cb598 100644 --- a/llvm/include/llvm/CodeGen/LiveRangeEdit.h +++ b/llvm/include/llvm/CodeGen/LiveRangeEdit.h @@ -32,7 +32,6 @@ namespace llvm { -class AAResults; class LiveIntervals; class MachineInstr; class MachineOperand; @@ -93,7 +92,7 @@ private: SmallPtrSet<const VNInfo *, 4> Rematted; /// scanRemattable - Identify the Parent values that may rematerialize. - void scanRemattable(AAResults *aa); + void scanRemattable(); /// foldAsLoad - If LI has a single use and a single def that can be folded as /// a load, eliminate the register by folding the def into the use. @@ -103,8 +102,7 @@ private: SmallPtrSet<LiveInterval *, 8>>; /// Helper for eliminateDeadDefs. - void eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink, - AAResults *AA); + void eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink); /// MachineRegisterInfo callback to notify when new virtual /// registers are created. @@ -184,12 +182,11 @@ public: /// anyRematerializable - Return true if any parent values may be /// rematerializable. /// This function must be called before any rematerialization is attempted. - bool anyRematerializable(AAResults *); + bool anyRematerializable(); /// checkRematerializable - Manually add VNI to the list of rematerializable /// values if DefMI may be rematerializable. - bool checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI, - AAResults *); + bool checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI); /// Remat - Information needed to rematerialize at a specific location. struct Remat { @@ -242,8 +239,7 @@ public: /// allocator. These registers should not be split into new intervals /// as currently those new intervals are not guaranteed to spill. void eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead, - ArrayRef<Register> RegsBeingSpilled = None, - AAResults *AA = nullptr); + ArrayRef<Register> RegsBeingSpilled = None); /// calculateRegClassAndHint - Recompute register class and hint for each new /// register. 
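The LiveIntervals/LiveRangeEdit hunks above drop the AAResults plumbing from the rematerialization interface. A sketch of how a caller's shape simplifies; the wrapper function below is illustrative only and assumes the caller has already collected the dead instructions:

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LiveRangeEdit.h"

// Before this change, both calls below also had to forward an AAResults*.
static void rematerializeAndClean(
    llvm::LiveRangeEdit &Edit,
    llvm::SmallVectorImpl<llvm::MachineInstr *> &Dead) {
  if (!Edit.anyRematerializable())
    return;                      // no parent value can be rematerialized
  Edit.eliminateDeadDefs(Dead);  // dead defs gathered by the caller
}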
diff --git a/llvm/include/llvm/CodeGen/LiveVariables.h b/llvm/include/llvm/CodeGen/LiveVariables.h index aa198527415d..03a0517d2642 100644 --- a/llvm/include/llvm/CodeGen/LiveVariables.h +++ b/llvm/include/llvm/CodeGen/LiveVariables.h @@ -219,8 +219,7 @@ public: return false; bool Removed = false; - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); + for (MachineOperand &MO : MI.operands()) { if (MO.isReg() && MO.isKill() && MO.getReg() == Reg) { MO.setIsKill(false); Removed = true; @@ -255,8 +254,7 @@ public: return false; bool Removed = false; - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); + for (MachineOperand &MO : MI.operands()) { if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) { MO.setIsDead(false); Removed = true; diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h index acc4c9a24c01..5f483a8d0312 100644 --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -572,12 +572,9 @@ public: /// Returns true if the instruction has implicit definition. bool hasImplicitDef() const { - for (unsigned I = getNumExplicitOperands(), E = getNumOperands(); - I != E; ++I) { - const MachineOperand &MO = getOperand(I); + for (const MachineOperand &MO : implicit_operands()) if (MO.isDef() && MO.isImplicit()) return true; - } return false; } @@ -1620,7 +1617,7 @@ public: /// argument area of a function (if it does not change). If the instruction /// does multiple loads, this returns true only if all of the loads are /// dereferenceable and invariant. - bool isDereferenceableInvariantLoad(AAResults *AA) const; + bool isDereferenceableInvariantLoad() const; /// If the specified instruction is a PHI that always merges together the /// same virtual register, return the register, otherwise return 0. 
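The LiveVariables and MachineInstr hunks above replace index-based operand loops with the range accessors (MI.operands(), MI.implicit_operands()). The same pattern in isolation, as a hypothetical helper:

#include "llvm/CodeGen/MachineInstr.h"

// Walk all operands (explicit and implicit) with the range accessor used in
// the rewritten loops above.
static bool definesRegAsDead(llvm::MachineInstr &MI, llvm::Register Reg) {
  for (llvm::MachineOperand &MO : MI.operands())
    if (MO.isReg() && MO.isDef() && MO.isDead() && MO.getReg() == Reg)
      return true;
  return false;
}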
diff --git a/llvm/include/llvm/CodeGen/MachineModuleInfo.h b/llvm/include/llvm/CodeGen/MachineModuleInfo.h index cdd0073749d3..61240924e5e1 100644 --- a/llvm/include/llvm/CodeGen/MachineModuleInfo.h +++ b/llvm/include/llvm/CodeGen/MachineModuleInfo.h @@ -41,7 +41,6 @@ namespace llvm { -class BasicBlock; class Function; class LLVMTargetMachine; class MachineFunction; diff --git a/llvm/include/llvm/CodeGen/MachinePipeliner.h b/llvm/include/llvm/CodeGen/MachinePipeliner.h index 4559f7a9bde7..fc1cc0a879ca 100644 --- a/llvm/include/llvm/CodeGen/MachinePipeliner.h +++ b/llvm/include/llvm/CodeGen/MachinePipeliner.h @@ -333,9 +333,9 @@ public: NodeSet() = default; NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) { Latency = 0; - for (unsigned i = 0, e = Nodes.size(); i < e; ++i) { + for (const SUnit *Node : Nodes) { DenseMap<SUnit *, unsigned> SuccSUnitLatency; - for (const SDep &Succ : Nodes[i]->Succs) { + for (const SDep &Succ : Node->Succs) { auto SuccSUnit = Succ.getSUnit(); if (!Nodes.count(SuccSUnit)) continue; diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index 6e37d42f0d29..9822f8013e91 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -26,7 +26,6 @@ namespace llvm { class FunctionPass; class MachineFunction; class MachineFunctionPass; -class MemoryBuffer; class ModulePass; class Pass; class TargetMachine; diff --git a/llvm/include/llvm/CodeGen/RegisterScavenging.h b/llvm/include/llvm/CodeGen/RegisterScavenging.h index 1f0cd273bf61..52797afbd848 100644 --- a/llvm/include/llvm/CodeGen/RegisterScavenging.h +++ b/llvm/include/llvm/CodeGen/RegisterScavenging.h @@ -146,9 +146,8 @@ public: /// Query whether a frame index is a scavenging frame index. bool isScavengingFrameIndex(int FI) const { - for (SmallVectorImpl<ScavengedInfo>::const_iterator I = Scavenged.begin(), - IE = Scavenged.end(); I != IE; ++I) - if (I->FrameIndex == FI) + for (const ScavengedInfo &SI : Scavenged) + if (SI.FrameIndex == FI) return true; return false; @@ -156,10 +155,9 @@ public: /// Get an array of scavenging frame indices. 
void getScavengingFrameIndices(SmallVectorImpl<int> &A) const { - for (SmallVectorImpl<ScavengedInfo>::const_iterator I = Scavenged.begin(), - IE = Scavenged.end(); I != IE; ++I) - if (I->FrameIndex >= 0) - A.push_back(I->FrameIndex); + for (const ScavengedInfo &I : Scavenged) + if (I.FrameIndex >= 0) + A.push_back(I.FrameIndex); } /// Make a register of the specific register class diff --git a/llvm/include/llvm/CodeGen/ScheduleDAG.h b/llvm/include/llvm/CodeGen/ScheduleDAG.h index f1c377f76d02..2fe2aabe833e 100644 --- a/llvm/include/llvm/CodeGen/ScheduleDAG.h +++ b/llvm/include/llvm/CodeGen/ScheduleDAG.h @@ -525,9 +525,8 @@ class TargetRegisterInfo; virtual void push(SUnit *U) = 0; void push_all(const std::vector<SUnit *> &Nodes) { - for (std::vector<SUnit *>::const_iterator I = Nodes.begin(), - E = Nodes.end(); I != E; ++I) - push(*I); + for (SUnit *SU : Nodes) + push(SU); } virtual SUnit *pop() = 0; diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index bcbd7ebcc0c9..1169e0116ec8 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1043,13 +1043,15 @@ public: bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, - const AAMDNodes &AAInfo = AAMDNodes()); + const AAMDNodes &AAInfo = AAMDNodes(), + AAResults *AA = nullptr); SDValue getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, - const AAMDNodes &AAInfo = AAMDNodes()); + const AAMDNodes &AAInfo = AAMDNodes(), + AAResults *AA = nullptr); SDValue getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/llvm/include/llvm/CodeGen/SelectionDAGISel.h index 87df6d1b1604..e152503f9e1a 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h @@ -321,7 +321,11 @@ private: void Select_FREEZE(SDNode *N); void Select_ARITH_FENCE(SDNode *N); + + void pushStackMapLiveVariable(SmallVectorImpl<SDValue> &Ops, SDValue Operand, + SDLoc DL); void Select_STACKMAP(SDNode *N); + void Select_PATCHPOINT(SDNode *N); private: void DoInstructionSelection(); diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index f9183e0a9c66..a1c9061baee6 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -121,12 +121,11 @@ public: /// This means the only allowed uses are constants and unallocatable physical /// registers so that the instructions result is independent of the place /// in the function. - bool isTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA = nullptr) const { + bool isTriviallyReMaterializable(const MachineInstr &MI) const { return MI.getOpcode() == TargetOpcode::IMPLICIT_DEF || (MI.getDesc().isRematerializable() && - (isReallyTriviallyReMaterializable(MI, AA) || - isReallyTriviallyReMaterializableGeneric(MI, AA))); + (isReallyTriviallyReMaterializable(MI) || + isReallyTriviallyReMaterializableGeneric(MI))); } /// Given \p MO is a PhysReg use return if it can be ignored for the purpose @@ -143,8 +142,7 @@ protected: /// than producing a value, or if it requres any address registers that are /// not always available. /// Requirements must be check as stated in isTriviallyReMaterializable() . 
- virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA) const { + virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const { return false; } @@ -186,8 +184,7 @@ private: /// set and the target hook isReallyTriviallyReMaterializable returns false, /// this function does target-independent tests to determine if the /// instruction is really trivially rematerializable. - bool isReallyTriviallyReMaterializableGeneric(const MachineInstr &MI, - AAResults *AA) const; + bool isReallyTriviallyReMaterializableGeneric(const MachineInstr &MI) const; public: /// These methods return the opcode of the frame setup/destroy instructions diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index ab5d3ba0164d..1bb2a8e50c07 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -3874,7 +3874,7 @@ public: virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; /// Return true if it is profitable to move this shift by a constant amount - /// though its operand, adjusting any immediate operands as necessary to + /// through its operand, adjusting any immediate operands as necessary to /// preserve semantics. This transformation may not be desirable if it /// disrupts a particularly auspicious target-specific tree (e.g. bitfield /// extraction in AArch64). By default, it returns true. @@ -3886,6 +3886,14 @@ public: return true; } + /// Return true if it is profitable to combine an XOR of a logical shift + /// to create a logical shift of NOT. This transformation may not be desirable + /// if it disrupts a particularly auspicious target-specific tree (e.g. + /// BIC on ARM/AArch64). By default, it returns true. + virtual bool isDesirableToCommuteXorWithShift(const SDNode *N) const { + return true; + } + /// Return true if the target has native support for the specified value type /// and it is 'desirable' to use the type for the given node type. e.g. On x86 /// i16 is legal, but undesirable since i16 instruction encodings are longer diff --git a/llvm/include/llvm/DWARFLinker/DWARFLinker.h b/llvm/include/llvm/DWARFLinker/DWARFLinker.h index b2b2e2e873be..3961100e00e1 100644 --- a/llvm/include/llvm/DWARFLinker/DWARFLinker.h +++ b/llvm/include/llvm/DWARFLinker/DWARFLinker.h @@ -9,6 +9,7 @@ #ifndef LLVM_DWARFLINKER_DWARFLINKER_H #define LLVM_DWARFLINKER_DWARFLINKER_H +#include "llvm/ADT/AddressRanges.h" #include "llvm/CodeGen/AccelTable.h" #include "llvm/CodeGen/NonRelocatableStringpool.h" #include "llvm/DWARFLinker/DWARFLinkerCompileUnit.h" @@ -37,25 +38,6 @@ enum class DwarfLinkerAccelTableKind : uint8_t { Pub, ///< .debug_pubnames, .debug_pubtypes }; -/// Partial address range. Besides an offset, only the -/// HighPC is stored. The structure is stored in a map where the LowPC is the -/// key. -struct ObjFileAddressRange { - /// Function HighPC. - uint64_t HighPC; - /// Offset to apply to the linked address. - /// should be 0 for not-linked object file. - int64_t Offset; - - ObjFileAddressRange(uint64_t EndPC, int64_t Offset) - : HighPC(EndPC), Offset(Offset) {} - - ObjFileAddressRange() : HighPC(0), Offset(0) {} -}; - -/// Map LowPC to ObjFileAddressRange. -using RangesTy = std::map<uint64_t, ObjFileAddressRange>; - /// AddressesMap represents information about valid addresses used /// by debug information. Valid addresses are those which points to /// live code sections. i.e. 
relocations for these addresses point @@ -142,7 +124,7 @@ public: /// original \p Entries. virtual void emitRangesEntries( int64_t UnitPcOffset, uint64_t OrigLowPc, - const FunctionIntervals::const_iterator &FuncRange, + Optional<std::pair<AddressRange, int64_t>> FuncRange, const std::vector<DWARFDebugRangeList::RangeListEntry> &Entries, unsigned AddressSize) = 0; diff --git a/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h b/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h index 788275782235..05e291c05132 100644 --- a/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h +++ b/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h @@ -9,8 +9,8 @@ #ifndef LLVM_DWARFLINKER_DWARFLINKERCOMPILEUNIT_H #define LLVM_DWARFLINKER_DWARFLINKERCOMPILEUNIT_H +#include "llvm/ADT/AddressRanges.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/IntervalMap.h" #include "llvm/CodeGen/DIE.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" @@ -18,12 +18,9 @@ namespace llvm { class DeclContext; -template <typename KeyT, typename ValT> -using HalfOpenIntervalMap = - IntervalMap<KeyT, ValT, IntervalMapImpl::NodeSizer<KeyT, ValT>::LeafSize, - IntervalMapHalfOpenInfo<KeyT>>; - -using FunctionIntervals = HalfOpenIntervalMap<uint64_t, int64_t>; +/// Mapped value in the address map is the offset to apply to the +/// linked address. +using RangesTy = AddressRangesMap<int64_t>; // FIXME: Delete this structure. struct PatchLocation { @@ -84,8 +81,7 @@ public: CompileUnit(DWARFUnit &OrigUnit, unsigned ID, bool CanUseODR, StringRef ClangModuleName) - : OrigUnit(OrigUnit), ID(ID), Ranges(RangeAlloc), - ClangModuleName(ClangModuleName) { + : OrigUnit(OrigUnit), ID(ID), ClangModuleName(ClangModuleName) { Info.resize(OrigUnit.getNumDIEs()); auto CUDie = OrigUnit.getUnitDIE(false); @@ -143,7 +139,7 @@ public: return UnitRangeAttribute; } - const FunctionIntervals &getFunctionRanges() const { return Ranges; } + const RangesTy &getFunctionRanges() const { return Ranges; } const std::vector<PatchLocation> &getRangesAttributes() const { return RangeAttributes; @@ -266,12 +262,10 @@ private: std::tuple<DIE *, const CompileUnit *, DeclContext *, PatchLocation>> ForwardDIEReferences; - FunctionIntervals::Allocator RangeAlloc; - - /// The ranges in that interval map are the PC ranges for - /// functions in this unit, associated with the PC offset to apply - /// to the addresses to get the linked address. - FunctionIntervals Ranges; + /// The ranges in that map are the PC ranges for functions in this unit, + /// associated with the PC offset to apply to the addresses to get + /// the linked address. + RangesTy Ranges; /// The DW_AT_low_pc of each DW_TAG_label. SmallDenseMap<uint64_t, uint64_t, 1> Labels; diff --git a/llvm/include/llvm/DWARFLinker/DWARFStreamer.h b/llvm/include/llvm/DWARFLinker/DWARFStreamer.h index 003fe548252a..0ccab0efa8f4 100644 --- a/llvm/include/llvm/DWARFLinker/DWARFStreamer.h +++ b/llvm/include/llvm/DWARFLinker/DWARFStreamer.h @@ -96,7 +96,7 @@ public: /// original \p Entries. 
void emitRangesEntries( int64_t UnitPcOffset, uint64_t OrigLowPc, - const FunctionIntervals::const_iterator &FuncRange, + Optional<std::pair<AddressRange, int64_t>> FuncRange, const std::vector<DWARFDebugRangeList::RangeListEntry> &Entries, unsigned AddressSize) override; diff --git a/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def b/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def index 4cee3abdde87..5d537755b2d6 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def +++ b/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def @@ -523,10 +523,12 @@ CV_REGISTER(ARM_NQ15, 415) #if defined(CV_REGISTERS_ALL) || defined(CV_REGISTERS_ARM64) -// arm64intr.h from MSVC defines ARM64_FPSR, which conflicts with +// arm64intr.h from MSVC defines ARM64_FPSR and ARM64_FPCR, which conflicts with // these declarations. #pragma push_macro("ARM64_FPSR") +#pragma push_macro("ARM64_FPCR") #undef ARM64_FPSR +#undef ARM64_FPCR // ARM64 registers @@ -715,7 +717,79 @@ CV_REGISTER(ARM64_Q31, 211) // Floating point status register CV_REGISTER(ARM64_FPSR, 220) +CV_REGISTER(ARM64_FPCR, 221) + +// 8 bit floating point registers + +CV_REGISTER(ARM64_B0, 230) +CV_REGISTER(ARM64_B1, 231) +CV_REGISTER(ARM64_B2, 232) +CV_REGISTER(ARM64_B3, 233) +CV_REGISTER(ARM64_B4, 234) +CV_REGISTER(ARM64_B5, 235) +CV_REGISTER(ARM64_B6, 236) +CV_REGISTER(ARM64_B7, 237) +CV_REGISTER(ARM64_B8, 238) +CV_REGISTER(ARM64_B9, 239) +CV_REGISTER(ARM64_B10, 240) +CV_REGISTER(ARM64_B11, 241) +CV_REGISTER(ARM64_B12, 242) +CV_REGISTER(ARM64_B13, 243) +CV_REGISTER(ARM64_B14, 244) +CV_REGISTER(ARM64_B15, 245) +CV_REGISTER(ARM64_B16, 246) +CV_REGISTER(ARM64_B17, 247) +CV_REGISTER(ARM64_B18, 248) +CV_REGISTER(ARM64_B19, 249) +CV_REGISTER(ARM64_B20, 250) +CV_REGISTER(ARM64_B21, 251) +CV_REGISTER(ARM64_B22, 252) +CV_REGISTER(ARM64_B23, 253) +CV_REGISTER(ARM64_B24, 254) +CV_REGISTER(ARM64_B25, 255) +CV_REGISTER(ARM64_B26, 256) +CV_REGISTER(ARM64_B27, 257) +CV_REGISTER(ARM64_B28, 258) +CV_REGISTER(ARM64_B29, 259) +CV_REGISTER(ARM64_B30, 260) +CV_REGISTER(ARM64_B31, 261) + +// 16 bit floating point registers + +CV_REGISTER(ARM64_H0, 270) +CV_REGISTER(ARM64_H1, 271) +CV_REGISTER(ARM64_H2, 272) +CV_REGISTER(ARM64_H3, 273) +CV_REGISTER(ARM64_H4, 274) +CV_REGISTER(ARM64_H5, 275) +CV_REGISTER(ARM64_H6, 276) +CV_REGISTER(ARM64_H7, 277) +CV_REGISTER(ARM64_H8, 278) +CV_REGISTER(ARM64_H9, 279) +CV_REGISTER(ARM64_H10, 280) +CV_REGISTER(ARM64_H11, 281) +CV_REGISTER(ARM64_H12, 282) +CV_REGISTER(ARM64_H13, 283) +CV_REGISTER(ARM64_H14, 284) +CV_REGISTER(ARM64_H15, 285) +CV_REGISTER(ARM64_H16, 286) +CV_REGISTER(ARM64_H17, 287) +CV_REGISTER(ARM64_H18, 288) +CV_REGISTER(ARM64_H19, 289) +CV_REGISTER(ARM64_H20, 290) +CV_REGISTER(ARM64_H21, 291) +CV_REGISTER(ARM64_H22, 292) +CV_REGISTER(ARM64_H23, 293) +CV_REGISTER(ARM64_H24, 294) +CV_REGISTER(ARM64_H25, 295) +CV_REGISTER(ARM64_H26, 296) +CV_REGISTER(ARM64_H27, 297) +CV_REGISTER(ARM64_H28, 298) +CV_REGISTER(ARM64_H29, 299) +CV_REGISTER(ARM64_H30, 300) +CV_REGISTER(ARM64_H31, 301) #pragma pop_macro("ARM64_FPSR") +#pragma pop_macro("ARM64_FPCR") #endif // defined(CV_REGISTERS_ALL) || defined(CV_REGISTERS_ARM64) diff --git a/llvm/include/llvm/DebugInfo/Symbolize/Markup.h b/llvm/include/llvm/DebugInfo/Symbolize/Markup.h index 2628b47cf6d3..4f2b0de481ec 100644 --- a/llvm/include/llvm/DebugInfo/Symbolize/Markup.h +++ b/llvm/include/llvm/DebugInfo/Symbolize/Markup.h @@ -84,6 +84,10 @@ public: /// \returns the next markup node or None if none remain. 
Optional<MarkupNode> nextNode(); + bool isSGR(const MarkupNode &Node) const { + return SGRSyntax.match(Node.Text); + } + private: Optional<MarkupNode> parseElement(StringRef Line); void parseTextOutsideMarkup(StringRef Text); diff --git a/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h b/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h index b7d70ccafe66..26686143af95 100644 --- a/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h +++ b/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h @@ -17,6 +17,9 @@ #include "Markup.h" +#include <map> + +#include "llvm/ADT/DenseMap.h" #include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" @@ -29,45 +32,106 @@ class MarkupFilter { public: MarkupFilter(raw_ostream &OS, Optional<bool> ColorsEnabled = llvm::None); - /// Begins a logical \p Line of markup. - /// - /// This must be called for each line of the input stream before calls to - /// filter() for elements of that line. The provided \p Line must be the same - /// one that was passed to parseLine() to produce the elements to be later - /// passed to filter(). + /// Filters a line containing symbolizer markup and writes the human-readable + /// results to the output stream. /// - /// This informs the filter that a new line is beginning and establishes a - /// context for error location reporting. - void beginLine(StringRef Line); + /// Invalid or unimplemented markup elements are removed. Some output may be + /// deferred until future filter() or finish() call. + void filter(StringRef Line); - /// Handle a \p Node of symbolizer markup. - /// - /// If the node is a recognized, valid markup element, it is replaced with a - /// human-readable string. If the node isn't an element or the element isn't - /// recognized, it is output verbatim. If the element is recognized but isn't - /// valid, it is omitted from the output. - void filter(const MarkupNode &Node); + /// Records that the input stream has ended and writes any deferred output. + void finish(); private: + struct Module { + uint64_t ID; + std::string Name; + SmallVector<uint8_t> BuildID; + }; + + struct MMap { + uint64_t Addr; + uint64_t Size; + const Module *Mod; + std::string Mode; // Lowercase + uint64_t ModuleRelativeAddr; + + bool contains(uint64_t Addr) const; + }; + + // An informational module line currently being constructed. As many mmap + // elements as possible are folded into one ModuleInfo line. 
+ struct ModuleInfoLine { + const Module *Mod; + + SmallVector<const MMap *> MMaps = {}; + }; + + bool tryContextualElement(const MarkupNode &Node, + const SmallVector<MarkupNode> &DeferredNodes); + bool tryMMap(const MarkupNode &Element, + const SmallVector<MarkupNode> &DeferredNodes); + bool tryReset(const MarkupNode &Element, + const SmallVector<MarkupNode> &DeferredNodes); + bool tryModule(const MarkupNode &Element, + const SmallVector<MarkupNode> &DeferredNodes); + + void beginModuleInfoLine(const Module *M); + void endAnyModuleInfoLine(); + + void filterNode(const MarkupNode &Node); + + bool tryPresentation(const MarkupNode &Node); + bool trySymbol(const MarkupNode &Node); + bool trySGR(const MarkupNode &Node); void highlight(); + void highlightValue(); void restoreColor(); void resetColor(); + Optional<Module> parseModule(const MarkupNode &Element) const; + Optional<MMap> parseMMap(const MarkupNode &Element) const; + + Optional<uint64_t> parseAddr(StringRef Str) const; + Optional<uint64_t> parseModuleID(StringRef Str) const; + Optional<uint64_t> parseSize(StringRef Str) const; + Optional<SmallVector<uint8_t>> parseBuildID(StringRef Str) const; + Optional<std::string> parseMode(StringRef Str) const; + bool checkTag(const MarkupNode &Node) const; - bool checkNumFields(const MarkupNode &Node, size_t Size) const; + bool checkNumFields(const MarkupNode &Element, size_t Size) const; + bool checkNumFieldsAtLeast(const MarkupNode &Element, size_t Size) const; void reportTypeError(StringRef Str, StringRef TypeName) const; void reportLocation(StringRef::iterator Loc) const; + const MMap *overlappingMMap(const MMap &Map) const; + + StringRef lineEnding() const; + raw_ostream &OS; const bool ColorsEnabled; + MarkupParser Parser; + + // Current line being filtered. StringRef Line; + // A module info line currently being built. This incorporates as much mmap + // information as possible before being emitted. + Optional<ModuleInfoLine> MIL; + + // SGR state. Optional<raw_ostream::Colors> Color; bool Bold = false; + + // Map from Module ID to Module. + DenseMap<uint64_t, std::unique_ptr<Module>> Modules; + + // Ordered map from starting address to mmap. 
+ std::map<uint64_t, MMap> MMaps; }; } // end namespace symbolize diff --git a/llvm/include/llvm/Debuginfod/HTTPServer.h b/llvm/include/llvm/Debuginfod/HTTPServer.h index 410ba32b3f2e..15e611ec546f 100644 --- a/llvm/include/llvm/Debuginfod/HTTPServer.h +++ b/llvm/include/llvm/Debuginfod/HTTPServer.h @@ -13,8 +13,8 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_SUPPORT_HTTP_SERVER_H -#define LLVM_SUPPORT_HTTP_SERVER_H +#ifndef LLVM_DEBUGINFOD_HTTPSERVER_H +#define LLVM_DEBUGINFOD_HTTPSERVER_H #include "llvm/ADT/StringRef.h" #include "llvm/Support/Error.h" @@ -120,4 +120,4 @@ public: }; } // end namespace llvm -#endif // LLVM_SUPPORT_HTTP_SERVER_H +#endif // LLVM_DEBUGINFOD_HTTPSERVER_H diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h index 959632f13e1e..6d4f6222af44 100644 --- a/llvm/include/llvm/Demangle/ItaniumDemangle.h +++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h @@ -13,8 +13,8 @@ // //===----------------------------------------------------------------------===// -#ifndef DEMANGLE_ITANIUMDEMANGLE_H -#define DEMANGLE_ITANIUMDEMANGLE_H +#ifndef LLVM_DEMANGLE_ITANIUMDEMANGLE_H +#define LLVM_DEMANGLE_ITANIUMDEMANGLE_H #include "DemangleConfig.h" #include "StringView.h" @@ -5477,4 +5477,4 @@ struct ManglingParser : AbstractManglingParser<ManglingParser<Alloc>, Alloc> { DEMANGLE_NAMESPACE_END -#endif // DEMANGLE_ITANIUMDEMANGLE_H +#endif // LLVM_DEMANGLE_ITANIUMDEMANGLE_H diff --git a/llvm/include/llvm/Demangle/StringView.h b/llvm/include/llvm/Demangle/StringView.h index 6bbb8837fed1..30580af282fb 100644 --- a/llvm/include/llvm/Demangle/StringView.h +++ b/llvm/include/llvm/Demangle/StringView.h @@ -13,8 +13,8 @@ // //===----------------------------------------------------------------------===// -#ifndef DEMANGLE_STRINGVIEW_H -#define DEMANGLE_STRINGVIEW_H +#ifndef LLVM_DEMANGLE_STRINGVIEW_H +#define LLVM_DEMANGLE_STRINGVIEW_H #include "DemangleConfig.h" #include <cassert> diff --git a/llvm/include/llvm/Demangle/Utility.h b/llvm/include/llvm/Demangle/Utility.h index ca7e44b948c7..691c34067d7f 100644 --- a/llvm/include/llvm/Demangle/Utility.h +++ b/llvm/include/llvm/Demangle/Utility.h @@ -13,8 +13,8 @@ // //===----------------------------------------------------------------------===// -#ifndef DEMANGLE_UTILITY_H -#define DEMANGLE_UTILITY_H +#ifndef LLVM_DEMANGLE_UTILITY_H +#define LLVM_DEMANGLE_UTILITY_H #include "StringView.h" #include <array> diff --git a/llvm/include/llvm/ExecutionEngine/Orc/MapperJITLinkMemoryManager.h b/llvm/include/llvm/ExecutionEngine/Orc/MapperJITLinkMemoryManager.h new file mode 100644 index 000000000000..37d75bfff546 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/MapperJITLinkMemoryManager.h @@ -0,0 +1,56 @@ +//===--------------- MapperJITLinkMemoryManager.h -*- C++ -*---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implements JITLinkMemoryManager using MemoryMapper +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_MAPPERJITLINKMEMORYMANAGER_H +#define LLVM_EXECUTIONENGINE_ORC_MAPPERJITLINKMEMORYMANAGER_H + +#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h" +#include "llvm/ExecutionEngine/Orc/Core.h" +#include "llvm/ExecutionEngine/Orc/MemoryMapper.h" + +namespace llvm { +namespace orc { + +class MapperJITLinkMemoryManager : public jitlink::JITLinkMemoryManager { +public: + MapperJITLinkMemoryManager(std::unique_ptr<MemoryMapper> Mapper); + + template <class MemoryMapperType, class... Args> + static Expected<std::unique_ptr<MapperJITLinkMemoryManager>> + CreateWithMapper(Args &&...A) { + auto Mapper = MemoryMapperType::Create(std::forward<Args>(A)...); + if (!Mapper) + return Mapper.takeError(); + + return std::make_unique<MapperJITLinkMemoryManager>(std::move(*Mapper)); + } + + void allocate(const jitlink::JITLinkDylib *JD, jitlink::LinkGraph &G, + OnAllocatedFunction OnAllocated) override; + // synchronous overload + using JITLinkMemoryManager::allocate; + + void deallocate(std::vector<FinalizedAlloc> Allocs, + OnDeallocatedFunction OnDeallocated) override; + // synchronous overload + using JITLinkMemoryManager::deallocate; + +private: + class InFlightAlloc; + + std::unique_ptr<MemoryMapper> Mapper; +}; + +} // end namespace orc +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_MAPPERJITLINKMEMORYMANAGER_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/MemoryMapper.h b/llvm/include/llvm/ExecutionEngine/Orc/MemoryMapper.h index d023bfbdb5b6..0b4cda119cad 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/MemoryMapper.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/MemoryMapper.h @@ -14,6 +14,7 @@ #define LLVM_EXECUTIONENGINE_ORC_MEMORYMAPPER_H #include "llvm/ExecutionEngine/Orc/Core.h" +#include "llvm/Support/Process.h" #include <mutex> @@ -41,6 +42,9 @@ public: using OnReservedFunction = unique_function<void(Expected<ExecutorAddrRange>)>; + // Page size of the target process + virtual unsigned int getPageSize() = 0; + /// Reserves address space in executor process virtual void reserve(size_t NumBytes, OnReservedFunction OnReserved) = 0; @@ -76,7 +80,11 @@ public: class InProcessMemoryMapper final : public MemoryMapper { public: - InProcessMemoryMapper() {} + InProcessMemoryMapper(size_t PageSize); + + static Expected<std::unique_ptr<InProcessMemoryMapper>> Create(); + + unsigned int getPageSize() override { return PageSize; } void reserve(size_t NumBytes, OnReservedFunction OnReserved) override; @@ -107,6 +115,56 @@ private: std::mutex Mutex; ReservationMap Reservations; AllocationMap Allocations; + + size_t PageSize; +}; + +class SharedMemoryMapper final : public MemoryMapper { +public: + struct SymbolAddrs { + ExecutorAddr Instance; + ExecutorAddr Reserve; + ExecutorAddr Initialize; + ExecutorAddr Deinitialize; + ExecutorAddr Release; + }; + + SharedMemoryMapper(ExecutorProcessControl &EPC, SymbolAddrs SAs, + size_t PageSize); + + static Expected<std::unique_ptr<SharedMemoryMapper>> + Create(ExecutorProcessControl &EPC, SymbolAddrs SAs); + + unsigned int getPageSize() override { return PageSize; } + + void reserve(size_t NumBytes, OnReservedFunction OnReserved) override; + + char *prepare(ExecutorAddr Addr, size_t ContentSize) override; + + void 
initialize(AllocInfo &AI, OnInitializedFunction OnInitialized) override; + + void deinitialize(ArrayRef<ExecutorAddr> Allocations, + OnDeinitializedFunction OnDeInitialized) override; + + void release(ArrayRef<ExecutorAddr> Reservations, + OnReleasedFunction OnRelease) override; + + ~SharedMemoryMapper() override; + +private: + struct Reservation { + void *LocalAddr; + size_t Size; + }; + + ExecutorProcessControl &EPC; + SymbolAddrs SAs; + + std::mutex Mutex; + + std::map<ExecutorAddr, Reservation> Reservations; + + size_t PageSize; }; } // namespace orc diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h index 96166ac20b2e..2aedf1e44ad8 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h @@ -31,6 +31,12 @@ extern const char *SimpleExecutorMemoryManagerReserveWrapperName; extern const char *SimpleExecutorMemoryManagerFinalizeWrapperName; extern const char *SimpleExecutorMemoryManagerDeallocateWrapperName; +extern const char *ExecutorSharedMemoryMapperServiceInstanceName; +extern const char *ExecutorSharedMemoryMapperServiceReserveWrapperName; +extern const char *ExecutorSharedMemoryMapperServiceInitializeWrapperName; +extern const char *ExecutorSharedMemoryMapperServiceDeinitializeWrapperName; +extern const char *ExecutorSharedMemoryMapperServiceReleaseWrapperName; + extern const char *MemoryWriteUInt8sWrapperName; extern const char *MemoryWriteUInt16sWrapperName; extern const char *MemoryWriteUInt32sWrapperName; @@ -58,6 +64,21 @@ using SPSSimpleExecutorMemoryManagerFinalizeSignature = using SPSSimpleExecutorMemoryManagerDeallocateSignature = shared::SPSError( shared::SPSExecutorAddr, shared::SPSSequence<shared::SPSExecutorAddr>); +// ExecutorSharedMemoryMapperService +using SPSExecutorSharedMemoryMapperServiceReserveSignature = + shared::SPSExpected< + shared::SPSTuple<shared::SPSExecutorAddr, shared::SPSString>>( + shared::SPSExecutorAddr, uint64_t); +using SPSExecutorSharedMemoryMapperServiceInitializeSignature = + shared::SPSExpected<shared::SPSExecutorAddr>( + shared::SPSExecutorAddr, shared::SPSExecutorAddr, + shared::SPSSharedMemoryFinalizeRequest); +using SPSExecutorSharedMemoryMapperServiceDeinitializeSignature = + shared::SPSError(shared::SPSExecutorAddr, + shared::SPSSequence<shared::SPSExecutorAddr>); +using SPSExecutorSharedMemoryMapperServiceReleaseSignature = shared::SPSError( + shared::SPSExecutorAddr, shared::SPSSequence<shared::SPSExecutorAddr>); + using SPSRunAsMainSignature = int64_t(shared::SPSExecutorAddr, shared::SPSSequence<shared::SPSString>); diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h index 9be58e9f0fa9..c38825948208 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h @@ -56,6 +56,7 @@ public: SPSOutputBuffer(char *Buffer, size_t Remaining) : Buffer(Buffer), Remaining(Remaining) {} bool write(const char *Data, size_t Size) { + assert(Data && "Data must not be null"); if (Size > Remaining) return false; memcpy(Buffer, Data, Size); @@ -349,6 +350,8 @@ public: static bool serialize(SPSOutputBuffer &OB, const ArrayRef<char> &A) { if (!SPSArgList<uint64_t>::serialize(OB, static_cast<uint64_t>(A.size()))) return false; + if (A.empty()) // Empty ArrayRef may have null data, so 
bail out early. + return true; return OB.write(A.data(), A.size()); } @@ -358,7 +361,7 @@ public: return false; if (Size > std::numeric_limits<size_t>::max()) return false; - A = {IB.data(), static_cast<size_t>(Size)}; + A = {Size ? IB.data() : nullptr, static_cast<size_t>(Size)}; return IB.skip(Size); } }; @@ -476,6 +479,8 @@ public: static bool serialize(SPSOutputBuffer &OB, StringRef S) { if (!SPSArgList<uint64_t>::serialize(OB, static_cast<uint64_t>(S.size()))) return false; + if (S.empty()) // Empty StringRef may have null data, so bail out early. + return true; return OB.write(S.data(), S.size()); } @@ -487,7 +492,7 @@ public: Data = IB.data(); if (!IB.skip(Size)) return false; - S = StringRef(Data, Size); + S = StringRef(Size ? Data : nullptr, Size); return true; } }; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h index d596a89a50b6..502c7c1f7069 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h @@ -82,6 +82,17 @@ struct FinalizeRequest { shared::AllocActions Actions; }; +struct SharedMemorySegFinalizeRequest { + WireProtectionFlags Prot; + ExecutorAddr Addr; + uint64_t Size; +}; + +struct SharedMemoryFinalizeRequest { + std::vector<SharedMemorySegFinalizeRequest> Segments; + shared::AllocActions Actions; +}; + template <typename T> struct UIntWrite { UIntWrite() = default; UIntWrite(ExecutorAddr Addr, T Value) : Addr(Addr), Value(Value) {} @@ -131,6 +142,13 @@ using SPSSegFinalizeRequest = using SPSFinalizeRequest = SPSTuple<SPSSequence<SPSSegFinalizeRequest>, SPSSequence<SPSAllocActionCallPair>>; +using SPSSharedMemorySegFinalizeRequest = + SPSTuple<SPSMemoryProtectionFlags, SPSExecutorAddr, uint64_t>; + +using SPSSharedMemoryFinalizeRequest = + SPSTuple<SPSSequence<SPSSharedMemorySegFinalizeRequest>, + SPSSequence<SPSAllocActionCallPair>>; + template <typename T> using SPSMemoryAccessUIntWrite = SPSTuple<SPSExecutorAddr, T>; @@ -204,6 +222,48 @@ public: } }; +template <> +class SPSSerializationTraits<SPSSharedMemorySegFinalizeRequest, + tpctypes::SharedMemorySegFinalizeRequest> { + using SFRAL = SPSSharedMemorySegFinalizeRequest::AsArgList; + +public: + static size_t size(const tpctypes::SharedMemorySegFinalizeRequest &SFR) { + return SFRAL::size(SFR.Prot, SFR.Addr, SFR.Size); + } + + static bool serialize(SPSOutputBuffer &OB, + const tpctypes::SharedMemorySegFinalizeRequest &SFR) { + return SFRAL::serialize(OB, SFR.Prot, SFR.Addr, SFR.Size); + } + + static bool deserialize(SPSInputBuffer &IB, + tpctypes::SharedMemorySegFinalizeRequest &SFR) { + return SFRAL::deserialize(IB, SFR.Prot, SFR.Addr, SFR.Size); + } +}; + +template <> +class SPSSerializationTraits<SPSSharedMemoryFinalizeRequest, + tpctypes::SharedMemoryFinalizeRequest> { + using FRAL = SPSSharedMemoryFinalizeRequest::AsArgList; + +public: + static size_t size(const tpctypes::SharedMemoryFinalizeRequest &FR) { + return FRAL::size(FR.Segments, FR.Actions); + } + + static bool serialize(SPSOutputBuffer &OB, + const tpctypes::SharedMemoryFinalizeRequest &FR) { + return FRAL::serialize(OB, FR.Segments, FR.Actions); + } + + static bool deserialize(SPSInputBuffer &IB, + tpctypes::SharedMemoryFinalizeRequest &FR) { + return FRAL::deserialize(IB, FR.Segments, FR.Actions); + } +}; + template <typename T> class SPSSerializationTraits<SPSMemoryAccessUIntWrite<T>, tpctypes::UIntWrite<T>> { @@ -244,7 +304,6 
@@ public: } }; - } // end namespace shared } // end namespace orc } // end namespace llvm diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.h new file mode 100644 index 000000000000..69d8cf5d2980 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.h @@ -0,0 +1,78 @@ +//===----------- ExecutorSharedMemoryMapperService.h ------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_EXECUTORSHAREDMEMORYMAPPERSERVICE +#define LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_EXECUTORSHAREDMEMORYMAPPERSERVICE + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h" +#include "llvm/ExecutionEngine/Orc/TargetProcess/ExecutorBootstrapService.h" + +#include <atomic> +#include <mutex> + +#if defined(_WIN32) +#include <windows.h> +#endif + +namespace llvm { +namespace orc { +namespace rt_bootstrap { + +class ExecutorSharedMemoryMapperService final + : public ExecutorBootstrapService { +public: + ~ExecutorSharedMemoryMapperService(){}; + + Expected<std::pair<ExecutorAddr, std::string>> reserve(uint64_t Size); + Expected<ExecutorAddr> initialize(ExecutorAddr Reservation, + tpctypes::SharedMemoryFinalizeRequest &FR); + + Error deinitialize(const std::vector<ExecutorAddr> &Bases); + Error release(const std::vector<ExecutorAddr> &Bases); + + Error shutdown() override; + void addBootstrapSymbols(StringMap<ExecutorAddr> &M) override; + +private: + struct Allocation { + std::vector<shared::WrapperFunctionCall> DeinitializationActions; + }; + using AllocationMap = DenseMap<ExecutorAddr, Allocation>; + + struct Reservation { + size_t Size; + std::vector<ExecutorAddr> Allocations; +#if defined(_WIN32) + HANDLE SharedMemoryFile; +#endif + }; + using ReservationMap = DenseMap<void *, Reservation>; + + static llvm::orc::shared::CWrapperFunctionResult + reserveWrapper(const char *ArgData, size_t ArgSize); + + static llvm::orc::shared::CWrapperFunctionResult + initializeWrapper(const char *ArgData, size_t ArgSize); + + static llvm::orc::shared::CWrapperFunctionResult + deinitializeWrapper(const char *ArgData, size_t ArgSize); + + static llvm::orc::shared::CWrapperFunctionResult + releaseWrapper(const char *ArgData, size_t ArgSize); + + std::atomic<int> SharedMemoryCount{0}; + std::mutex Mutex; + ReservationMap Reservations; + AllocationMap Allocations; +}; + +} // namespace rt_bootstrap +} // namespace orc +} // namespace llvm +#endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_EXECUTORSHAREDMEMORYMAPPERSERVICE diff --git a/llvm/include/llvm/Frontend/Directive/DirectiveBase.td b/llvm/include/llvm/Frontend/Directive/DirectiveBase.td index e40f40f74c73..4269a966a988 100644 --- a/llvm/include/llvm/Frontend/Directive/DirectiveBase.td +++ b/llvm/include/llvm/Frontend/Directive/DirectiveBase.td @@ -74,6 +74,9 @@ class Clause<string c> { // Define an alternative name return in get<LanguageName>ClauseName function. string alternativeName = ""; + // Define aliases used in the parser. + list<string> aliases = []; + // Optional class holding value of the clause in clang AST. 
string clangClass = ""; @@ -88,6 +91,7 @@ class Clause<string c> { // List of allowed clause values list<ClauseVal> allowedClauseValues = []; + // If set to true, value class is part of a list. Single class by default. bit isValueList = false; @@ -101,6 +105,14 @@ class Clause<string c> { // Set clause used by default when unknown. Function returning the kind // of enumeration will use this clause as the default. bit isDefault = false; + + // Prefix before the actual value. Used in the parser generation. + // `clause(prefix: value)` + string prefix = ""; + + // Set the prefix as optional. + // `clause([prefix]: value)` + bit isPrefixOptional = true; } // Hold information about clause validity by version. diff --git a/llvm/include/llvm/Frontend/OpenACC/ACC.td b/llvm/include/llvm/Frontend/OpenACC/ACC.td index 45d815894454..e5f0632f59f5 100644 --- a/llvm/include/llvm/Frontend/OpenACC/ACC.td +++ b/llvm/include/llvm/Frontend/OpenACC/ACC.td @@ -62,20 +62,24 @@ def ACCC_Collapse : Clause<"collapse"> { // 2.7.6 def ACCC_Copy : Clause<"copy"> { let flangClass = "AccObjectList"; + let aliases = ["present_or_copy", "pcopy"]; } // 2.7.7 def ACCC_Copyin : Clause<"copyin"> { let flangClass = "AccObjectListWithModifier"; + let aliases = ["present_or_copyin", "pcopyin"]; } // 2.7.8 def ACCC_Copyout : Clause<"copyout"> { let flangClass = "AccObjectListWithModifier"; + let aliases = ["present_or_copyout", "pcopyout"]; } // 2.7.9 def ACCC_Create : Clause<"create"> { let flangClass = "AccObjectListWithModifier"; + let aliases = ["present_or_create", "pcreate"]; } // 2.5.15 @@ -130,6 +134,7 @@ def ACCC_DeviceResident : Clause<"device_resident"> { def ACCC_DeviceType : Clause<"device_type"> { let flangClass = "AccDeviceTypeExprList"; let defaultValue = "*"; + let aliases = ["dtype"]; } // 2.6.6 @@ -226,6 +231,7 @@ def ACCC_Seq : Clause<"seq"> {} def ACCC_Vector : Clause<"vector"> { let flangClass = "ScalarIntExpr"; let isValueOptional = true; + let prefix = "length"; } // 2.5.11 @@ -243,6 +249,7 @@ def ACCC_Wait : Clause<"wait"> { def ACCC_Worker: Clause<"worker"> { let flangClass = "ScalarIntExpr"; let isValueOptional = true; + let prefix = "num"; } // 2.12 diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 3dfcabffb58a..e4f2fcc649fc 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -630,6 +630,15 @@ public: InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, bool Tied = true, Value *Final = nullptr); + /// Generator for the taskgroup construct + /// + /// \param Loc The location where the taskgroup construct was encountered. + /// \param AllocaIP The insertion point to be used for alloca instructions. + /// \param BodyGenCB Callback that will generate the region code. + InsertPointTy createTaskgroup(const LocationDescription &Loc, + InsertPointTy AllocaIP, + BodyGenCallbackTy BodyGenCB); + /// Functions used to generate reductions. Such functions take two Values /// representing LHS and RHS of the reduction, respectively, and a reference /// to the value that is updated to refer to the reduction result. 
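The OMPIRBuilder.h hunk above adds a createTaskgroup generator next to the existing createTask. As a minimal, hypothetical sketch of the intended call pattern (not part of the patch; it assumes an already-configured OpenMPIRBuilder and IRBuilder, the usual BodyGenCallbackTy signature of void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP), and an invented helper name emitTaskgroupRegion):

#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/IRBuilder.h"

using namespace llvm;
using InsertPointTy = OpenMPIRBuilder::InsertPointTy;

// Sketch only: emit a taskgroup region whose body is produced by the callback.
static void emitTaskgroupRegion(OpenMPIRBuilder &OMPBuilder,
                                IRBuilder<> &Builder,
                                InsertPointTy AllocaIP) {
  auto BodyGenCB = [&](InsertPointTy /*AllocaIP*/, InsertPointTy CodeGenIP) {
    // Position the builder inside the taskgroup region and emit its body.
    Builder.restoreIP(CodeGenIP);
    // ... front-end specific statement emission goes here ...
  };

  OpenMPIRBuilder::LocationDescription Loc(Builder);
  // createTaskgroup returns the insertion point after the construct.
  Builder.restoreIP(OMPBuilder.createTaskgroup(Loc, AllocaIP, BodyGenCB));
}
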
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index 9d1ab57729b7..7a70af9ce792 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -37,6 +37,7 @@ __OMP_TYPE(Int8Ptr) __OMP_TYPE(Int16Ptr) __OMP_TYPE(Int32Ptr) __OMP_TYPE(Int64Ptr) +__OMP_TYPE(Double) OMP_TYPE(SizeTy, M.getDataLayout().getIntPtrType(Ctx)) @@ -236,6 +237,7 @@ __OMP_RTL(omp_get_place_proc_ids, false, Void, Int32, Int32Ptr) __OMP_RTL(omp_get_place_num, false, Int32, ) __OMP_RTL(omp_get_partition_num_places, false, Int32, ) __OMP_RTL(omp_get_partition_place_nums, false, Void, Int32Ptr) +__OMP_RTL(omp_get_wtime, false, Double,) __OMP_RTL(omp_set_num_threads, false, Void, Int32) __OMP_RTL(omp_set_dynamic, false, Void, Int32) @@ -681,6 +683,7 @@ __OMP_RTL_ATTRS(omp_get_partition_num_places, GetterAttrs, AttributeSet(), ParamAttrs()) __OMP_RTL_ATTRS(omp_get_partition_place_nums, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_wtime, GetterAttrs, AttributeSet(), ParamAttrs()) __OMP_RTL_ATTRS(omp_set_num_threads, SetterAttrs, AttributeSet(), ParamAttrs()) __OMP_RTL_ATTRS(omp_set_dynamic, SetterAttrs, AttributeSet(), ParamAttrs()) @@ -919,7 +922,7 @@ __OMP_RTL_ATTRS(__kmpc_doacross_fini, BarrierAttrs, AttributeSet(), __OMP_RTL_ATTRS(__kmpc_alloc_shared, AttributeSet( EnumAttr(NoUnwind), - EnumAttr(NoSync), + EnumAttr(NoSync), AllocSizeAttr(0, None)), ReturnPtrAttrs, ParamAttrs()) __OMP_RTL_ATTRS(__kmpc_free_shared, DeviceAllocAttrs, AttributeSet(), ParamAttrs(NoCaptureAttrs)) diff --git a/llvm/include/llvm/IR/FixedMetadataKinds.def b/llvm/include/llvm/IR/FixedMetadataKinds.def index 1d24f527df7b..c7cb59b13050 100644 --- a/llvm/include/llvm/IR/FixedMetadataKinds.def +++ b/llvm/include/llvm/IR/FixedMetadataKinds.def @@ -45,3 +45,5 @@ LLVM_FIXED_MD_KIND(MD_annotation, "annotation", 30) LLVM_FIXED_MD_KIND(MD_nosanitize, "nosanitize", 31) LLVM_FIXED_MD_KIND(MD_func_sanitize, "func_sanitize", 32) LLVM_FIXED_MD_KIND(MD_exclude, "exclude", 33) +LLVM_FIXED_MD_KIND(MD_memprof, "memprof", 34) +LLVM_FIXED_MD_KIND(MD_callsite, "callsite", 35) diff --git a/llvm/include/llvm/IR/GlobalIFunc.h b/llvm/include/llvm/IR/GlobalIFunc.h index 976772b343fd..c148ee790778 100644 --- a/llvm/include/llvm/IR/GlobalIFunc.h +++ b/llvm/include/llvm/IR/GlobalIFunc.h @@ -93,6 +93,12 @@ public: static bool classof(const Value *V) { return V->getValueID() == Value::GlobalIFuncVal; } + + // Apply specific operation to all resolver-related values. If resolver target + // is already a global object, then apply the operation to it directly. If + // target is a GlobalExpr or a GlobalAlias, evaluate it to its base object and + // apply the operation for the base object and all aliases along the path. + void applyAlongResolverPath(function_ref<void(const GlobalValue &)> Op) const; }; template <> diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index cec26e966b5c..0d3ffba955a3 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -974,7 +974,7 @@ public: /// This is a convenience function for code that uses aggregate return values /// as a vehicle for having multiple return values. 
ReturnInst *CreateAggregateRet(Value *const *retVals, unsigned N) { - Value *V = UndefValue::get(getCurrentFunctionReturnType()); + Value *V = PoisonValue::get(getCurrentFunctionReturnType()); for (unsigned i = 0; i != N; ++i) V = CreateInsertValue(V, retVals[i], i, "mrv"); return Insert(ReturnInst::Create(Context, V)); diff --git a/llvm/include/llvm/IR/InlineAsm.h b/llvm/include/llvm/IR/InlineAsm.h index 032a70efdceb..0a8d27aad58a 100644 --- a/llvm/include/llvm/IR/InlineAsm.h +++ b/llvm/include/llvm/IR/InlineAsm.h @@ -92,7 +92,8 @@ public: enum ConstraintPrefix { isInput, // 'x' isOutput, // '=x' - isClobber // '~x' + isClobber, // '~x' + isLabel, // '!x' }; using ConstraintCodeVector = std::vector<std::string>; @@ -117,7 +118,7 @@ public: using ConstraintInfoVector = std::vector<ConstraintInfo>; struct ConstraintInfo { - /// Type - The basic type of the constraint: input/output/clobber + /// Type - The basic type of the constraint: input/output/clobber/label /// ConstraintPrefix Type = isInput; diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h index 8d0a8363cdfb..15b0bdf557fb 100644 --- a/llvm/include/llvm/IR/Instruction.h +++ b/llvm/include/llvm/IR/Instruction.h @@ -172,10 +172,6 @@ public: /// its operands. bool isOnlyUserOfAnyOperand(); - bool isIndirectTerminator() const { - return isIndirectTerminator(getOpcode()); - } - static const char* getOpcodeName(unsigned OpCode); static inline bool isTerminator(unsigned OpCode) { @@ -242,17 +238,6 @@ public: } } - /// Returns true if the OpCode is a terminator with indirect targets. - static inline bool isIndirectTerminator(unsigned OpCode) { - switch (OpCode) { - case Instruction::IndirectBr: - case Instruction::CallBr: - return true; - default: - return false; - } - } - //===--------------------------------------------------------------------===// // Metadata manipulation. //===--------------------------------------------------------------------===// diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h index a14bc39cea65..083fed5de4a3 100644 --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -44,6 +44,7 @@ namespace llvm { class APFloat; class APInt; class BasicBlock; +class BlockAddress; class ConstantInt; class DataLayout; class StringRef; @@ -4004,9 +4005,6 @@ class CallBrInst : public CallBase { ArrayRef<BasicBlock *> IndirectDests, ArrayRef<Value *> Args, ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr); - /// Should the Indirect Destinations change, scan + update the Arg list. - void updateArgBlockAddresses(unsigned i, BasicBlock *B); - /// Compute the number of operands to allocate. 
static int ComputeNumOperands(int NumArgs, int NumIndirectDests, int NumBundleInputs = 0) { @@ -4154,7 +4152,6 @@ public: *(&Op<-1>() - getNumIndirectDests() - 1) = reinterpret_cast<Value *>(B); } void setIndirectDest(unsigned i, BasicBlock *B) { - updateArgBlockAddresses(i, B); *(&Op<-1>() - getNumIndirectDests() + i) = reinterpret_cast<Value *>(B); } @@ -4172,6 +4169,8 @@ public: unsigned getNumSuccessors() const { return getNumIndirectDests() + 1; } + BlockAddress *getBlockAddressForIndirectDest(unsigned DestNo) const; + // Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const Instruction *I) { return (I->getOpcode() == Instruction::CallBr); diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h index 06d2335821d3..fc9111a4f512 100644 --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -1381,7 +1381,7 @@ public: } /// The statepoint with which this gc.relocate is associated. - const GCStatepointInst *getStatepoint() const; + const Value *getStatepoint() const; }; /// Represents calls to the gc.relocate intrinsic. diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 8bf8e9ca76ad..c523e3773de4 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -495,7 +495,7 @@ def int_objc_arc_annotation_bottomup_bbend : Intrinsic<[], // Returns the location of the Swift asynchronous context (usually stored just // before the frame pointer), and triggers the creation of a null context if it // would otherwise be unneeded. -def int_swift_async_context_addr : Intrinsic<[llvm_ptrptr_ty], [], [IntrNoMem]>; +def int_swift_async_context_addr : Intrinsic<[llvm_ptrptr_ty], [], []>; //===--------------------- Code Generator Intrinsics ----------------------===// // @@ -619,6 +619,7 @@ def int_memcpy : Intrinsic<[], [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i1_ty], [IntrArgMemOnly, IntrWillReturn, IntrNoFree, + IntrNoCallback, NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>, NoAlias<ArgIndex<0>>, NoAlias<ArgIndex<1>>, WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>, @@ -631,7 +632,7 @@ def int_memcpy : Intrinsic<[], def int_memcpy_inline : Intrinsic<[], [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i1_ty], - [IntrArgMemOnly, IntrWillReturn, IntrNoFree, + [IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>, NoAlias<ArgIndex<0>>, NoAlias<ArgIndex<1>>, WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>, @@ -641,6 +642,7 @@ def int_memmove : Intrinsic<[], [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i1_ty], [IntrArgMemOnly, IntrWillReturn, IntrNoFree, + IntrNoCallback, NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>, WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>, ImmArg<ArgIndex<3>>]>; @@ -648,7 +650,7 @@ def int_memset : Intrinsic<[], [llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty, llvm_i1_ty], [IntrWriteMem, IntrArgMemOnly, IntrWillReturn, - IntrNoFree, + IntrNoFree, IntrNoCallback, NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>, ImmArg<ArgIndex<3>>]>; @@ -659,7 +661,7 @@ def int_memset : Intrinsic<[], def int_memset_inline : Intrinsic<[], [llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty, llvm_i1_ty], - [IntrWriteMem, IntrArgMemOnly, IntrWillReturn, IntrNoFree, + [IntrWriteMem, IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>; @@ -1963,11 +1965,11 @@ def int_vscale 
: DefaultAttrsIntrinsic<[llvm_anyint_ty], [], [IntrNoMem]>; //===---------- Intrinsics to perform subvector insertion/extraction ------===// def int_vector_insert : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty, llvm_i64_ty], - [IntrNoMem, ImmArg<ArgIndex<2>>]>; + [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<2>>]>; def int_vector_extract : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i64_ty], - [IntrNoMem, ImmArg<ArgIndex<1>>]>; + [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<1>>]>; //===----------------- Pointer Authentication Intrinsics ------------------===// // diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index e81224d9b890..93925a84c8e8 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -167,6 +167,10 @@ def int_amdgcn_dispatch_id : ClangBuiltin<"__builtin_amdgcn_dispatch_id">, Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; +// For internal use. Coordinates LDS lowering between IR transform and backend. +def int_amdgcn_lds_kernel_id : + Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; + def int_amdgcn_implicit_buffer_ptr : ClangBuiltin<"__builtin_amdgcn_implicit_buffer_ptr">, Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [], @@ -2304,6 +2308,17 @@ def int_amdgcn_mfma_i32_32x32x16_i8 : AMDGPUMfmaIntrinsic<llvm_v16i32_ty, ll def int_amdgcn_mfma_f32_16x16x8_xf32 : AMDGPUMfmaIntrinsic<llvm_v4f32_ty, llvm_v2f32_ty>; def int_amdgcn_mfma_f32_32x32x4_xf32 : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v2f32_ty>; +class AMDGPUMFp8MfmaIntrinsic<LLVMType DestTy> : + AMDGPUMfmaIntrinsic<DestTy, llvm_i64_ty>; + +multiclass AMDGPUMFp8MfmaIntrinsic<LLVMType DestTy> { + foreach kind = ["bf8_bf8", "bf8_fp8", "fp8_bf8", "fp8_fp8"] in + def NAME#"_"#kind : AMDGPUMFp8MfmaIntrinsic<DestTy>; +} + +defm int_amdgcn_mfma_f32_16x16x32 : AMDGPUMFp8MfmaIntrinsic<llvm_v4f32_ty>; +defm int_amdgcn_mfma_f32_32x32x16 : AMDGPUMFp8MfmaIntrinsic<llvm_v16f32_ty>; + // llvm.amdgcn.smfmac.?32.* vdst, srcA, srcB, srcC, index, cbsz, abid class AMDGPUMSmfmacIntrinsic<LLVMType DestTy, LLVMType SrcA, LLVMType SrcB> : ClangBuiltin<!subst("int", "__builtin", NAME)>, @@ -2320,6 +2335,69 @@ def int_amdgcn_smfmac_f32_32x32x16_bf16 : AMDGPUMSmfmacIntrinsic<llvm_v16f32_ty, def int_amdgcn_smfmac_i32_16x16x64_i8 : AMDGPUMSmfmacIntrinsic<llvm_v4i32_ty, llvm_v2i32_ty, llvm_v4i32_ty>; def int_amdgcn_smfmac_i32_32x32x32_i8 : AMDGPUMSmfmacIntrinsic<llvm_v16i32_ty, llvm_v2i32_ty, llvm_v4i32_ty>; +class AMDGPUMFp8SmfmacIntrinsic<LLVMType DestTy> : + AMDGPUMSmfmacIntrinsic<DestTy, llvm_v2i32_ty, llvm_v4i32_ty>; + +multiclass AMDGPUMFp8SmfmacIntrinsic<LLVMType DestTy> { + foreach kind = ["bf8_bf8", "bf8_fp8", "fp8_bf8", "fp8_fp8"] in + def NAME#"_"#kind : AMDGPUMFp8SmfmacIntrinsic<DestTy>; +} + +defm int_amdgcn_smfmac_f32_16x16x64 : AMDGPUMFp8SmfmacIntrinsic<llvm_v4f32_ty>; +defm int_amdgcn_smfmac_f32_32x32x32 : AMDGPUMFp8SmfmacIntrinsic<llvm_v16f32_ty>; + +// llvm.amdgcn.cvt.f32.bf8 float vdst, int srcA, imm byte_sel [0..3] +// byte_sel selects byte from srcA. 
+def int_amdgcn_cvt_f32_bf8 : ClangBuiltin<"__builtin_amdgcn_cvt_f32_bf8">, + Intrinsic<[llvm_float_ty], + [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<1>>]>; + +// llvm.amdgcn.cvt.f32.fp8 float vdst, int srcA, imm byte_sel [0..3] +def int_amdgcn_cvt_f32_fp8 : ClangBuiltin<"__builtin_amdgcn_cvt_f32_fp8">, + Intrinsic<[llvm_float_ty], + [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<1>>]>; + +// llvm.amdgcn.cvt.pk.f32.bf8 float2 vdst, int srcA, imm word_sel +// word_sel = 1 selects 2 high bytes, 0 selects 2 low bytes. +def int_amdgcn_cvt_pk_f32_bf8 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_f32_bf8">, + Intrinsic<[llvm_v2f32_ty], + [llvm_i32_ty, llvm_i1_ty], + [IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<1>>]>; + +// llvm.amdgcn.cvt.pk.f32.fp8 float2 vdst, int srcA, imm word_sel. +def int_amdgcn_cvt_pk_f32_fp8 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_f32_fp8">, + Intrinsic<[llvm_v2f32_ty], + [llvm_i32_ty, llvm_i1_ty], + [IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<1>>]>; + +// llvm.amdgcn.cvt.pk.bf8.f32 int vdst, float srcA, float srcB, int old, imm word_sel +// word_sel = 1 selects 2 high bytes in the vdst, 0 selects 2 low bytes. +def int_amdgcn_cvt_pk_bf8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_bf8_f32">, + Intrinsic<[llvm_i32_ty], + [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i1_ty], + [IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<3>>]>; + +// llvm.amdgcn.cvt.pk.fp8.f32 int vdst, float srcA, float srcB, int old, imm word_sel +def int_amdgcn_cvt_pk_fp8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_fp8_f32">, + Intrinsic<[llvm_i32_ty], + [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i1_ty], + [IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<3>>]>; + +// llvm.amdgcn.cvt.sr.bf8.f32 int vdst, float srcA, int srcB, int old, imm byte_sel [0..3] +// byte_sel selects byte to write into vdst. +def int_amdgcn_cvt_sr_bf8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_sr_bf8_f32">, + Intrinsic<[llvm_i32_ty], + [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<3>>]>; + +// llvm.amdgcn.cvt.sr.fp8.f32 int vdst, float srcA, int srcB, int old, imm byte_sel [0..3] +def int_amdgcn_cvt_sr_fp8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_sr_fp8_f32">, + Intrinsic<[llvm_i32_ty], + [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<3>>]>; + //===----------------------------------------------------------------------===// // Special Intrinsics for backend internal use only. No frontend // should emit calls to these. 
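The IntrinsicsAMDGPU.td additions above expose the new fp8/bf8 conversion instructions through ClangBuiltin mappings. A rough, hypothetical device-side illustration of the resulting builtin signatures follows (the function cvt_example and its arguments are invented; the byte_sel/word_sel operands must be integer constants because they are declared ImmArg above, and the code assumes a GPU target that implements these instructions):

// Illustration only: signatures follow the intrinsic declarations above.
//   float __builtin_amdgcn_cvt_f32_bf8(int srcA, int byte_sel);
//   int   __builtin_amdgcn_cvt_pk_bf8_f32(float srcA, float srcB, int old, bool word_sel);
float cvt_example(int packed_bf8, float a, float b, int old) {
  // Unpack byte 0 of the packed bf8 value to a float.
  float f0 = __builtin_amdgcn_cvt_f32_bf8(packed_bf8, 0);
  // Repack two floats as bf8 into the low two bytes of 'old' (word_sel = 0).
  int repacked = __builtin_amdgcn_cvt_pk_bf8_f32(a, b, old, false);
  return f0 + (float)repacked;
}
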
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index 14c628595d30..2cdd75f82962 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -20,7 +20,7 @@ let TargetPrefix = "spv" in { def int_spv_gep : Intrinsic<[llvm_anyptr_ty], [llvm_i1_ty, llvm_any_ty, llvm_vararg_ty], [ImmArg<ArgIndex<0>>]>; def int_spv_load : Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty, llvm_i16_ty, llvm_i8_ty], [ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>; - def int_spv_store : Intrinsic<[], [llvm_i32_ty, llvm_anyptr_ty, llvm_i16_ty, llvm_i8_ty], [ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>; + def int_spv_store : Intrinsic<[], [llvm_any_ty, llvm_anyptr_ty, llvm_i16_ty, llvm_i8_ty], [ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>; def int_spv_extractv : Intrinsic<[llvm_any_ty], [llvm_i32_ty, llvm_vararg_ty]>; def int_spv_insertv : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_any_ty, llvm_vararg_ty]>; def int_spv_extractelt : Intrinsic<[llvm_any_ty], [llvm_any_ty, llvm_anyint_ty]>; @@ -28,4 +28,5 @@ let TargetPrefix = "spv" in { def int_spv_const_composite : Intrinsic<[llvm_i32_ty], [llvm_vararg_ty]>; def int_spv_bitcast : Intrinsic<[llvm_any_ty], [llvm_any_ty]>; def int_spv_switch : Intrinsic<[], [llvm_any_ty, llvm_vararg_ty]>; + def int_spv_cmpxchg : Intrinsic<[llvm_i32_ty], [llvm_any_ty, llvm_vararg_ty]>; } diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h index ec769ce95160..b90b895f32e8 100644 --- a/llvm/include/llvm/IR/Metadata.h +++ b/llvm/include/llvm/IR/Metadata.h @@ -1287,7 +1287,7 @@ public: return const_cast<MDNode *>(this)->mutable_end(); } - op_range operands() const { return op_range(op_begin(), op_end()); } + ArrayRef<MDOperand> operands() const { return getHeader().operands(); } const MDOperand &getOperand(unsigned I) const { assert(I < getNumOperands() && "Out of range"); @@ -1345,7 +1345,9 @@ class MDTuple : public MDNode { StorageType Storage, bool ShouldCreate = true); TempMDTuple cloneImpl() const { - return getTemporary(getContext(), SmallVector<Metadata *, 4>(operands())); + ArrayRef<MDOperand> Operands = operands(); + return getTemporary(getContext(), SmallVector<Metadata *, 4>( + Operands.begin(), Operands.end())); } public: diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h index 7f0695b552e1..31ff63c8b660 100644 --- a/llvm/include/llvm/IR/PatternMatch.h +++ b/llvm/include/llvm/IR/PatternMatch.h @@ -153,10 +153,16 @@ inline class_match<ConstantFP> m_ConstantFP() { return class_match<ConstantFP>(); } -/// Match an arbitrary ConstantExpr and ignore it. -inline class_match<ConstantExpr> m_ConstantExpr() { - return class_match<ConstantExpr>(); -} +struct constantexpr_match { + template <typename ITy> bool match(ITy *V) { + auto *C = dyn_cast<Constant>(V); + return C && (isa<ConstantExpr>(C) || C->containsConstantExpression()); + } +}; + +/// Match a constant expression or a constant that contains a constant +/// expression. +inline constantexpr_match m_ConstantExpr() { return constantexpr_match(); } /// Match an arbitrary basic block value and ignore it. inline class_match<BasicBlock> m_BasicBlock() { @@ -741,14 +747,14 @@ inline bind_ty<const BasicBlock> m_BasicBlock(const BasicBlock *&V) { /// Match an arbitrary immediate Constant and ignore it. 
inline match_combine_and<class_match<Constant>, - match_unless<class_match<ConstantExpr>>> + match_unless<constantexpr_match>> m_ImmConstant() { return m_CombineAnd(m_Constant(), m_Unless(m_ConstantExpr())); } /// Match an immediate Constant, capturing the value if we match. inline match_combine_and<bind_ty<Constant>, - match_unless<class_match<ConstantExpr>>> + match_unless<constantexpr_match>> m_ImmConstant(Constant *&C) { return m_CombineAnd(m_Constant(C), m_Unless(m_ConstantExpr())); } diff --git a/llvm/include/llvm/IR/Statepoint.h b/llvm/include/llvm/IR/Statepoint.h index ba8ffbbaf397..559840a33cfd 100644 --- a/llvm/include/llvm/IR/Statepoint.h +++ b/llvm/include/llvm/IR/Statepoint.h @@ -54,7 +54,6 @@ enum class StatepointFlags { // These two are defined in IntrinsicInst since they're part of the // IntrinsicInst class hierarchy. class GCRelocateInst; -class GCResultInst; /// Represents a gc.statepoint intrinsic call. This extends directly from /// CallBase as the IntrinsicInst only supports calls and gc.statepoint is diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 77f2c6330788..8cf31c08dff8 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -88,7 +88,6 @@ void initializeBranchProbabilityInfoWrapperPassPass(PassRegistry&); void initializeBranchRelaxationPass(PassRegistry&); void initializeBreakCriticalEdgesPass(PassRegistry&); void initializeBreakFalseDepsPass(PassRegistry&); -void initializeCanonicalizeAliasesLegacyPassPass(PassRegistry &); void initializeCanonicalizeFreezeInLoopsPass(PassRegistry &); void initializeCFGOnlyPrinterLegacyPassPass(PassRegistry&); void initializeCFGOnlyViewerLegacyPassPass(PassRegistry&); @@ -101,7 +100,6 @@ void initializeCFIFixupPass(PassRegistry&); void initializeCFIInstrInserterPass(PassRegistry&); void initializeCFLAndersAAWrapperPassPass(PassRegistry&); void initializeCFLSteensAAWrapperPassPass(PassRegistry&); -void initializeCGProfileLegacyPassPass(PassRegistry &); void initializeCallGraphDOTPrinterPass(PassRegistry&); void initializeCallGraphPrinterLegacyPassPass(PassRegistry&); void initializeCallGraphViewerPass(PassRegistry&); @@ -113,7 +111,6 @@ void initializeCodeGenPreparePass(PassRegistry&); void initializeConstantHoistingLegacyPassPass(PassRegistry&); void initializeConstantMergeLegacyPassPass(PassRegistry&); void initializeConstraintEliminationPass(PassRegistry &); -void initializeControlHeightReductionLegacyPassPass(PassRegistry&); void initializeCorrelatedValuePropagationPass(PassRegistry&); void initializeCostModelAnalysisPass(PassRegistry&); void initializeCrossDSOCFIPass(PassRegistry&); @@ -148,7 +145,6 @@ void initializeEarlyTailDuplicatePass(PassRegistry&); void initializeEdgeBundlesPass(PassRegistry&); void initializeEHContGuardCatchretPass(PassRegistry &); void initializeEliminateAvailableExternallyLegacyPassPass(PassRegistry&); -void initializeEntryExitInstrumenterPass(PassRegistry&); void initializeExpandMemCmpPassPass(PassRegistry&); void initializeExpandPostRAPass(PassRegistry&); void initializeExpandReductionsPass(PassRegistry&); @@ -165,7 +161,6 @@ void initializeFloat2IntLegacyPassPass(PassRegistry&); void initializeForceFunctionAttrsLegacyPassPass(PassRegistry&); void initializeForwardControlFlowIntegrityPass(PassRegistry&); void initializeFuncletLayoutPass(PassRegistry&); -void initializeFunctionImportLegacyPassPass(PassRegistry&); void initializeFunctionSpecializationLegacyPassPass(PassRegistry &); void 
initializeGCMachineCodeAnalysisPass(PassRegistry&); void initializeGCModuleInfoPass(PassRegistry&); @@ -200,8 +195,6 @@ void initializeInlineCostAnalysisPass(PassRegistry&); void initializeInstCountLegacyPassPass(PassRegistry &); void initializeInstNamerPass(PassRegistry&); void initializeInstSimplifyLegacyPassPass(PassRegistry &); -void initializeInstrProfilingLegacyPassPass(PassRegistry&); -void initializeInstrOrderFileLegacyPassPass(PassRegistry&); void initializeInstructionCombiningPassPass(PassRegistry&); void initializeInstructionSelectPass(PassRegistry&); void initializeInterleavedAccessPass(PassRegistry&); @@ -273,7 +266,6 @@ void initializeLowerWidenableConditionLegacyPassPass(PassRegistry&); void initializeLowerIntrinsicsPass(PassRegistry&); void initializeLowerInvokeLegacyPassPass(PassRegistry&); void initializeLowerSwitchLegacyPassPass(PassRegistry &); -void initializeLowerTypeTestsPass(PassRegistry&); void initializeLowerMatrixIntrinsicsLegacyPassPass(PassRegistry &); void initializeLowerMatrixIntrinsicsMinimalLegacyPassPass(PassRegistry &); void initializeMIRAddFSDiscriminatorsPass(PassRegistry &); @@ -321,7 +313,6 @@ void initializeModuleSummaryIndexWrapperPassPass(PassRegistry&); void initializeModuloScheduleTestPass(PassRegistry&); void initializeMustExecutePrinterPass(PassRegistry&); void initializeMustBeExecutedContextPrinterPass(PassRegistry&); -void initializeNameAnonGlobalLegacyPassPass(PassRegistry&); void initializeNaryReassociateLegacyPassPass(PassRegistry&); void initializeNewGVNLegacyPassPass(PassRegistry&); void initializeObjCARCAAWrapperPassPass(PassRegistry&); @@ -347,7 +338,6 @@ void initializePostDomOnlyViewerWrapperPassPass(PassRegistry &); void initializePostDomPrinterWrapperPassPass(PassRegistry &); void initializePostDomViewerWrapperPassPass(PassRegistry &); void initializePostDominatorTreeWrapperPassPass(PassRegistry&); -void initializePostInlineEntryExitInstrumenterPass(PassRegistry&); void initializePostMachineSchedulerPass(PassRegistry&); void initializePostOrderFunctionAttrsLegacyPassPass(PassRegistry&); void initializePostRAHazardRecognizerPass(PassRegistry&); @@ -393,9 +383,7 @@ void initializeSLPVectorizerPass(PassRegistry&); void initializeSROALegacyPassPass(PassRegistry&); void initializeSafeStackLegacyPassPass(PassRegistry&); void initializeSafepointIRVerifierPass(PassRegistry&); -void initializeSampleProfileLoaderLegacyPassPass(PassRegistry&); void initializeSelectOptimizePass(PassRegistry &); -void initializeModuleSanitizerCoverageLegacyPassPass(PassRegistry &); void initializeScalarEvolutionWrapperPassPass(PassRegistry&); void initializeScalarizeMaskedMemIntrinLegacyPassPass(PassRegistry &); void initializeScalarizerLegacyPassPass(PassRegistry&); @@ -448,7 +436,6 @@ void initializeVirtRegMapPass(PassRegistry&); void initializeVirtRegRewriterPass(PassRegistry&); void initializeWarnMissedTransformationsLegacyPass(PassRegistry &); void initializeWasmEHPreparePass(PassRegistry&); -void initializeWholeProgramDevirtPass(PassRegistry&); void initializeWinEHPreparePass(PassRegistry&); void initializeWriteBitcodePassPass(PassRegistry&); void initializeWriteThinLTOBitcodePass(PassRegistry&); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index af5926dcb38b..e660ea05ddcf 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -91,7 +91,6 @@ namespace { (void) llvm::createLibCallsShrinkWrapPass(); (void) llvm::createCalledValuePropagationPass(); (void) 
llvm::createConstantMergePass(); - (void) llvm::createControlHeightReductionLegacyPass(); (void) llvm::createCostModelAnalysisPass(); (void) llvm::createDeadArgEliminationPass(); (void) llvm::createDeadCodeEliminationPass(); @@ -101,8 +100,6 @@ namespace { (void) llvm::createDomPrinterWrapperPassPass(); (void) llvm::createDomOnlyViewerWrapperPassPass(); (void) llvm::createDomViewerWrapperPassPass(); - (void) llvm::createInstrProfilingLegacyPass(); - (void) llvm::createFunctionImportPass(); (void) llvm::createFunctionInliningPass(); (void) llvm::createAlwaysInlinerLegacyPass(); (void) llvm::createGlobalDCEPass(); @@ -177,8 +174,6 @@ namespace { (void) llvm::createInstCountPass(); (void) llvm::createConstantHoistingPass(); (void) llvm::createCodeGenPreparePass(); - (void) llvm::createEntryExitInstrumenterPass(); - (void) llvm::createPostInlineEntryExitInstrumenterPass(); (void) llvm::createEarlyCSEPass(); (void) llvm::createGVNHoistPass(); (void) llvm::createMergedLoadStoreMotionPass(); diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h index a0e18891ed90..61520c4f29bf 100644 --- a/llvm/include/llvm/MC/MCContext.h +++ b/llvm/include/llvm/MC/MCContext.h @@ -603,8 +603,6 @@ public: const MCSymbolELF *Group, const MCSectionELF *RelInfoSection); - void renameELFSection(MCSectionELF *Section, StringRef Name); - MCSectionELF *createELFGroupSection(const MCSymbolELF *Group, bool IsComdat); void recordELFMergeableSectionInfo(StringRef SectionName, unsigned Flags, diff --git a/llvm/include/llvm/MC/MCDXContainerStreamer.h b/llvm/include/llvm/MC/MCDXContainerStreamer.h index ef1a95f71778..ac2fbc6cdff3 100644 --- a/llvm/include/llvm/MC/MCDXContainerStreamer.h +++ b/llvm/include/llvm/MC/MCDXContainerStreamer.h @@ -22,8 +22,6 @@ #include "llvm/MC/MCObjectWriter.h" namespace llvm { -class MCAssembler; -class MCExpr; class MCInst; class raw_ostream; diff --git a/llvm/include/llvm/MC/MCMachObjectWriter.h b/llvm/include/llvm/MC/MCMachObjectWriter.h index 149373dd2b54..15e4652bc05d 100644 --- a/llvm/include/llvm/MC/MCMachObjectWriter.h +++ b/llvm/include/llvm/MC/MCMachObjectWriter.h @@ -263,9 +263,9 @@ public: const MCFragment &FB, bool InSet, bool IsPCRel) const override; - uint64_t writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override; + void populateAddrSigSection(MCAssembler &Asm); - void writeAddrsigSection(MCAssembler &Asm); + uint64_t writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override; }; /// Construct a new Mach-O writer instance. 
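Looking back at the PatternMatch.h change earlier in this diff: m_ConstantExpr now matches any constant that is, or merely contains, a ConstantExpr, so m_ImmConstant also filters out operands built from constant expressions such as ptrtoint of a global. A small hypothetical sketch of the typical usage pattern (the helper name and the example IR in the comment are invented for illustration):

#include "llvm/IR/PatternMatch.h"

using namespace llvm;
using namespace llvm::PatternMatch;

// Illustration only: accept "add %x, <plain immediate constant>" and reject
// additions whose constant operand is (or contains) a constant expression,
// e.g. "add i32 %x, ptrtoint (ptr @g to i32)".
static bool isAddOfImmConstant(Value *V, Constant *&C) {
  return match(V, m_Add(m_Value(), m_ImmConstant(C)));
}
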
diff --git a/llvm/include/llvm/MC/MCSPIRVStreamer.h b/llvm/include/llvm/MC/MCSPIRVStreamer.h index 7366e0a9d82c..fc1e41c636d8 100644 --- a/llvm/include/llvm/MC/MCSPIRVStreamer.h +++ b/llvm/include/llvm/MC/MCSPIRVStreamer.h @@ -19,8 +19,6 @@ #include "llvm/MC/MCObjectWriter.h" namespace llvm { -class MCAssembler; -class MCExpr; class MCInst; class raw_ostream; diff --git a/llvm/include/llvm/MC/MCTargetOptions.h b/llvm/include/llvm/MC/MCTargetOptions.h index 9c906cdc90d0..ae305564a353 100644 --- a/llvm/include/llvm/MC/MCTargetOptions.h +++ b/llvm/include/llvm/MC/MCTargetOptions.h @@ -27,7 +27,6 @@ enum class ExceptionHandling { enum class DebugCompressionType { None, ///< No compression - GNU, ///< zlib-gnu style compression Z, ///< zlib style complession }; diff --git a/llvm/include/llvm/Object/Archive.h b/llvm/include/llvm/Object/Archive.h index a36c9bd6163b..63f0df85054e 100644 --- a/llvm/include/llvm/Object/Archive.h +++ b/llvm/include/llvm/Object/Archive.h @@ -379,10 +379,10 @@ protected: uint64_t getArchiveMagicLen() const; void setFirstRegular(const Child &C); -private: StringRef SymbolTable; StringRef StringTable; +private: StringRef FirstRegularData; uint16_t FirstRegularStartOfFile = -1; diff --git a/llvm/include/llvm/Object/DXContainer.h b/llvm/include/llvm/Object/DXContainer.h index 7aa7d8ecf4c7..9ec5b5e6b1b2 100644 --- a/llvm/include/llvm/Object/DXContainer.h +++ b/llvm/include/llvm/Object/DXContainer.h @@ -121,4 +121,4 @@ public: } // namespace object } // namespace llvm -#endif // LLVM_OBJECT_DXCONTAINERFILE_H +#endif // LLVM_OBJECT_DXCONTAINER_H diff --git a/llvm/include/llvm/Object/Decompressor.h b/llvm/include/llvm/Object/Decompressor.h index 00b6c2016742..35f4ebe2e5d6 100644 --- a/llvm/include/llvm/Object/Decompressor.h +++ b/llvm/include/llvm/Object/Decompressor.h @@ -16,8 +16,6 @@ namespace llvm { namespace object { -class SectionRef; - /// Decompressor helps to handle decompression of compressed sections. class Decompressor { public: @@ -43,19 +41,9 @@ public: /// Return memory buffer size required for decompression. uint64_t getDecompressedSize() { return DecompressedSize; } - /// Return true if section is compressed, including gnu-styled case. - static bool isCompressed(const object::SectionRef &Section); - - /// Return true if section is a ELF compressed one. - static bool isCompressedELFSection(uint64_t Flags, StringRef Name); - - /// Return true if section name matches gnu style compressed one. 
- static bool isGnuStyle(StringRef Name); - private: Decompressor(StringRef Data); - Error consumeCompressedGnuHeader(); Error consumeCompressedZLibHeader(bool Is64Bit, bool IsLittleEndian); StringRef SectionData; diff --git a/llvm/include/llvm/Object/ELF.h b/llvm/include/llvm/Object/ELF.h index 1a59ba94098f..794d29fd9913 100644 --- a/llvm/include/llvm/Object/ELF.h +++ b/llvm/include/llvm/Object/ELF.h @@ -181,6 +181,7 @@ public: private: StringRef Buf; + std::vector<Elf_Shdr> FakeSections; ELFFile(StringRef Object); @@ -389,6 +390,8 @@ public: Expected<ArrayRef<uint8_t>> getSectionContents(const Elf_Shdr &Sec) const; Expected<ArrayRef<uint8_t>> getSegmentContents(const Elf_Phdr &Phdr) const; Expected<std::vector<BBAddrMap>> decodeBBAddrMap(const Elf_Shdr &Sec) const; + + void createFakeSections(); }; using ELF32LEFile = ELFFile<ELF32LE>; @@ -757,11 +760,37 @@ Expected<ELFFile<ELFT>> ELFFile<ELFT>::create(StringRef Object) { return ELFFile(Object); } +/// Used by llvm-objdump -d (which needs sections for disassembly) to +/// disassemble objects without a section header table (e.g. ET_CORE objects +/// analyzed by linux perf or ET_EXEC with llvm-strip --strip-sections). +template <class ELFT> void ELFFile<ELFT>::createFakeSections() { + if (!FakeSections.empty()) + return; + auto PhdrsOrErr = program_headers(); + if (!PhdrsOrErr) + return; + + for (auto Phdr : *PhdrsOrErr) { + if (!(Phdr.p_type & ELF::PT_LOAD) || !(Phdr.p_flags & ELF::PF_X)) + continue; + Elf_Shdr FakeShdr = {}; + FakeShdr.sh_type = ELF::SHT_PROGBITS; + FakeShdr.sh_flags = ELF::SHF_ALLOC | ELF::SHF_EXECINSTR; + FakeShdr.sh_addr = Phdr.p_vaddr; + FakeShdr.sh_size = Phdr.p_memsz; + FakeShdr.sh_offset = Phdr.p_offset; + FakeSections.push_back(FakeShdr); + } +} + template <class ELFT> Expected<typename ELFT::ShdrRange> ELFFile<ELFT>::sections() const { const uintX_t SectionTableOffset = getHeader().e_shoff; - if (SectionTableOffset == 0) + if (SectionTableOffset == 0) { + if (!FakeSections.empty()) + return makeArrayRef(FakeSections.data(), FakeSections.size()); return ArrayRef<Elf_Shdr>(); + } if (getHeader().e_shentsize != sizeof(Elf_Shdr)) return createError("invalid e_shentsize in ELF header: " + diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h index c449a3dafc0c..ed2f70b0da25 100644 --- a/llvm/include/llvm/Object/ELFObjectFile.h +++ b/llvm/include/llvm/Object/ELFObjectFile.h @@ -457,6 +457,8 @@ public: elf_symbol_iterator_range getDynamicSymbolIterators() const override; bool isRelocatableObject() const override; + + void createFakeSections() { EF.createFakeSections(); } }; using ELF32LEObjectFile = ELFObjectFile<ELF32LE>; diff --git a/llvm/include/llvm/Object/OffloadBinary.h b/llvm/include/llvm/Object/OffloadBinary.h index 5afc3ed295ae..4bff91c4c930 100644 --- a/llvm/include/llvm/Object/OffloadBinary.h +++ b/llvm/include/llvm/Object/OffloadBinary.h @@ -14,8 +14,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_BINARYFORMAT_OFFLOADING_H -#define LLVM_BINARYFORMAT_OFFLOADING_H +#ifndef LLVM_OBJECT_OFFLOADBINARY_H +#define LLVM_OBJECT_OFFLOADBINARY_H #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" @@ -80,7 +80,7 @@ public: /// Serialize the contents of \p File to a binary buffer to be read later. 
static std::unique_ptr<MemoryBuffer> write(const OffloadingImage &); - static uint64_t getAlignment() { return alignof(Header); } + static uint64_t getAlignment() { return 8; } ImageKind getImageKind() const { return TheEntry->TheImageKind; } OffloadKind getOffloadKind() const { return TheEntry->TheOffloadKind; } diff --git a/llvm/include/llvm/ObjectYAML/OffloadYAML.h b/llvm/include/llvm/ObjectYAML/OffloadYAML.h index a4fdbce0b320..fc7a8cc7e78e 100644 --- a/llvm/include/llvm/ObjectYAML/OffloadYAML.h +++ b/llvm/include/llvm/ObjectYAML/OffloadYAML.h @@ -76,4 +76,4 @@ template <> struct MappingTraits<OffloadYAML::Binary::Member> { } // end namespace yaml } // end namespace llvm -#endif // LLVM_OBJECTYAML_ARCHIVEYAML_H +#endif // LLVM_OBJECTYAML_OFFLOADYAML_H diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h index 32ecc9ec5fb0..30287cde5de7 100644 --- a/llvm/include/llvm/Passes/StandardInstrumentations.h +++ b/llvm/include/llvm/Passes/StandardInstrumentations.h @@ -133,9 +133,9 @@ public: } bool isPoisoned() const { - return BBGuards && - std::any_of(BBGuards->begin(), BBGuards->end(), - [](const auto &BB) { return BB.second.isPoisoned(); }); + return BBGuards && llvm::any_of(*BBGuards, [](const auto &BB) { + return BB.second.isPoisoned(); + }); } static void printDiff(raw_ostream &out, const CFG &Before, diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h index 3a25de05bbf1..1d1b59bb6c46 100644 --- a/llvm/include/llvm/ProfileData/InstrProfReader.h +++ b/llvm/include/llvm/ProfileData/InstrProfReader.h @@ -619,9 +619,14 @@ public: /// Read a single record. Error readNextRecord(NamedInstrProfRecord &Record) override; - /// Return the NamedInstrProfRecord associated with FuncName and FuncHash - Expected<InstrProfRecord> getInstrProfRecord(StringRef FuncName, - uint64_t FuncHash); + /// Return the NamedInstrProfRecord associated with FuncName and FuncHash. + /// When return a hash_mismatch error and MismatchedFuncSum is not nullptr, + /// the sum of all counters in the mismatched function will be set to + /// MismatchedFuncSum. If there are multiple instances of mismatched + /// functions, MismatchedFuncSum returns the maximum. + Expected<InstrProfRecord> + getInstrProfRecord(StringRef FuncName, uint64_t FuncHash, + uint64_t *MismatchedFuncSum = nullptr); /// Return the memprof record for the function identified by /// llvm::md5(Name). diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h index f11392c05318..1ad83c2f5b5a 100644 --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -874,16 +874,20 @@ public: /// Return the total number of samples collected inside the function. uint64_t getTotalSamples() const { return TotalSamples; } - /// Return the total number of branch samples that have the function as the - /// branch target. This should be equivalent to the sample of the first - /// instruction of the symbol. But as we directly get this info for raw - /// profile without referring to potentially inaccurate debug info, this + /// For top-level functions, return the total number of branch samples that + /// have the function as the branch target (or 0 otherwise). This is the raw + /// data fetched from the profile. This should be equivalent to the sample of + /// the first instruction of the symbol. 
But as we directly get this info for + /// raw profile without referring to potentially inaccurate debug info, this /// gives more accurate profile data and is preferred for standalone symbols. uint64_t getHeadSamples() const { return TotalHeadSamples; } - /// Return the sample count of the first instruction of the function. + /// Return an estimate of the sample count of the function entry basic block. /// The function can be either a standalone symbol or an inlined function. - uint64_t getEntrySamples() const { + /// For Context-Sensitive profiles, this will prefer returning the head + /// samples (i.e. getHeadSamples()), if non-zero. Otherwise it estimates from + /// the function body's samples or callsite samples. + uint64_t getHeadSamplesEstimate() const { if (FunctionSamples::ProfileIsCS && getHeadSamples()) { // For CS profile, if we already have more accurate head samples // counted by branch sample from caller, use them as entry samples. @@ -900,7 +904,7 @@ public: // An indirect callsite may be promoted to several inlined direct calls. // We need to get the sum of them. for (const auto &N_FS : CallsiteSamples.begin()->second) - Count += N_FS.second.getEntrySamples(); + Count += N_FS.second.getHeadSamplesEstimate(); } // Return at least 1 if total sample is not 0. return Count ? Count : TotalSamples > 0; diff --git a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h index 41d144cfd5c4..61b05743faf6 100644 --- a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h +++ b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h @@ -161,7 +161,8 @@ enum : int32_t { KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1), KERNEL_CODE_PROPERTY(RESERVED0, 7, 3), KERNEL_CODE_PROPERTY(ENABLE_WAVEFRONT_SIZE32, 10, 1), // GFX10+ - KERNEL_CODE_PROPERTY(RESERVED1, 11, 5), + KERNEL_CODE_PROPERTY(USES_DYNAMIC_STACK, 11, 1), + KERNEL_CODE_PROPERTY(RESERVED1, 12, 4), }; #undef KERNEL_CODE_PROPERTY diff --git a/llvm/include/llvm/Support/Casting.h b/llvm/include/llvm/Support/Casting.h index 5444d777b749..b6bbff8ada10 100644 --- a/llvm/include/llvm/Support/Casting.h +++ b/llvm/include/llvm/Support/Casting.h @@ -265,7 +265,7 @@ struct CastIsPossible { template <typename To, typename From> struct CastIsPossible<To, Optional<From>> { static inline bool isPossible(const Optional<From> &f) { - assert(f.hasValue() && "CastIsPossible::isPossible called on a nullopt!"); + assert(f && "CastIsPossible::isPossible called on a nullopt!"); return isa_impl_wrap< To, const From, typename simplify_type<const From>::SimpleType>::doit(*f); diff --git a/llvm/include/llvm/Support/Compression.h b/llvm/include/llvm/Support/Compression.h index c99f811459ab..8500396d88a0 100644 --- a/llvm/include/llvm/Support/Compression.h +++ b/llvm/include/llvm/Support/Compression.h @@ -19,7 +19,6 @@ namespace llvm { template <typename T> class SmallVectorImpl; class Error; -class StringRef; namespace compression { namespace zlib { @@ -44,6 +43,28 @@ Error uncompress(ArrayRef<uint8_t> Input, } // End of namespace zlib +namespace zstd { + +constexpr int NoCompression = -5; +constexpr int BestSpeedCompression = 1; +constexpr int DefaultCompression = 5; +constexpr int BestSizeCompression = 12; + +bool isAvailable(); + +void compress(ArrayRef<uint8_t> Input, + SmallVectorImpl<uint8_t> &CompressedBuffer, + int Level = DefaultCompression); + +Error uncompress(ArrayRef<uint8_t> Input, uint8_t *UncompressedBuffer, + size_t &UncompressedSize); + +Error uncompress(ArrayRef<uint8_t> Input, + 
SmallVectorImpl<uint8_t> &UncompressedBuffer, + size_t UncompressedSize); + +} // End of namespace zstd + } // End of namespace compression } // End of namespace llvm diff --git a/llvm/include/llvm/Support/DivisionByConstantInfo.h b/llvm/include/llvm/Support/DivisionByConstantInfo.h index 896bc679885e..7d01613ce1c6 100644 --- a/llvm/include/llvm/Support/DivisionByConstantInfo.h +++ b/llvm/include/llvm/Support/DivisionByConstantInfo.h @@ -1,4 +1,4 @@ -//== llvm/Support/DivisonByConstantInfo.h - division by constant -*- C++ -*-==// +//===- llvm/Support/DivisionByConstantInfo.h ---------------------*- C++ -*-==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -25,9 +25,9 @@ struct SignedDivisionByConstantInfo { }; /// Magic data for optimising unsigned division by a constant. -struct UnsignedDivisonByConstantInfo { - static UnsignedDivisonByConstantInfo get(const APInt &D, - unsigned LeadingZeros = 0); +struct UnsignedDivisionByConstantInfo { + static UnsignedDivisionByConstantInfo get(const APInt &D, + unsigned LeadingZeros = 0); APInt Magic; ///< magic number bool IsAdd; ///< add indicator unsigned ShiftAmount; ///< shift amount diff --git a/llvm/include/llvm/Support/JSON.h b/llvm/include/llvm/Support/JSON.h index 719e8b60d0fa..0a44aabedae6 100644 --- a/llvm/include/llvm/Support/JSON.h +++ b/llvm/include/llvm/Support/JSON.h @@ -169,44 +169,36 @@ public: emplace_back(V); } - Value &operator[](size_t I) { return V[I]; } - const Value &operator[](size_t I) const { return V[I]; } - Value &front() { return V.front(); } - const Value &front() const { return V.front(); } - Value &back() { return V.back(); } - const Value &back() const { return V.back(); } - Value *data() { return V.data(); } - const Value *data() const { return V.data(); } - - iterator begin() { return V.begin(); } - const_iterator begin() const { return V.begin(); } - iterator end() { return V.end(); } - const_iterator end() const { return V.end(); } - - bool empty() const { return V.empty(); } - size_t size() const { return V.size(); } - void reserve(size_t S) { V.reserve(S); } - - void clear() { V.clear(); } - void push_back(const Value &E) { V.push_back(E); } - void push_back(Value &&E) { V.push_back(std::move(E)); } - template <typename... Args> void emplace_back(Args &&... A) { - V.emplace_back(std::forward<Args>(A)...); - } - void pop_back() { V.pop_back(); } + Value &operator[](size_t I); + const Value &operator[](size_t I) const; + Value &front(); + const Value &front() const; + Value &back(); + const Value &back() const; + Value *data(); + const Value *data() const; + + iterator begin(); + const_iterator begin() const; + iterator end(); + const_iterator end() const; + + bool empty() const; + size_t size() const; + void reserve(size_t S); + + void clear(); + void push_back(const Value &E); + void push_back(Value &&E); + template <typename... Args> void emplace_back(Args &&...A); + void pop_back(); // FIXME: insert() takes const_iterator since C++11, old libstdc++ disagrees. - iterator insert(iterator P, const Value &E) { return V.insert(P, E); } - iterator insert(iterator P, Value &&E) { - return V.insert(P, std::move(E)); - } - template <typename It> iterator insert(iterator P, It A, It Z) { - return V.insert(P, A, Z); - } - template <typename... Args> iterator emplace(const_iterator P, Args &&... 
A) { - return V.emplace(P, std::forward<Args>(A)...); - } + iterator insert(iterator P, const Value &E); + iterator insert(iterator P, Value &&E); + template <typename It> iterator insert(iterator P, It A, It Z); + template <typename... Args> iterator emplace(const_iterator P, Args &&...A); - friend bool operator==(const Array &L, const Array &R) { return L.V == R.V; } + friend bool operator==(const Array &L, const Array &R); }; inline bool operator!=(const Array &L, const Array &R) { return !(L == R); } @@ -515,6 +507,48 @@ private: bool operator==(const Value &, const Value &); inline bool operator!=(const Value &L, const Value &R) { return !(L == R); } +// Array Methods +inline Value &Array::operator[](size_t I) { return V[I]; } +inline const Value &Array::operator[](size_t I) const { return V[I]; } +inline Value &Array::front() { return V.front(); } +inline const Value &Array::front() const { return V.front(); } +inline Value &Array::back() { return V.back(); } +inline const Value &Array::back() const { return V.back(); } +inline Value *Array::data() { return V.data(); } +inline const Value *Array::data() const { return V.data(); } + +inline typename Array::iterator Array::begin() { return V.begin(); } +inline typename Array::const_iterator Array::begin() const { return V.begin(); } +inline typename Array::iterator Array::end() { return V.end(); } +inline typename Array::const_iterator Array::end() const { return V.end(); } + +inline bool Array::empty() const { return V.empty(); } +inline size_t Array::size() const { return V.size(); } +inline void Array::reserve(size_t S) { V.reserve(S); } + +inline void Array::clear() { V.clear(); } +inline void Array::push_back(const Value &E) { V.push_back(E); } +inline void Array::push_back(Value &&E) { V.push_back(std::move(E)); } +template <typename... Args> inline void Array::emplace_back(Args &&...A) { + V.emplace_back(std::forward<Args>(A)...); +} +inline void Array::pop_back() { V.pop_back(); } +inline typename Array::iterator Array::insert(iterator P, const Value &E) { + return V.insert(P, E); +} +inline typename Array::iterator Array::insert(iterator P, Value &&E) { + return V.insert(P, std::move(E)); +} +template <typename It> +inline typename Array::iterator Array::insert(iterator P, It A, It Z) { + return V.insert(P, A, Z); +} +template <typename... Args> +inline typename Array::iterator Array::emplace(const_iterator P, Args &&...A) { + return V.emplace(P, std::forward<Args>(A)...); +} +inline bool operator==(const Array &L, const Array &R) { return L.V == R.V; } + /// ObjectKey is a used to capture keys in Object. Like Value but: /// - only strings are allowed /// - it's optimized for the string literal case (Owned == nullptr) diff --git a/llvm/include/llvm/Support/SpecialCaseList.h b/llvm/include/llvm/Support/SpecialCaseList.h index d022a8f53706..0d56c4b9912d 100644 --- a/llvm/include/llvm/Support/SpecialCaseList.h +++ b/llvm/include/llvm/Support/SpecialCaseList.h @@ -19,9 +19,9 @@ // prefix:wildcard_expression[=category] // If category is not specified, it is assumed to be empty string. // Definitions of "prefix" and "category" are sanitizer-specific. For example, -// sanitizer exclusion support prefixes "src", "fun" and "global". -// Wildcard expressions define, respectively, source files, functions or -// globals which shouldn't be instrumented. +// sanitizer exclusion support prefixes "src", "mainfile", "fun" and "global". 
+// Wildcard expressions define, respectively, source files, main files, +// functions or globals which shouldn't be instrumented. // Examples of categories: // "functional": used in DFSan to list functions with pure functional // semantics. @@ -37,6 +37,7 @@ // type:*Namespace::ClassName*=init // src:file_with_tricky_code.cc // src:ignore-global-initializers-issues.cc=init +// mainfile:main_file.cc // // [dataflow] // # Functions with pure functional semantics: diff --git a/llvm/include/llvm/Support/VirtualFileSystem.h b/llvm/include/llvm/Support/VirtualFileSystem.h index 3c99b0d8efdb..6844a406f38c 100644 --- a/llvm/include/llvm/Support/VirtualFileSystem.h +++ b/llvm/include/llvm/Support/VirtualFileSystem.h @@ -462,7 +462,6 @@ private: namespace detail { class InMemoryDirectory; -class InMemoryFile; class InMemoryNode; struct NewInMemoryNodeInfo { diff --git a/llvm/include/llvm/TableGen/DirectiveEmitter.h b/llvm/include/llvm/TableGen/DirectiveEmitter.h index d73b9ae49235..e85c13f4b7cc 100644 --- a/llvm/include/llvm/TableGen/DirectiveEmitter.h +++ b/llvm/include/llvm/TableGen/DirectiveEmitter.h @@ -174,6 +174,16 @@ public: } bool isImplicit() const { return Def->getValueAsBit("isImplicit"); } + + std::vector<StringRef> getAliases() const { + return Def->getValueAsListOfStrings("aliases"); + } + + StringRef getPrefix() const { return Def->getValueAsString("prefix"); } + + bool isPrefixOptional() const { + return Def->getValueAsBit("isPrefixOptional"); + } }; // Wrapper class that contains VersionedClause's information defined in diff --git a/llvm/include/llvm/TableGen/Record.h b/llvm/include/llvm/TableGen/Record.h index 44daad976c12..50df38e695d7 100644 --- a/llvm/include/llvm/TableGen/Record.h +++ b/llvm/include/llvm/TableGen/Record.h @@ -1558,6 +1558,7 @@ private: // Location where record was instantiated, followed by the location of // multiclass prototypes used. SmallVector<SMLoc, 4> Locs; + SmallVector<SMLoc, 0> ForwardDeclarationLocs; SmallVector<Init *, 0> TemplateArgs; SmallVector<RecordVal, 0> Values; SmallVector<AssertionInfo, 0> Assertions; @@ -1614,7 +1615,7 @@ public: return Name; } - const std::string getNameInitAsString() const { + std::string getNameInitAsString() const { return getNameInit()->getAsUnquotedString(); } @@ -1623,6 +1624,13 @@ public: ArrayRef<SMLoc> getLoc() const { return Locs; } void appendLoc(SMLoc Loc) { Locs.push_back(Loc); } + ArrayRef<SMLoc> getForwardDeclarationLocs() const { + return ForwardDeclarationLocs; + } + + // Update a class location when encountering a (re-)definition. + void updateClassLoc(SMLoc Loc); + // Make the type that this record should have based on its superclasses. RecordRecTy *getType(); diff --git a/llvm/include/llvm/Target/TargetCallingConv.td b/llvm/include/llvm/Target/TargetCallingConv.td index a719581e0ac3..18b7ff4aec95 100644 --- a/llvm/include/llvm/Target/TargetCallingConv.td +++ b/llvm/include/llvm/Target/TargetCallingConv.td @@ -141,6 +141,15 @@ class CCAssignToStackWithShadow<int size, list<Register> ShadowRegList = shadowList; } +/// CCAssignToRegAndStack - Same as CCAssignToReg, but also allocates a stack +/// slot, when some register is used. Basically, it works like: +/// CCIf<CCAssignToReg<regList>, CCAssignToStack<size, align>>. +class CCAssignToRegAndStack<list<Register> regList, int size, int align> + : CCAssignToReg<regList> { + int Size = size; + int Align = align; +} + /// CCPassByVal - This action always matches: it assigns the value to a stack /// slot to implement ByVal aggregate parameter passing. 
Size and alignment /// specify the minimum size and alignment for the stack slot. diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index 171fdb1b98e0..bce8f2b72ec3 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -705,6 +705,9 @@ def concat_vectors : SDNode<"ISD::CONCAT_VECTORS", def vector_extract_subvec : SDNode<"ISD::EXTRACT_SUBVECTOR", SDTypeProfile<1, 2, [SDTCisInt<2>, SDTCisVec<1>, SDTCisVec<0>]>, []>; +def vector_insert_subvec : SDNode<"ISD::INSERT_SUBVECTOR", + SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVec<2>, SDTCisInt<3>]>, + []>; // This operator does subvector type checking. def extract_subvector : SDNode<"ISD::EXTRACT_SUBVECTOR", SDTSubVecExtract, []>; diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h index 6b7d4f4821f0..0b0f30be4dc9 100644 --- a/llvm/include/llvm/Transforms/IPO.h +++ b/llvm/include/llvm/Transforms/IPO.h @@ -98,10 +98,6 @@ ModulePass *createGVExtractionPass(std::vector<GlobalValue*>& GVs, bool deleteFn = false, bool keepConstInit = false); //===----------------------------------------------------------------------===// -/// This pass performs iterative function importing from other modules. -Pass *createFunctionImportPass(); - -//===----------------------------------------------------------------------===// /// createFunctionInliningPass - Return a new pass object that uses a heuristic /// to inline direct function calls to small functions. /// @@ -239,49 +235,13 @@ enum class PassSummaryAction { Export, ///< Export information to summary. }; -/// This pass lowers type metadata and the llvm.type.test intrinsic to -/// bitsets. -/// -/// The behavior depends on the summary arguments: -/// - If ExportSummary is non-null, this pass will export type identifiers to -/// the given summary. -/// - If ImportSummary is non-null, this pass will import type identifiers from -/// the given summary. -/// - Otherwise, if both are null and DropTypeTests is true, all type test -/// assume sequences will be removed from the IR. -/// It is invalid for both ExportSummary and ImportSummary to be non-null -/// unless DropTypeTests is true. -ModulePass *createLowerTypeTestsPass(ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary, - bool DropTypeTests = false); - /// This pass export CFI checks for use by external modules. ModulePass *createCrossDSOCFIPass(); -/// This pass implements whole-program devirtualization using type -/// metadata. -/// -/// The behavior depends on the summary arguments: -/// - If ExportSummary is non-null, this pass will export type identifiers to -/// the given summary. -/// - Otherwise, if ImportSummary is non-null, this pass will import type -/// identifiers from the given summary. -/// - Otherwise it does neither. -/// It is invalid for both ExportSummary and ImportSummary to be non-null. -ModulePass * -createWholeProgramDevirtPass(ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary); - /// This pass splits globals into pieces for the benefit of whole-program /// devirtualization and control-flow integrity. ModulePass *createGlobalSplitPass(); -//===----------------------------------------------------------------------===// -// SampleProfilePass - Loads sample profile data from disk and generates -// IR metadata to reflect the profile. 
-ModulePass *createSampleProfileLoaderPass(); -ModulePass *createSampleProfileLoaderPass(StringRef Name); - /// Write ThinLTO-ready bitcode to Str. ModulePass *createWriteThinLTOBitcodePass(raw_ostream &Str, raw_ostream *ThinLinkOS = nullptr); diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 17e29695ab73..8466f5612d99 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -118,7 +118,9 @@ #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/PassManager.h" +#include "llvm/IR/Value.h" #include "llvm/Support/Alignment.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Casting.h" @@ -155,6 +157,7 @@ namespace AA { enum ValueScope : uint8_t { Intraprocedural = 1, Interprocedural = 2, + AnyScope = Intraprocedural | Interprocedural, }; struct ValueAndContext : public std::pair<Value *, const Instruction *> { @@ -217,12 +220,11 @@ Constant *getInitialValueForObj(Value &Obj, Type &Ty, /// \returns True if \p Objects contains all assumed underlying objects, and /// false if something went wrong and the objects could not be /// determined. -bool getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr, - SmallVectorImpl<Value *> &Objects, - const AbstractAttribute &QueryingAA, - const Instruction *CtxI, - bool &UsedAssumedInformation, - AA::ValueScope VS = Interprocedural); +bool getAssumedUnderlyingObjects( + Attributor &A, const Value &Ptr, SmallSetVector<Value *, 8> &Objects, + const AbstractAttribute &QueryingAA, const Instruction *CtxI, + bool &UsedAssumedInformation, AA::ValueScope VS = AA::Interprocedural, + SmallPtrSetImpl<Value *> *SeenObjects = nullptr); /// Collect all potential values \p LI could read into \p PotentialValues. That /// is, the only values read by \p LI are assumed to be known and all are in @@ -305,6 +307,24 @@ struct DenseMapInfo<AA::ValueAndContext> } }; +template <> +struct DenseMapInfo<AA::ValueScope> : public DenseMapInfo<unsigned char> { + using Base = DenseMapInfo<unsigned char>; + static inline AA::ValueScope getEmptyKey() { + return AA::ValueScope(Base::getEmptyKey()); + } + static inline AA::ValueScope getTombstoneKey() { + return AA::ValueScope(Base::getTombstoneKey()); + } + static unsigned getHashValue(const AA::ValueScope &S) { + return Base::getHashValue(S); + } + + static bool isEqual(const AA::ValueScope &LHS, const AA::ValueScope &RHS) { + return Base::isEqual(LHS, RHS); + } +}; + /// The value passed to the line option that defines the maximal initialization /// chain length. extern unsigned MaxInitializationChainLength; @@ -1643,8 +1663,6 @@ struct Attributor { /// Record that \p F is deleted after information was manifested. void deleteAfterManifest(Function &F) { - errs() << "Delete " << F.getName() << " : " << (Configuration.DeleteFns) - << "\n"; if (Configuration.DeleteFns) ToBeDeletedFunctions.insert(&F); } @@ -1664,14 +1682,16 @@ struct Attributor { /// return None, otherwise return `nullptr`. 
Optional<Value *> getAssumedSimplified(const IRPosition &IRP, const AbstractAttribute &AA, - bool &UsedAssumedInformation) { - return getAssumedSimplified(IRP, &AA, UsedAssumedInformation); + bool &UsedAssumedInformation, + AA::ValueScope S) { + return getAssumedSimplified(IRP, &AA, UsedAssumedInformation, S); } Optional<Value *> getAssumedSimplified(const Value &V, const AbstractAttribute &AA, - bool &UsedAssumedInformation) { + bool &UsedAssumedInformation, + AA::ValueScope S) { return getAssumedSimplified(IRPosition::value(V), AA, - UsedAssumedInformation); + UsedAssumedInformation, S); } /// If \p V is assumed simplified, return it, if it is unclear yet, @@ -1679,7 +1699,17 @@ struct Attributor { /// except that it can be used without recording dependences on any \p AA. Optional<Value *> getAssumedSimplified(const IRPosition &V, const AbstractAttribute *AA, - bool &UsedAssumedInformation); + bool &UsedAssumedInformation, + AA::ValueScope S); + + /// Try to simplify \p IRP and in the scope \p S. If successful, true is + /// returned and all potential values \p IRP can take are put into \p Values. + /// If false is returned no other information is valid. + bool getAssumedSimplifiedValues(const IRPosition &IRP, + const AbstractAttribute *AA, + SmallVectorImpl<AA::ValueAndContext> &Values, + AA::ValueScope S, + bool &UsedAssumedInformation); /// Register \p CB as a simplification callback. /// `Attributor::getAssumedSimplified` will use these callbacks before @@ -4409,6 +4439,10 @@ template <typename MemberTy> struct PotentialValuesState : AbstractState { return *this; } + bool contains(const MemberTy &V) const { + return !isValidState() ? true : Set.contains(V); + } + protected: SetTy &getAssumedSet() { assert(isValidState() && "This set shoud not be used when it is invalid!"); @@ -4490,9 +4524,12 @@ private: }; using PotentialConstantIntValuesState = PotentialValuesState<APInt>; +using PotentialLLVMValuesState = + PotentialValuesState<std::pair<AA::ValueAndContext, AA::ValueScope>>; raw_ostream &operator<<(raw_ostream &OS, const PotentialConstantIntValuesState &R); +raw_ostream &operator<<(raw_ostream &OS, const PotentialLLVMValuesState &R); /// An abstract interface for potential values analysis. /// @@ -4508,7 +4545,7 @@ raw_ostream &operator<<(raw_ostream &OS, /// 2. We tried to initialize on a Value that we cannot handle (e.g. an /// operator we do not currently handle). /// -/// TODO: Support values other than constant integers. +/// For non constant integers see AAPotentialValues. struct AAPotentialConstantValues : public StateWrapper<PotentialConstantIntValuesState, AbstractAttribute> { using Base = StateWrapper<PotentialConstantIntValuesState, AbstractAttribute>; @@ -4562,6 +4599,48 @@ struct AAPotentialConstantValues static const char ID; }; +struct AAPotentialValues + : public StateWrapper<PotentialLLVMValuesState, AbstractAttribute> { + using Base = StateWrapper<PotentialLLVMValuesState, AbstractAttribute>; + AAPotentialValues(const IRPosition &IRP, Attributor &A) : Base(IRP) {} + + /// See AbstractAttribute::getState(...). + PotentialLLVMValuesState &getState() override { return *this; } + const PotentialLLVMValuesState &getState() const override { return *this; } + + /// Create an abstract attribute view for the position \p IRP. + static AAPotentialValues &createForPosition(const IRPosition &IRP, + Attributor &A); + + /// Extract the single value in \p Values if any. 
+ static Value *getSingleValue(Attributor &A, const AbstractAttribute &AA, + const IRPosition &IRP, + SmallVectorImpl<AA::ValueAndContext> &Values); + + /// See AbstractAttribute::getName() + const std::string getName() const override { return "AAPotentialValues"; } + + /// See AbstractAttribute::getIdAddr() + const char *getIdAddr() const override { return &ID; } + + /// This function should return true if the type of the \p AA is + /// AAPotentialValues + static bool classof(const AbstractAttribute *AA) { + return (AA->getIdAddr() == &ID); + } + + /// Unique ID (due to the unique address) + static const char ID; + +private: + virtual bool + getAssumedSimplifiedValues(Attributor &A, + SmallVectorImpl<AA::ValueAndContext> &Values, + AA::ValueScope) const = 0; + + friend struct Attributor; +}; + /// An abstract interface for all noundef attributes. struct AANoUndef : public IRAttribute<Attribute::NoUndef, @@ -4801,8 +4880,7 @@ struct AAFunctionReachability /// Can \p Inst reach \p Fn. /// See also AA::isPotentiallyReachable. virtual bool instructionCanReach(Attributor &A, const Instruction &Inst, - const Function &Fn, - bool UseBackwards = true) const = 0; + const Function &Fn) const = 0; /// Create an abstract attribute view for the position \p IRP. static AAFunctionReachability &createForPosition(const IRPosition &IRP, @@ -4834,19 +4912,36 @@ struct AAPointerInfo : public AbstractAttribute { AAPointerInfo(const IRPosition &IRP) : AbstractAttribute(IRP) {} enum AccessKind { - AK_READ = 1 << 0, - AK_WRITE = 1 << 1, - AK_READ_WRITE = AK_READ | AK_WRITE, + // First two bits to distinguish may and must accesses + AK_MUST = 1 << 0, + AK_MAY = 1 << 1, + + // Then two bits for read and write. These are not exclusive. + AK_R = 1 << 2, + AK_W = 1 << 3, + AK_RW = AK_R | AK_W, + + // Helper for easy access. + AK_MAY_READ = AK_MAY | AK_R, + AK_MAY_WRITE = AK_MAY | AK_W, + AK_MAY_READ_WRITE = AK_MAY | AK_R | AK_W, + AK_MUST_READ = AK_MUST | AK_R, + AK_MUST_WRITE = AK_MUST | AK_W, + AK_MUST_READ_WRITE = AK_MUST | AK_R | AK_W, }; /// An access description. struct Access { Access(Instruction *I, Optional<Value *> Content, AccessKind Kind, Type *Ty) - : LocalI(I), RemoteI(I), Content(Content), Kind(Kind), Ty(Ty) {} + : LocalI(I), RemoteI(I), Content(Content), Kind(Kind), Ty(Ty) { + verify(); + } Access(Instruction *LocalI, Instruction *RemoteI, Optional<Value *> Content, AccessKind Kind, Type *Ty) : LocalI(LocalI), RemoteI(RemoteI), Content(Content), Kind(Kind), - Ty(Ty) {} + Ty(Ty) { + verify(); + } Access(const Access &Other) = default; Access(const Access &&Other) : LocalI(Other.LocalI), RemoteI(Other.RemoteI), Content(Other.Content), @@ -4867,14 +4962,22 @@ struct AAPointerInfo : public AbstractAttribute { return *this; } + void verify() { + assert(isMustAccess() + isMayAccess() == 1 && + "Expect must or may access, not both."); + } + /// Return the access kind. AccessKind getKind() const { return Kind; } /// Return true if this is a read access. - bool isRead() const { return Kind & AK_READ; } + bool isRead() const { return Kind & AK_R; } /// Return true if this is a write access. - bool isWrite() const { return Kind & AK_WRITE; } + bool isWrite() const { return Kind & AK_W; } + + bool isMustAccess() const { return Kind & AK_MUST; } + bool isMayAccess() const { return Kind & AK_MAY; } /// Return the instruction that causes the access with respect to the local /// scope of the associated attribute. 
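The reworked AccessKind packs two independent facts into one bitmask: the low bits record whether the access is a must or a may access, the next bits record whether it reads and/or writes, and the AK_MUST_*/AK_MAY_* enumerators are convenience combinations of the two. A minimal sketch (not part of the patch; the helper name is hypothetical) of how a client could compose and decode a kind, assuming only the AAPointerInfo declarations from this hunk:

#include "llvm/Transforms/IPO/Attributor.h"

// Classify a hypothetical access as a guaranteed write and query it the same
// way Access::isWrite()/isMustAccess()/isRead() do above.
static void accessKindSketch() {
  using AAPI = llvm::AAPointerInfo;
  AAPI::AccessKind K = AAPI::AccessKind(AAPI::AK_MUST | AAPI::AK_W); // == AK_MUST_WRITE
  bool Writes = (K & AAPI::AK_W) != 0;    // true
  bool IsMust = (K & AAPI::AK_MUST) != 0; // true
  bool Reads  = (K & AAPI::AK_R) != 0;    // false
  (void)Writes; (void)IsMust; (void)Reads;
}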
@@ -4887,7 +4990,9 @@ struct AAPointerInfo : public AbstractAttribute { bool isWrittenValueYetUndetermined() const { return !Content; } /// Return true if the value written cannot be determined at all. - bool isWrittenValueUnknown() const { return Content && !*Content; } + bool isWrittenValueUnknown() const { + return Content.has_value() && !*Content; + } /// Return the type associated with the access, if known. Type *getType() const { return Ty; } @@ -4976,10 +5081,14 @@ struct AAPointerInfo : public AbstractAttribute { /// return true if all such accesses were known and the callback returned true /// for all of them, false otherwise. In contrast to forallInterferingAccesses /// this function will perform reasoning to exclude write accesses that cannot - /// affect the load even if they on the surface look as if they would. - virtual bool forallInterferingAccesses( - Attributor &A, const AbstractAttribute &QueryingAA, Instruction &I, - function_ref<bool(const Access &, bool)> CB) const = 0; + /// affect the load even if they on the surface look as if they would. The + /// flag \p HasBeenWrittenTo will be set to true if we know that \p I does not + /// read the intial value of the underlying memory. + virtual bool + forallInterferingAccesses(Attributor &A, const AbstractAttribute &QueryingAA, + Instruction &I, + function_ref<bool(const Access &, bool)> CB, + bool &HasBeenWrittenTo) const = 0; /// This function should return true if the type of the \p AA is AAPointerInfo static bool classof(const AbstractAttribute *AA) { diff --git a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h index c8ea1f5b6624..2e882eeb8bac 100644 --- a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h +++ b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h @@ -164,26 +164,10 @@ public: bool VerifyInput; bool VerifyOutput; bool MergeFunctions; - bool PrepareForLTO; - bool PrepareForThinLTO; - bool PerformThinLTO; bool DivergentTarget; unsigned LicmMssaOptCap; unsigned LicmMssaNoAccForPromotionCap; - /// Enable profile instrumentation pass. - bool EnablePGOInstrGen; - /// Enable profile context sensitive instrumentation pass. - bool EnablePGOCSInstrGen; - /// Enable profile context sensitive profile use pass. - bool EnablePGOCSInstrUse; - /// Profile data file name that the instrumentation will be written to. - std::string PGOInstrGen; - /// Path of the profile data file. - std::string PGOInstrUse; - /// Path of the sample Profile data file. - std::string PGOSampleUse; - private: /// ExtensionList - This is list of all of the extensions that are registered. 
std::vector<std::pair<ExtensionPointTy, ExtensionFn>> Extensions; diff --git a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h index fff06da22cf3..c41871e33eaf 100644 --- a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h +++ b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h @@ -105,7 +105,7 @@ public: if (!CalleeSamples || !CallerSamples) { Weight = 0; } else { - uint64_t CalleeEntryCount = CalleeSamples->getEntrySamples(); + uint64_t CalleeEntryCount = CalleeSamples->getHeadSamplesEstimate(); uint64_t CallsiteCount = 0; LineLocation Callsite = Callee->getCallSiteLoc(); if (auto CallTargets = CallerSamples->findCallTargetMapAt(Callsite)) { @@ -169,7 +169,7 @@ private: for (const auto &InlinedSamples : CallsiteSamples.second) { addProfiledFunction(InlinedSamples.first); addProfiledCall(Samples.getFuncName(), InlinedSamples.first, - InlinedSamples.second.getEntrySamples()); + InlinedSamples.second.getHeadSamplesEstimate()); addProfiledCalls(InlinedSamples.second); } } diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h index ae19fbfb49a7..87eeb8e020a6 100644 --- a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h +++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h @@ -425,7 +425,7 @@ public: // If we are replacing the instruction with itself, this must be in a // segment of unreachable code, so just clobber the instruction. if (&I == V) - V = UndefValue::get(I.getType()); + V = PoisonValue::get(I.getType()); LLVM_DEBUG(dbgs() << "IC: Replacing " << I << "\n" << " with " << *V << '\n'); diff --git a/llvm/include/llvm/Transforms/Instrumentation.h b/llvm/include/llvm/Transforms/Instrumentation.h index 9ff45fc29b06..0c688e3bdaf6 100644 --- a/llvm/include/llvm/Transforms/Instrumentation.h +++ b/llvm/include/llvm/Transforms/Instrumentation.h @@ -28,7 +28,6 @@ namespace llvm { class Triple; -class FunctionPass; class ModulePass; class OptimizationRemarkEmitter; class Comdat; @@ -79,8 +78,6 @@ struct GCOVOptions { std::string Exclude; }; -ModulePass *createCGProfileLegacyPass(); - // The pgo-specific indirect call promotion function declared below is used by // the pgo-driven indirect call promotion and sample profile passes. It's a // wrapper around llvm::promoteCall, et al. that additionally computes !prof @@ -126,13 +123,6 @@ struct InstrProfOptions { InstrProfOptions() = default; }; -/// Insert frontend instrumentation based profiling. Parameter IsCS indicates if -// this is the context sensitive instrumentation. 
-ModulePass *createInstrProfilingLegacyPass( - const InstrProfOptions &Options = InstrProfOptions(), bool IsCS = false); - -ModulePass *createInstrOrderFilePass(); - // Insert DataFlowSanitizer (dynamic data flow analysis) instrumentation ModulePass *createDataFlowSanitizerLegacyPassPass( const std::vector<std::string> &ABIListFiles = std::vector<std::string>()); diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h index d12b2cf45825..f56ec6ff682f 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h +++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h @@ -17,12 +17,7 @@ #include "llvm/Transforms/Instrumentation/AddressSanitizerOptions.h" namespace llvm { -class Function; -class FunctionPass; -class GlobalVariable; -class MDNode; class Module; -class ModulePass; class raw_ostream; struct AddressSanitizerOptions { diff --git a/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h index d3b5b5ca5c25..11ea66780d8c 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h +++ b/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h @@ -17,7 +17,6 @@ #include "llvm/IR/PassManager.h" namespace llvm { -class FunctionPass; class Module; class StringRef; class raw_ostream; diff --git a/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h index e4654a0fc7ef..ad92ed1c6330 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h +++ b/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h @@ -18,7 +18,6 @@ namespace llvm { class Function; -class FunctionPass; class Module; class StringRef; class raw_ostream; diff --git a/llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h b/llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h index 9bacb7eb38a5..dfa85fde28e6 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h +++ b/llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h @@ -8,8 +8,7 @@ // //===----------------------------------------------------------------------===// // -// This file declares the SanitizerCoverage class which is a port of the legacy -// SanitizerCoverage pass to use the new PassManager infrastructure. +// SanitizerCoverage is a simple code coverage implementation. // //===----------------------------------------------------------------------===// @@ -23,7 +22,6 @@ namespace llvm { class Module; -class ModulePass; /// This is the ModuleSanitizerCoverage pass used in the new pass manager. The /// pass instruments functions for coverage, adds initialization calls to the @@ -56,13 +54,6 @@ private: std::unique_ptr<SpecialCaseList> Blocklist; }; -// Insert SanitizerCoverage instrumentation. 
-ModulePass *createModuleSanitizerCoverageLegacyPassPass( - const SanitizerCoverageOptions &Options = SanitizerCoverageOptions(), - const std::vector<std::string> &AllowlistFiles = std::vector<std::string>(), - const std::vector<std::string> &BlocklistFiles = - std::vector<std::string>()); - } // namespace llvm #endif diff --git a/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h index b3a067ba59c2..fd37130d5459 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h +++ b/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h @@ -17,7 +17,6 @@ namespace llvm { class Function; -class FunctionPass; class Module; /// A function pass for tsan instrumentation. diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h index edd492b0343d..5f852963c687 100644 --- a/llvm/include/llvm/Transforms/Scalar.h +++ b/llvm/include/llvm/Transforms/Scalar.h @@ -521,10 +521,6 @@ FunctionPass *createLoopVersioningPass(); // FunctionPass *createLoopDataPrefetchPass(); -///===---------------------------------------------------------------------===// -ModulePass *createNameAnonGlobalPass(); -ModulePass *createCanonicalizeAliasesPass(); - //===----------------------------------------------------------------------===// // // LibCallsShrinkWrap - Shrink-wraps a call to function if the result is not diff --git a/llvm/include/llvm/Transforms/Utils.h b/llvm/include/llvm/Transforms/Utils.h index ebd4bd318573..5abfb2cceb58 100644 --- a/llvm/include/llvm/Transforms/Utils.h +++ b/llvm/include/llvm/Transforms/Utils.h @@ -50,15 +50,6 @@ extern char &LowerSwitchID; //===----------------------------------------------------------------------===// // -// EntryExitInstrumenter pass - Instrument function entry/exit with calls to -// mcount(), @__cyg_profile_func_{enter,exit} and the like. There are two -// variants, intended to run pre- and post-inlining, respectively. -// -FunctionPass *createEntryExitInstrumenterPass(); -FunctionPass *createPostInlineEntryExitInstrumenterPass(); - -//===----------------------------------------------------------------------===// -// // BreakCriticalEdges - Break all of the critical edges in the CFG by inserting // a dummy basic block. This pass may be "required" by passes that cannot deal // with critical edges. For this usage, a pass must call: @@ -115,13 +106,6 @@ ModulePass *createStripNonLineTableDebugLegacyPass(); //===----------------------------------------------------------------------===// // -// ControlHeightReudction - Merges conditional blocks of code and reduces the -// number of conditional branches in the hot paths based on profiles. -// -FunctionPass *createControlHeightReductionLegacyPass(); - -//===----------------------------------------------------------------------===// -// // InjectTLIMappingsLegacy - populates the VFABI attribute with the // scalar-to-vector mappings from the TargetLibraryInfo. 
// diff --git a/llvm/include/llvm/Transforms/Utils/LowerAtomic.h b/llvm/include/llvm/Transforms/Utils/LowerAtomic.h index c85f8e3a5646..43a407d35909 100644 --- a/llvm/include/llvm/Transforms/Utils/LowerAtomic.h +++ b/llvm/include/llvm/Transforms/Utils/LowerAtomic.h @@ -11,8 +11,8 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_TRANSFORMS_SCALAR_LOWERATOMIC_H -#define LLVM_TRANSFORMS_SCALAR_LOWERATOMIC_H +#ifndef LLVM_TRANSFORMS_UTILS_LOWERATOMIC_H +#define LLVM_TRANSFORMS_UTILS_LOWERATOMIC_H #include "llvm/IR/Instructions.h" @@ -34,4 +34,4 @@ Value *buildAtomicRMWValue(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder, Value *Loaded, Value *Inc); } -#endif // LLVM_TRANSFORMS_SCALAR_LOWERATOMIC_H +#endif // LLVM_TRANSFORMS_UTILS_LOWERATOMIC_H diff --git a/llvm/include/llvm/Transforms/Utils/MisExpect.h b/llvm/include/llvm/Transforms/Utils/MisExpect.h index 064eeac4c669..75eb97646770 100644 --- a/llvm/include/llvm/Transforms/Utils/MisExpect.h +++ b/llvm/include/llvm/Transforms/Utils/MisExpect.h @@ -14,6 +14,9 @@ // //===----------------------------------------------------------------------===// +#ifndef LLVM_TRANSFORMS_UTILS_MISEXPECT_H +#define LLVM_TRANSFORMS_UTILS_MISEXPECT_H + #include "llvm/ADT/SmallVector.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" @@ -75,3 +78,5 @@ void checkExpectAnnotations(Instruction &I, } // namespace misexpect } // namespace llvm + +#endif diff --git a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h index 260ed1a97831..4f878928a7bf 100644 --- a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h +++ b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h @@ -28,19 +28,6 @@ namespace llvm { extern cl::opt<unsigned> SCEVCheapExpansionBudget; -/// Return true if the given expression is safe to expand in the sense that -/// all materialized values are safe to speculate anywhere their operands are -/// defined, and the expander is capable of expanding the expression. -/// CanonicalMode indicates whether the expander will be used in canonical mode. -bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE, - bool CanonicalMode = true); - -/// Return true if the given expression is safe to expand in the sense that -/// all materialized values are defined and safe to speculate at the specified -/// location and their operands are defined at this location. -bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint, - ScalarEvolution &SE); - /// struct for holding enough information to help calculate the cost of the /// given SCEV when expanded into IR. struct SCEVOperand { @@ -270,6 +257,16 @@ public: SmallVectorImpl<WeakTrackingVH> &DeadInsts, const TargetTransformInfo *TTI = nullptr); + /// Return true if the given expression is safe to expand in the sense that + /// all materialized values are safe to speculate anywhere their operands are + /// defined, and the expander is capable of expanding the expression. + bool isSafeToExpand(const SCEV *S) const; + + /// Return true if the given expression is safe to expand in the sense that + /// all materialized values are defined and safe to speculate at the specified + /// location and their operands are defined at this location. + bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint) const; + /// Insert code to directly compute the specified SCEV expression into the /// program. The code is inserted into the specified block. 
Value *expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I) { diff --git a/llvm/include/llvm/WindowsDriver/MSVCPaths.h b/llvm/include/llvm/WindowsDriver/MSVCPaths.h index 7256a4f66eaa..66c93cd55e20 100644 --- a/llvm/include/llvm/WindowsDriver/MSVCPaths.h +++ b/llvm/include/llvm/WindowsDriver/MSVCPaths.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_SUPPORT_MSVCPATHS_H -#define LLVM_SUPPORT_MSVCPATHS_H +#ifndef LLVM_WINDOWSDRIVER_MSVCPATHS_H +#define LLVM_WINDOWSDRIVER_MSVCPATHS_H #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallString.h" diff --git a/llvm/lib/Analysis/AliasSetTracker.cpp b/llvm/lib/Analysis/AliasSetTracker.cpp index 234a73bff6a8..bb25244a88dd 100644 --- a/llvm/lib/Analysis/AliasSetTracker.cpp +++ b/llvm/lib/Analysis/AliasSetTracker.cpp @@ -579,7 +579,7 @@ AliasSet &AliasSetTracker::mergeAllAliasSets() { AliasAnyAS->Access = AliasSet::ModRefAccess; AliasAnyAS->AliasAny = true; - for (auto Cur : ASVector) { + for (auto *Cur : ASVector) { // If Cur was already forwarding, just forward to the new AS instead. AliasSet *FwdTo = Cur->Forward; if (FwdTo) { diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp index c78f822b8bcf..c3b032abcba2 100644 --- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -1764,7 +1764,7 @@ bool BasicAAResult::isValueEqualInPotentialCycles(const Value *V, // Make sure that the visited phis cannot reach the Value. This ensures that // the Values cannot come from different iterations of a potential cycle the // phi nodes could be involved in. - for (auto *P : VisitedPhiBBs) + for (const auto *P : VisitedPhiBBs) if (isPotentiallyReachable(&P->front(), Inst, nullptr, DT)) return false; diff --git a/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/llvm/lib/Analysis/BranchProbabilityInfo.cpp index 428ae8975c30..f45728768fcd 100644 --- a/llvm/lib/Analysis/BranchProbabilityInfo.cpp +++ b/llvm/lib/Analysis/BranchProbabilityInfo.cpp @@ -1250,7 +1250,7 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LoopI, // Walk the basic blocks in post-order so that we can build up state about // the successors of a block iteratively. - for (auto BB : post_order(&F.getEntryBlock())) { + for (const auto *BB : post_order(&F.getEntryBlock())) { LLVM_DEBUG(dbgs() << "Computing probabilities for " << BB->getName() << "\n"); // If there is no at least two successors, no sense to set probability. diff --git a/llvm/lib/Analysis/CFG.cpp b/llvm/lib/Analysis/CFG.cpp index 1902d72f2f89..e5dd45842d6a 100644 --- a/llvm/lib/Analysis/CFG.cpp +++ b/llvm/lib/Analysis/CFG.cpp @@ -149,7 +149,7 @@ bool llvm::isPotentiallyReachableFromMany( // untrue. 
SmallPtrSet<const Loop *, 8> LoopsWithHoles; if (LI && ExclusionSet) { - for (auto BB : *ExclusionSet) { + for (auto *BB : *ExclusionSet) { if (const Loop *L = getOutermostLoop(LI, BB)) LoopsWithHoles.insert(L); } diff --git a/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp b/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp index 602a01867f3b..d70e1b21d768 100644 --- a/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp +++ b/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp @@ -615,7 +615,7 @@ static void initializeWorkList(std::vector<WorkListItem> &WorkList, auto Src = InstantiatedValue{Val, I}; // If there's an assignment edge from X to Y, it means Y is reachable from // X at S3 and X is reachable from Y at S1 - for (auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges) { + for (const auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges) { propagate(Edge.Other, Src, MatchState::FlowFromReadOnly, ReachSet, WorkList); propagate(Src, Edge.Other, MatchState::FlowToWriteOnly, ReachSet, diff --git a/llvm/lib/Analysis/CFLGraph.h b/llvm/lib/Analysis/CFLGraph.h index 60fc8d18678c..47bb02ac8e8b 100644 --- a/llvm/lib/Analysis/CFLGraph.h +++ b/llvm/lib/Analysis/CFLGraph.h @@ -434,7 +434,8 @@ template <typename CFLAA> class CFLGraphBuilder { // introduce any aliases. // TODO: address other common library functions such as realloc(), // strdup(), etc. - if (isMallocOrCallocLikeFn(&Call, &TLI) || isFreeCall(&Call, &TLI)) + if (isMallocOrCallocLikeFn(&Call, &TLI) || + getFreedOperand(&Call, &TLI) != nullptr) return; // TODO: Add support for noalias args/all the other fun function diff --git a/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp b/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp index f92869c2ec63..33ed6f88f82b 100644 --- a/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp +++ b/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp @@ -216,7 +216,7 @@ CFLSteensAAResult::FunctionInfo CFLSteensAAResult::buildSetsFrom(Function *Fn) { for (unsigned I = 0, E = ValueInfo.getNumLevels(); I < E; ++I) { auto Src = InstantiatedValue{Val, I}; - for (auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges) + for (const auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges) SetBuilder.addWith(Src, Edge.Other); } } diff --git a/llvm/lib/Analysis/ConstraintSystem.cpp b/llvm/lib/Analysis/ConstraintSystem.cpp index dc774728ab3d..2efa474f3552 100644 --- a/llvm/lib/Analysis/ConstraintSystem.cpp +++ b/llvm/lib/Analysis/ConstraintSystem.cpp @@ -110,7 +110,7 @@ void ConstraintSystem::dump(ArrayRef<std::string> Names) const { if (Constraints.empty()) return; - for (auto &Row : Constraints) { + for (const auto &Row : Constraints) { SmallVector<std::string, 16> Parts; for (unsigned I = 1, S = Row.size(); I < S; ++I) { if (Row[I] == 0) diff --git a/llvm/lib/Analysis/CostModel.cpp b/llvm/lib/Analysis/CostModel.cpp index 52e424ae324b..3c162f604cd5 100644 --- a/llvm/lib/Analysis/CostModel.cpp +++ b/llvm/lib/Analysis/CostModel.cpp @@ -25,6 +25,7 @@ #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/IR/IntrinsicInst.h" using namespace llvm; static cl::opt<TargetTransformInfo::TargetCostKind> CostKind( @@ -39,6 +40,9 @@ static cl::opt<TargetTransformInfo::TargetCostKind> CostKind( clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency"))); +static cl::opt<bool> TypeBasedIntrinsicCost("type-based-intrinsic-cost", + cl::desc("Calculate intrinsics cost based only on argument types"), + cl::init(false)); #define CM_NAME "cost-model" #define DEBUG_TYPE CM_NAME @@ -103,7 +107,16 
@@ void CostModelAnalysis::print(raw_ostream &OS, const Module*) const { for (BasicBlock &B : *F) { for (Instruction &Inst : B) { - InstructionCost Cost = TTI->getInstructionCost(&Inst, CostKind); + InstructionCost Cost; + if (TypeBasedIntrinsicCost && isa<IntrinsicInst>(&Inst)) { + auto *II = dyn_cast<IntrinsicInst>(&Inst); + IntrinsicCostAttributes ICA(II->getIntrinsicID(), *II, + InstructionCost::getInvalid(), true); + Cost = TTI->getIntrinsicInstrCost(ICA, CostKind); + } + else { + Cost = TTI->getInstructionCost(&Inst, CostKind); + } if (auto CostVal = Cost.getValue()) OS << "Cost Model: Found an estimated cost of " << *CostVal; else @@ -122,7 +135,16 @@ PreservedAnalyses CostModelPrinterPass::run(Function &F, for (Instruction &Inst : B) { // TODO: Use a pass parameter instead of cl::opt CostKind to determine // which cost kind to print. - InstructionCost Cost = TTI.getInstructionCost(&Inst, CostKind); + InstructionCost Cost; + if (TypeBasedIntrinsicCost && isa<IntrinsicInst>(&Inst)) { + auto *II = dyn_cast<IntrinsicInst>(&Inst); + IntrinsicCostAttributes ICA(II->getIntrinsicID(), *II, + InstructionCost::getInvalid(), true); + Cost = TTI.getIntrinsicInstrCost(ICA, CostKind); + } + else { + Cost = TTI.getInstructionCost(&Inst, CostKind); + } if (auto CostVal = Cost.getValue()) OS << "Cost Model: Found an estimated cost of " << *CostVal; else diff --git a/llvm/lib/Analysis/DDG.cpp b/llvm/lib/Analysis/DDG.cpp index 998c888dd2d9..da64ef153960 100644 --- a/llvm/lib/Analysis/DDG.cpp +++ b/llvm/lib/Analysis/DDG.cpp @@ -95,7 +95,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGNode &N) { llvm_unreachable("unimplemented type of node"); OS << (N.getEdges().empty() ? " Edges:none!\n" : " Edges:\n"); - for (auto &E : N.getEdges()) + for (const auto &E : N.getEdges()) OS.indent(2) << *E; return OS; } @@ -188,7 +188,7 @@ DataDependenceGraph::DataDependenceGraph(Function &F, DependenceInfo &D) // Put the basic blocks in program order for correct dependence // directions. BasicBlockListType BBList; - for (auto &SCC : make_range(scc_begin(&F), scc_end(&F))) + for (const auto &SCC : make_range(scc_begin(&F), scc_end(&F))) append_range(BBList, SCC); std::reverse(BBList.begin(), BBList.end()); DDGBuilder(*this, D, BBList).populate(); diff --git a/llvm/lib/Analysis/Delinearization.cpp b/llvm/lib/Analysis/Delinearization.cpp index c36e1d922915..7ab91b9eaea4 100644 --- a/llvm/lib/Analysis/Delinearization.cpp +++ b/llvm/lib/Analysis/Delinearization.cpp @@ -133,7 +133,7 @@ struct SCEVCollectAddRecMultiplies { if (auto *Mul = dyn_cast<SCEVMulExpr>(S)) { bool HasAddRec = false; SmallVector<const SCEV *, 0> Operands; - for (auto Op : Mul->operands()) { + for (const auto *Op : Mul->operands()) { const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Op); if (Unknown && !isa<CallInst>(Unknown->getValue())) { Operands.push_back(Op); diff --git a/llvm/lib/Analysis/DivergenceAnalysis.cpp b/llvm/lib/Analysis/DivergenceAnalysis.cpp index 1a4b09e0cac2..02c40d2640c1 100644 --- a/llvm/lib/Analysis/DivergenceAnalysis.cpp +++ b/llvm/lib/Analysis/DivergenceAnalysis.cpp @@ -220,19 +220,19 @@ void DivergenceAnalysisImpl::analyzeLoopExitDivergence( // phi nodes at the fringes of the dominance region if (!DT.dominates(&LoopHeader, UserBlock)) { // all PHI nodes of UserBlock become divergent - for (auto &Phi : UserBlock->phis()) { + for (const auto &Phi : UserBlock->phis()) { analyzeTemporalDivergence(Phi, OuterDivLoop); } continue; } // Taint outside users of values carried by OuterDivLoop. 
- for (auto &I : *UserBlock) { + for (const auto &I : *UserBlock) { analyzeTemporalDivergence(I, OuterDivLoop); } // visit all blocks in the dominance region - for (auto *SuccBlock : successors(UserBlock)) { + for (const auto *SuccBlock : successors(UserBlock)) { if (!Visited.insert(SuccBlock).second) { continue; } @@ -399,7 +399,7 @@ DivergenceAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) { } for (const BasicBlock &BB : F) { OS << "\n " << BB.getName() << ":\n"; - for (auto &I : BB.instructionsWithoutDebug()) { + for (const auto &I : BB.instructionsWithoutDebug()) { OS << (DI.isDivergent(I) ? "DIVERGENT: " : " "); OS << I << "\n"; } diff --git a/llvm/lib/Analysis/GlobalsModRef.cpp b/llvm/lib/Analysis/GlobalsModRef.cpp index db6eae0d962a..e6ef1c78a628 100644 --- a/llvm/lib/Analysis/GlobalsModRef.cpp +++ b/llvm/lib/Analysis/GlobalsModRef.cpp @@ -361,7 +361,7 @@ bool GlobalsAAResult::AnalyzeUsesOfPointer(Value *V, if (Call->isDataOperand(&U)) { // Detect calls to free. if (Call->isArgOperand(&U) && - isFreeCall(I, &GetTLI(*Call->getFunction()))) { + getFreedOperand(Call, &GetTLI(*Call->getFunction())) == U) { if (Writers) Writers->insert(Call->getParent()->getParent()); } else { @@ -906,7 +906,7 @@ ModRefInfo GlobalsAAResult::getModRefInfoForArgument(const CallBase *Call, // Iterate through all the arguments to the called function. If any argument // is based on GV, return the conservative result. - for (auto &A : Call->args()) { + for (const auto &A : Call->args()) { SmallVector<const Value*, 4> Objects; getUnderlyingObjects(A, Objects); diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp index e4d706ab045c..a51e974003f6 100644 --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -63,20 +63,6 @@ bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurKind Kind) { return (Kind != RecurKind::None) && !isIntegerRecurrenceKind(Kind); } -bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurKind Kind) { - switch (Kind) { - default: - break; - case RecurKind::Add: - case RecurKind::Mul: - case RecurKind::FAdd: - case RecurKind::FMul: - case RecurKind::FMulAdd: - return true; - } - return false; -} - /// Determines if Phi may have been type-promoted. If Phi has a single user /// that ANDs the Phi with a type mask, return the user. 
RT is updated to /// account for the narrower bit width represented by the mask, and the AND @@ -1170,7 +1156,7 @@ RecurrenceDescriptor::getReductionOpChain(PHINode *Phi, Loop *L) const { ExpectedUses = 2; auto getNextInstruction = [&](Instruction *Cur) -> Instruction * { - for (auto User : Cur->users()) { + for (auto *User : Cur->users()) { Instruction *UI = cast<Instruction>(User); if (isa<PHINode>(UI)) continue; diff --git a/llvm/lib/Analysis/IVUsers.cpp b/llvm/lib/Analysis/IVUsers.cpp index 5bde947bd851..830211658353 100644 --- a/llvm/lib/Analysis/IVUsers.cpp +++ b/llvm/lib/Analysis/IVUsers.cpp @@ -274,7 +274,7 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const { OS << " "; IVUse.getOperandValToReplace()->printAsOperand(OS, false); OS << " = " << *getReplacementExpr(IVUse); - for (auto PostIncLoop : IVUse.PostIncLoops) { + for (const auto *PostIncLoop : IVUse.PostIncLoops) { OS << " (post-inc with loop "; PostIncLoop->getHeader()->printAsOperand(OS, false); OS << ")"; diff --git a/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp b/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp index 9fee57c54b85..78e7f456ebc6 100644 --- a/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp +++ b/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp @@ -68,7 +68,7 @@ bool InstructionPrecedenceTracking::isPreceededBySpecialInstruction( void InstructionPrecedenceTracking::fill(const BasicBlock *BB) { FirstSpecialInsts.erase(BB); - for (auto &I : *BB) { + for (const auto &I : *BB) { NumInstScanned++; if (isSpecialInstruction(&I)) { FirstSpecialInsts[BB] = &I; @@ -101,7 +101,7 @@ void InstructionPrecedenceTracking::validate(const BasicBlock *BB) const { void InstructionPrecedenceTracking::validateAll() const { // Check that for every known block the cached value is correct. - for (auto &It : FirstSpecialInsts) + for (const auto &It : FirstSpecialInsts) validate(It.first); } #endif diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp index d49b20798c82..2fae260e0d8f 100644 --- a/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/llvm/lib/Analysis/LazyValueInfo.cpp @@ -1896,7 +1896,7 @@ void LazyValueInfoAnnotatedWriter::emitBasicBlockStartAnnot( const BasicBlock *BB, formatted_raw_ostream &OS) { // Find if there are latticevalues defined for arguments of the function. auto *F = BB->getParent(); - for (auto &Arg : F->args()) { + for (const auto &Arg : F->args()) { ValueLatticeElement Result = LVIImpl->getValueInBlock( const_cast<Argument *>(&Arg), const_cast<BasicBlock *>(BB)); if (Result.isUnknown()) @@ -1932,12 +1932,12 @@ void LazyValueInfoAnnotatedWriter::emitInstructionAnnot( printResult(ParentBB); // Print the LVI analysis results for the immediate successor blocks, that // are dominated by `ParentBB`. - for (auto *BBSucc : successors(ParentBB)) + for (const auto *BBSucc : successors(ParentBB)) if (DT.dominates(ParentBB, BBSucc)) printResult(BBSucc); // Print LVI in blocks where `I` is used. 
- for (auto *U : I->users()) + for (const auto *U : I->users()) if (auto *UseI = dyn_cast<Instruction>(U)) if (!isa<PHINode>(UseI) || DT.dominates(ParentBB, UseI->getParent())) printResult(UseI->getParent()); diff --git a/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp b/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp index 491d44335f22..381d62a37662 100644 --- a/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp +++ b/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp @@ -393,14 +393,14 @@ void LegacyDivergenceAnalysis::print(raw_ostream &OS, const Module *) const { return; // Dumps all divergent values in F, arguments and then instructions. - for (auto &Arg : F->args()) { + for (const auto &Arg : F->args()) { OS << (isDivergent(&Arg) ? "DIVERGENT: " : " "); OS << Arg << "\n"; } // Iterate instructions using instructions() to ensure a deterministic order. for (const BasicBlock &BB : *F) { OS << "\n " << BB.getName() << ":\n"; - for (auto &I : BB.instructionsWithoutDebug()) { + for (const auto &I : BB.instructionsWithoutDebug()) { OS << (isDivergent(&I) ? "DIVERGENT: " : " "); OS << I << "\n"; } diff --git a/llvm/lib/Analysis/Lint.cpp b/llvm/lib/Analysis/Lint.cpp index 9cfb91a22b7d..8b0f2a8ed99b 100644 --- a/llvm/lib/Analysis/Lint.cpp +++ b/llvm/lib/Analysis/Lint.cpp @@ -229,7 +229,7 @@ void Lint::visitCallBase(CallBase &I) { if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) { AttributeList PAL = I.getAttributes(); unsigned ArgNo = 0; - for (auto BI = I.arg_begin(); BI != AE; ++BI, ++ArgNo) { + for (auto *BI = I.arg_begin(); BI != AE; ++BI, ++ArgNo) { // Skip ByVal arguments since they will be memcpy'd to the callee's // stack so we're not really passing the pointer anyway. if (PAL.hasParamAttr(ArgNo, Attribute::ByVal)) diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 79161db9b5e4..bed684b7652a 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -130,6 +130,11 @@ static cl::opt<bool> EnableForwardingConflictDetection( cl::desc("Enable conflict detection in loop-access analysis"), cl::init(true)); +static cl::opt<unsigned> MaxForkedSCEVDepth( + "max-forked-scev-depth", cl::Hidden, + cl::desc("Maximum recursion depth when finding forked SCEVs (default = 5)"), + cl::init(5)); + bool VectorizerParams::isInterleaveForced() { return ::VectorizationInterleave.getNumOccurrences() > 0; } @@ -288,8 +293,10 @@ void RuntimePointerChecking::tryToCreateDiffCheck( DC.getInstructionsForAccess(Sink->PointerValue, Sink->IsWritePtr); Type *SrcTy = getLoadStoreType(SrcInsts[0]); Type *DstTy = getLoadStoreType(SinkInsts[0]); - if (isa<ScalableVectorType>(SrcTy) || isa<ScalableVectorType>(DstTy)) + if (isa<ScalableVectorType>(SrcTy) || isa<ScalableVectorType>(DstTy)) { + CanUseDiffCheck = false; return; + } unsigned AllocSize = std::max(DL.getTypeAllocSize(SrcTy), DL.getTypeAllocSize(DstTy)); IntegerType *IntTy = @@ -778,6 +785,140 @@ static void visitPointers(Value *StartPtr, const Loop &InnermostLoop, } } +// Walk back through the IR for a pointer, looking for a select like the +// following: +// +// %offset = select i1 %cmp, i64 %a, i64 %b +// %addr = getelementptr double, double* %base, i64 %offset +// %ld = load double, double* %addr, align 8 +// +// We won't be able to form a single SCEVAddRecExpr from this since the +// address for each loop iteration depends on %cmp. 
We could potentially +// produce multiple valid SCEVAddRecExprs, though, and check all of them for +// memory safety/aliasing if needed. +// +// If we encounter some IR we don't yet handle, or something obviously fine +// like a constant, then we just add the SCEV for that term to the list passed +// in by the caller. If we have a node that may potentially yield a valid +// SCEVAddRecExpr then we decompose it into parts and build the SCEV terms +// ourselves before adding to the list. +static void +findForkedSCEVs(ScalarEvolution *SE, const Loop *L, Value *Ptr, + SmallVectorImpl<std::pair<const SCEV *, bool>> &ScevList, + unsigned Depth) { + // If our Value is a SCEVAddRecExpr, loop invariant, not an instruction, or + // we've exceeded our limit on recursion, just return whatever we have + // regardless of whether it can be used for a forked pointer or not, along + // with an indication of whether it might be a poison or undef value. + const SCEV *Scev = SE->getSCEV(Ptr); + if (isa<SCEVAddRecExpr>(Scev) || L->isLoopInvariant(Ptr) || + !isa<Instruction>(Ptr) || Depth == 0) { + ScevList.push_back( + std::make_pair(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr))); + return; + } + + Depth--; + + auto UndefPoisonCheck = [](std::pair<const SCEV *, bool> S) -> bool { + return S.second; + }; + + Instruction *I = cast<Instruction>(Ptr); + unsigned Opcode = I->getOpcode(); + switch (Opcode) { + case Instruction::GetElementPtr: { + GetElementPtrInst *GEP = cast<GetElementPtrInst>(I); + Type *SourceTy = GEP->getSourceElementType(); + // We only handle base + single offset GEPs here for now. + // Not dealing with preexisting gathers yet, so no vectors. + if (I->getNumOperands() != 2 || SourceTy->isVectorTy()) { + ScevList.push_back( + std::make_pair(Scev, !isGuaranteedNotToBeUndefOrPoison(GEP))); + break; + } + SmallVector<std::pair<const SCEV *, bool>, 2> BaseScevs; + SmallVector<std::pair<const SCEV *, bool>, 2> OffsetScevs; + findForkedSCEVs(SE, L, I->getOperand(0), BaseScevs, Depth); + findForkedSCEVs(SE, L, I->getOperand(1), OffsetScevs, Depth); + + // See if we need to freeze our fork... + bool NeedsFreeze = any_of(BaseScevs, UndefPoisonCheck) || + any_of(OffsetScevs, UndefPoisonCheck); + + // Check that we only have a single fork, on either the base or the offset. + // Copy the SCEV across for the one without a fork in order to generate + // the full SCEV for both sides of the GEP. + if (OffsetScevs.size() == 2 && BaseScevs.size() == 1) + BaseScevs.push_back(BaseScevs[0]); + else if (BaseScevs.size() == 2 && OffsetScevs.size() == 1) + OffsetScevs.push_back(OffsetScevs[0]); + else { + ScevList.push_back(std::make_pair(Scev, NeedsFreeze)); + break; + } + + // Find the pointer type we need to extend to. + Type *IntPtrTy = SE->getEffectiveSCEVType( + SE->getSCEV(GEP->getPointerOperand())->getType()); + + // Find the size of the type being pointed to. We only have a single + // index term (guarded above) so we don't need to index into arrays or + // structures, just get the size of the scalar value. + const SCEV *Size = SE->getSizeOfExpr(IntPtrTy, SourceTy); + + // Scale up the offsets by the size of the type, then add to the bases. 
+ const SCEV *Scaled1 = SE->getMulExpr( + Size, SE->getTruncateOrSignExtend(OffsetScevs[0].first, IntPtrTy)); + const SCEV *Scaled2 = SE->getMulExpr( + Size, SE->getTruncateOrSignExtend(OffsetScevs[1].first, IntPtrTy)); + ScevList.push_back(std::make_pair( + SE->getAddExpr(BaseScevs[0].first, Scaled1), NeedsFreeze)); + ScevList.push_back(std::make_pair( + SE->getAddExpr(BaseScevs[1].first, Scaled2), NeedsFreeze)); + break; + } + case Instruction::Select: { + SmallVector<std::pair<const SCEV *, bool>, 2> ChildScevs; + // A select means we've found a forked pointer, but we currently only + // support a single select per pointer so if there's another behind this + // then we just bail out and return the generic SCEV. + findForkedSCEVs(SE, L, I->getOperand(1), ChildScevs, Depth); + findForkedSCEVs(SE, L, I->getOperand(2), ChildScevs, Depth); + if (ChildScevs.size() == 2) { + ScevList.push_back(ChildScevs[0]); + ScevList.push_back(ChildScevs[1]); + } else + ScevList.push_back( + std::make_pair(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr))); + break; + } + default: + // Just return the current SCEV if we haven't handled the instruction yet. + LLVM_DEBUG(dbgs() << "ForkedPtr unhandled instruction: " << *I << "\n"); + ScevList.push_back( + std::make_pair(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr))); + break; + } +} + +static SmallVector<std::pair<const SCEV *, bool>> +findForkedPointer(PredicatedScalarEvolution &PSE, + const ValueToValueMap &StridesMap, Value *Ptr, + const Loop *L) { + ScalarEvolution *SE = PSE.getSE(); + assert(SE->isSCEVable(Ptr->getType()) && "Value is not SCEVable!"); + SmallVector<std::pair<const SCEV *, bool>> Scevs; + findForkedSCEVs(SE, L, Ptr, Scevs, MaxForkedSCEVDepth); + + // For now, we will only accept a forked pointer with two possible SCEVs. + if (Scevs.size() == 2) + return Scevs; + + return { + std::make_pair(replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), false)}; +} + bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck, MemAccessInfo Access, Type *AccessTy, const ValueToValueMap &StridesMap, @@ -787,19 +928,8 @@ bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck, bool Assume) { Value *Ptr = Access.getPointer(); - ScalarEvolution &SE = *PSE.getSE(); - SmallVector<std::pair<const SCEV *, bool>> TranslatedPtrs; - auto *SI = dyn_cast<SelectInst>(Ptr); - // Look through selects in the current loop. - if (SI && !TheLoop->isLoopInvariant(SI)) { - TranslatedPtrs = { - std::make_pair(SE.getSCEV(SI->getOperand(1)), - !isGuaranteedNotToBeUndefOrPoison(SI->getOperand(1))), - std::make_pair(SE.getSCEV(SI->getOperand(2)), - !isGuaranteedNotToBeUndefOrPoison(SI->getOperand(2)))}; - } else - TranslatedPtrs = { - std::make_pair(replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), false)}; + SmallVector<std::pair<const SCEV *, bool>> TranslatedPtrs = + findForkedPointer(PSE, StridesMap, Ptr, TheLoop); for (auto &P : TranslatedPtrs) { const SCEV *PtrExpr = P.first; @@ -879,7 +1009,7 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, unsigned RunningDepId = 1; DenseMap<Value *, unsigned> DepSetId; - SmallVector<MemAccessInfo, 4> Retries; + SmallVector<std::pair<MemAccessInfo, Type *>, 4> Retries; // First, count how many write and read accesses are in the alias set. Also // collect MemAccessInfos for later. 
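The comment block above already shows the IR shape being matched; at the source level the new findForkedSCEVs/findForkedPointer path targets loops where the accessed address hinges on a select, as in the hypothetical C++ loop below (illustration only, not taken from the patch). Instead of giving up, the analysis now produces one SCEV per arm of the select, up to the recursion limit controlled by -max-forked-scev-depth, and createCheckForAccess emits runtime checks for both candidates.

// Hypothetical example: the offset fed into the GEP comes from a select, so no
// single SCEVAddRecExpr describes &A[Idx], but each arm of the select does.
void forkedStore(double *A, const double *B, long N, long Offset) {
  for (long I = 0; I < N; ++I) {
    long Idx = (I < N / 2) ? I : I + Offset; // typically lowers to a select feeding the GEP
    A[Idx] = 2.0 * B[I];
  }
}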
@@ -911,13 +1041,13 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, } for (auto &Access : AccessInfos) { - for (auto &AccessTy : Accesses[Access]) { + for (const auto &AccessTy : Accesses[Access]) { if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap, DepSetId, TheLoop, RunningDepId, ASId, ShouldCheckWrap, false)) { LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" << *Access.getPointer() << '\n'); - Retries.push_back(Access); + Retries.push_back({Access, AccessTy}); CanDoAliasSetRT = false; } } @@ -941,15 +1071,15 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, // We know that we need these checks, so we can now be more aggressive // and add further checks if required (overflow checks). CanDoAliasSetRT = true; - for (auto Access : Retries) { - for (auto &AccessTy : Accesses[Access]) { - if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap, - DepSetId, TheLoop, RunningDepId, ASId, - ShouldCheckWrap, /*Assume=*/true)) { - CanDoAliasSetRT = false; - UncomputablePtr = Access.getPointer(); - break; - } + for (auto Retry : Retries) { + MemAccessInfo Access = Retry.first; + Type *AccessTy = Retry.second; + if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap, + DepSetId, TheLoop, RunningDepId, ASId, + ShouldCheckWrap, /*Assume=*/true)) { + CanDoAliasSetRT = false; + UncomputablePtr = Access.getPointer(); + break; } } } @@ -2461,7 +2591,7 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { if (auto *Dependences = DepChecker->getDependences()) { OS.indent(Depth) << "Dependences:\n"; - for (auto &Dep : *Dependences) { + for (const auto &Dep : *Dependences) { Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions()); OS << "\n"; } diff --git a/llvm/lib/Analysis/LoopCacheAnalysis.cpp b/llvm/lib/Analysis/LoopCacheAnalysis.cpp index 2cbf1f7f2d28..85f2dad86711 100644 --- a/llvm/lib/Analysis/LoopCacheAnalysis.cpp +++ b/llvm/lib/Analysis/LoopCacheAnalysis.cpp @@ -289,18 +289,14 @@ CacheCostTy IndexedReference::computeRefCost(const Loop &L, LLVM_DEBUG(dbgs() << "TripCount=" << *TripCount << "\n"); const SCEV *RefCost = nullptr; - if (isConsecutive(L, CLS)) { + const SCEV *Stride = nullptr; + if (isConsecutive(L, Stride, CLS)) { // If the indexed reference is 'consecutive' the cost is // (TripCount*Stride)/CLS. - const SCEV *Coeff = getLastCoefficient(); - const SCEV *ElemSize = Sizes.back(); - assert(Coeff->getType() == ElemSize->getType() && - "Expecting the same type"); - const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize); + assert(Stride != nullptr && + "Stride should not be null for consecutive access!"); Type *WiderType = SE.getWiderType(Stride->getType(), TripCount->getType()); const SCEV *CacheLineSize = SE.getConstant(WiderType, CLS); - if (SE.isKnownNegative(Stride)) - Stride = SE.getNegativeSCEV(Stride); Stride = SE.getNoopOrAnyExtend(Stride, WiderType); TripCount = SE.getNoopOrAnyExtend(TripCount, WiderType); const SCEV *Numerator = SE.getMulExpr(Stride, TripCount); @@ -464,7 +460,8 @@ bool IndexedReference::isLoopInvariant(const Loop &L) const { return allCoeffForLoopAreZero; } -bool IndexedReference::isConsecutive(const Loop &L, unsigned CLS) const { +bool IndexedReference::isConsecutive(const Loop &L, const SCEV *&Stride, + unsigned CLS) const { // The indexed reference is 'consecutive' if the only coefficient that uses // the loop induction variable is the last one... 
const SCEV *LastSubscript = Subscripts.back(); @@ -478,7 +475,19 @@ bool IndexedReference::isConsecutive(const Loop &L, unsigned CLS) const { // ...and the access stride is less than the cache line size. const SCEV *Coeff = getLastCoefficient(); const SCEV *ElemSize = Sizes.back(); - const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize); + Type *WiderType = SE.getWiderType(Coeff->getType(), ElemSize->getType()); + // FIXME: This assumes that all values are signed integers which may + // be incorrect in unusual codes and incorrectly use sext instead of zext. + // for (uint32_t i = 0; i < 512; ++i) { + // uint8_t trunc = i; + // A[trunc] = 42; + // } + // This consecutively iterates twice over A. If `trunc` is sign-extended, + // we would conclude that this may iterate backwards over the array. + // However, LoopCacheAnalysis is heuristic anyway and transformations must + // not result in wrong optimizations if the heuristic was incorrect. + Stride = SE.getMulExpr(SE.getNoopOrSignExtend(Coeff, WiderType), + SE.getNoopOrSignExtend(ElemSize, WiderType)); const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS); Stride = SE.isKnownNegative(Stride) ? SE.getNegativeSCEV(Stride) : Stride; diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp index 29c2437ff5ea..693b9ebd450a 100644 --- a/llvm/lib/Analysis/LoopInfo.cpp +++ b/llvm/lib/Analysis/LoopInfo.cpp @@ -425,12 +425,12 @@ bool Loop::isCanonical(ScalarEvolution &SE) const { // Check that 'BB' doesn't have any uses outside of the 'L' static bool isBlockInLCSSAForm(const Loop &L, const BasicBlock &BB, - const DominatorTree &DT) { + const DominatorTree &DT, bool IgnoreTokens) { for (const Instruction &I : BB) { // Tokens can't be used in PHI nodes and live-out tokens prevent loop // optimizations, so for the purposes of considered LCSSA form, we // can ignore them. - if (I.getType()->isTokenTy()) + if (IgnoreTokens && I.getType()->isTokenTy()) continue; for (const Use &U : I.uses()) { @@ -455,20 +455,20 @@ static bool isBlockInLCSSAForm(const Loop &L, const BasicBlock &BB, return true; } -bool Loop::isLCSSAForm(const DominatorTree &DT) const { +bool Loop::isLCSSAForm(const DominatorTree &DT, bool IgnoreTokens) const { // For each block we check that it doesn't have any uses outside of this loop. return all_of(this->blocks(), [&](const BasicBlock *BB) { - return isBlockInLCSSAForm(*this, *BB, DT); + return isBlockInLCSSAForm(*this, *BB, DT, IgnoreTokens); }); } -bool Loop::isRecursivelyLCSSAForm(const DominatorTree &DT, - const LoopInfo &LI) const { +bool Loop::isRecursivelyLCSSAForm(const DominatorTree &DT, const LoopInfo &LI, + bool IgnoreTokens) const { // For each block we check that it doesn't have any uses outside of its // innermost loop. This process will transitively guarantee that the current // loop and all of the nested loops are in LCSSA form. return all_of(this->blocks(), [&](const BasicBlock *BB) { - return isBlockInLCSSAForm(*LI.getLoopFor(BB), *BB, DT); + return isBlockInLCSSAForm(*LI.getLoopFor(BB), *BB, DT, IgnoreTokens); }); } @@ -482,11 +482,8 @@ bool Loop::isLoopSimplifyForm() const { bool Loop::isSafeToClone() const { // Return false if any loop blocks contain indirectbrs, or there are any calls // to noduplicate functions. - // FIXME: it should be ok to clone CallBrInst's if we correctly update the - // operand list to reflect the newly cloned labels. 
for (BasicBlock *BB : this->blocks()) { - if (isa<IndirectBrInst>(BB->getTerminator()) || - isa<CallBrInst>(BB->getTerminator())) + if (isa<IndirectBrInst>(BB->getTerminator())) return false; for (Instruction &I : *BB) diff --git a/llvm/lib/Analysis/MemoryBuiltins.cpp b/llvm/lib/Analysis/MemoryBuiltins.cpp index f5b121c98ec4..31e4380e4379 100644 --- a/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -179,6 +179,11 @@ static const Function *getCalledFunction(const Value *V, static Optional<AllocFnsTy> getAllocationDataForFunction(const Function *Callee, AllocType AllocTy, const TargetLibraryInfo *TLI) { + // Don't perform a slow TLI lookup, if this function doesn't return a pointer + // and thus can't be an allocation function. + if (!Callee->getReturnType()->isPointerTy()) + return None; + // Make sure that the function is available. LibFunc TLIFn; if (!TLI || !TLI->getLibFunc(*Callee, TLIFn) || !TLI->has(TLIFn)) @@ -266,15 +271,42 @@ static Optional<AllocFnsTy> getAllocationSize(const Value *V, return Result; } +static AllocFnKind getAllocFnKind(const Value *V) { + if (const auto *CB = dyn_cast<CallBase>(V)) { + Attribute Attr = CB->getFnAttr(Attribute::AllocKind); + if (Attr.isValid()) + return AllocFnKind(Attr.getValueAsInt()); + } + return AllocFnKind::Unknown; +} + +static AllocFnKind getAllocFnKind(const Function *F) { + Attribute Attr = F->getFnAttribute(Attribute::AllocKind); + if (Attr.isValid()) + return AllocFnKind(Attr.getValueAsInt()); + return AllocFnKind::Unknown; +} + +static bool checkFnAllocKind(const Value *V, AllocFnKind Wanted) { + return (getAllocFnKind(V) & Wanted) != AllocFnKind::Unknown; +} + +static bool checkFnAllocKind(const Function *F, AllocFnKind Wanted) { + return (getAllocFnKind(F) & Wanted) != AllocFnKind::Unknown; +} + /// Tests if a value is a call or invoke to a library function that /// allocates or reallocates memory (either malloc, calloc, realloc, or strdup /// like). bool llvm::isAllocationFn(const Value *V, const TargetLibraryInfo *TLI) { - return getAllocationData(V, AnyAlloc, TLI).has_value(); + return getAllocationData(V, AnyAlloc, TLI).has_value() || + checkFnAllocKind(V, AllocFnKind::Alloc | AllocFnKind::Realloc); } bool llvm::isAllocationFn( - const Value *V, function_ref<const TargetLibraryInfo &(Function &)> GetTLI) { - return getAllocationData(V, AnyAlloc, GetTLI).has_value(); + const Value *V, + function_ref<const TargetLibraryInfo &(Function &)> GetTLI) { + return getAllocationData(V, AnyAlloc, GetTLI).has_value() || + checkFnAllocKind(V, AllocFnKind::Alloc | AllocFnKind::Realloc); } /// Tests if a value is a call or invoke to a library function that @@ -304,30 +336,36 @@ bool llvm::isMallocOrCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI) /// Tests if a value is a call or invoke to a library function that /// allocates memory (either malloc, calloc, or strdup like). bool llvm::isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI) { - return getAllocationData(V, AllocLike, TLI).has_value(); -} - -/// Tests if a value is a call or invoke to a library function that -/// reallocates memory (e.g., realloc). -bool llvm::isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI) { - return getAllocationData(V, ReallocLike, TLI).has_value(); + return getAllocationData(V, AllocLike, TLI).has_value() || + checkFnAllocKind(V, AllocFnKind::Alloc); } /// Tests if a functions is a call or invoke to a library function that /// reallocates memory (e.g., realloc). 
bool llvm::isReallocLikeFn(const Function *F, const TargetLibraryInfo *TLI) { - return getAllocationDataForFunction(F, ReallocLike, TLI).has_value(); + return getAllocationDataForFunction(F, ReallocLike, TLI).has_value() || + checkFnAllocKind(F, AllocFnKind::Realloc); } -bool llvm::isAllocRemovable(const CallBase *CB, const TargetLibraryInfo *TLI) { - assert(isAllocationFn(CB, TLI)); +Value *llvm::getReallocatedOperand(const CallBase *CB, + const TargetLibraryInfo *TLI) { + if (getAllocationData(CB, ReallocLike, TLI).has_value()) { + // All currently supported realloc functions reallocate the first argument. + return CB->getArgOperand(0); + } + if (checkFnAllocKind(CB, AllocFnKind::Realloc)) + return CB->getArgOperandWithAttribute(Attribute::AllocatedPointer); + return nullptr; +} +bool llvm::isRemovableAlloc(const CallBase *CB, const TargetLibraryInfo *TLI) { // Note: Removability is highly dependent on the source language. For // example, recent C++ requires direct calls to the global allocation // [basic.stc.dynamic.allocation] to be observable unless part of a new // expression [expr.new paragraph 13]. - // Historically we've treated the C family allocation routines as removable + // Historically we've treated the C family allocation routines and operator + // new as removable return isAllocLikeFn(CB, TLI); } @@ -357,9 +395,8 @@ static bool CheckedZextOrTrunc(APInt &I, unsigned IntTyBits) { } Optional<APInt> -llvm::getAllocSize(const CallBase *CB, - const TargetLibraryInfo *TLI, - std::function<const Value*(const Value*)> Mapper) { +llvm::getAllocSize(const CallBase *CB, const TargetLibraryInfo *TLI, + function_ref<const Value *(const Value *)> Mapper) { // Note: This handles both explicitly listed allocation functions and // allocsize. The code structure could stand to be cleaned up a bit. Optional<AllocFnsTy> FnData = getAllocationSize(CB, TLI); @@ -434,6 +471,12 @@ Constant *llvm::getInitialValueOfAllocation(const Value *V, if (isCallocLikeFn(Alloc, TLI)) return Constant::getNullValue(Ty); + AllocFnKind AK = getAllocFnKind(Alloc); + if ((AK & AllocFnKind::Uninitialized) != AllocFnKind::Unknown) + return UndefValue::get(Ty); + if ((AK & AllocFnKind::Zeroed) != AllocFnKind::Unknown) + return Constant::getNullValue(Ty); + return nullptr; } @@ -497,14 +540,23 @@ Optional<StringRef> llvm::getAllocationFamily(const Value *I, if (Callee == nullptr || IsNoBuiltin) return None; LibFunc TLIFn; - if (!TLI || !TLI->getLibFunc(*Callee, TLIFn) || !TLI->has(TLIFn)) - return None; - const auto AllocData = getAllocationDataForFunction(Callee, AnyAlloc, TLI); - if (AllocData) - return mangledNameForMallocFamily(AllocData.value().Family); - const auto FreeData = getFreeFunctionDataForFunction(Callee, TLIFn); - if (FreeData) - return mangledNameForMallocFamily(FreeData.value().Family); + + if (TLI && TLI->getLibFunc(*Callee, TLIFn) && TLI->has(TLIFn)) { + // Callee is some known library function. + const auto AllocData = getAllocationDataForFunction(Callee, AnyAlloc, TLI); + if (AllocData) + return mangledNameForMallocFamily(AllocData.value().Family); + const auto FreeData = getFreeFunctionDataForFunction(Callee, TLIFn); + if (FreeData) + return mangledNameForMallocFamily(FreeData.value().Family); + } + // Callee isn't a known library function, still check attributes. 
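An aside on the MemoryBuiltins.cpp changes above: allocator and deallocator detection now falls back to the allockind / "alloc-family" function attributes when the callee is not a TLI-known library function. A minimal sketch of how a client could query the attribute-aware helpers; the call site CB and the TargetLibraryInfo pointer are assumed to be supplied by the surrounding pass, and this is an illustration rather than part of the imported patch:

#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Classify a call site using the attribute-aware helpers declared in
// MemoryBuiltins.h.
static void classifyCallSite(const CallBase *CB, const TargetLibraryInfo *TLI) {
  // True both for TLI-known allocators (malloc, calloc, strdup, ...) and for
  // callees carrying an allockind("alloc") or allockind("realloc") attribute.
  if (!isAllocationFn(CB, TLI))
    return;
  if (Value *Old = getReallocatedOperand(CB, TLI))
    errs() << "realloc-like call, reallocates: " << *Old << "\n";
  else if (isRemovableAlloc(CB, TLI))
    errs() << "allocation with no other observable side effects\n";
}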
+ if (checkFnAllocKind(I, AllocFnKind::Free | AllocFnKind::Alloc | + AllocFnKind::Realloc)) { + Attribute Attr = cast<CallBase>(I)->getFnAttr("alloc-family"); + if (Attr.isValid()) + return Attr.getValueAsString(); + } return None; } @@ -512,7 +564,7 @@ Optional<StringRef> llvm::getAllocationFamily(const Value *I, bool llvm::isLibFreeFunction(const Function *F, const LibFunc TLIFn) { Optional<FreeFnsTy> FnData = getFreeFunctionDataForFunction(F, TLIFn); if (!FnData) - return false; + return checkFnAllocKind(F, AllocFnKind::Free); // Check free prototype. // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin @@ -528,20 +580,24 @@ bool llvm::isLibFreeFunction(const Function *F, const LibFunc TLIFn) { return true; } -/// isFreeCall - Returns non-null if the value is a call to the builtin free() -const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { +Value *llvm::getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI) { bool IsNoBuiltinCall; - const Function *Callee = getCalledFunction(I, IsNoBuiltinCall); + const Function *Callee = getCalledFunction(CB, IsNoBuiltinCall); if (Callee == nullptr || IsNoBuiltinCall) return nullptr; LibFunc TLIFn; - if (!TLI || !TLI->getLibFunc(*Callee, TLIFn) || !TLI->has(TLIFn)) - return nullptr; + if (TLI && TLI->getLibFunc(*Callee, TLIFn) && TLI->has(TLIFn) && + isLibFreeFunction(Callee, TLIFn)) { + // All currently supported free functions free the first argument. + return CB->getArgOperand(0); + } - return isLibFreeFunction(Callee, TLIFn) ? dyn_cast<CallInst>(I) : nullptr; -} + if (checkFnAllocKind(CB, AllocFnKind::Free)) + return CB->getArgOperandWithAttribute(Attribute::AllocatedPointer); + return nullptr; +} //===----------------------------------------------------------------------===// // Utility functions to compute size of objects. @@ -765,8 +821,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) { } SizeOffsetType ObjectSizeOffsetVisitor::visitCallBase(CallBase &CB) { - auto Mapper = [](const Value *V) { return V; }; - if (Optional<APInt> Size = getAllocSize(&CB, TLI, Mapper)) + if (Optional<APInt> Size = getAllocSize(&CB, TLI)) return std::make_pair(*Size, Zero); return unknown(); } diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index 690d575ef979..fce9d5b24faf 100644 --- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -139,10 +139,12 @@ static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc, return ModRefInfo::ModRef; } - if (const CallInst *CI = isFreeCall(Inst, &TLI)) { - // calls to free() deallocate the entire structure - Loc = MemoryLocation::getAfter(CI->getArgOperand(0)); - return ModRefInfo::Mod; + if (const CallBase *CB = dyn_cast<CallBase>(Inst)) { + if (Value *FreedOp = getFreedOperand(CB, &TLI)) { + // calls to free() deallocate the entire structure + Loc = MemoryLocation::getAfter(FreedOp); + return ModRefInfo::Mod; + } } if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { diff --git a/llvm/lib/Analysis/MemoryProfileInfo.cpp b/llvm/lib/Analysis/MemoryProfileInfo.cpp new file mode 100644 index 000000000000..3d11cb81226e --- /dev/null +++ b/llvm/lib/Analysis/MemoryProfileInfo.cpp @@ -0,0 +1,226 @@ +//===-- MemoryProfileInfo.cpp - memory profile info ------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains utilities to analyze memory profile information. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/MemoryProfileInfo.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; +using namespace llvm::memprof; + +#define DEBUG_TYPE "memory-profile-info" + +// Upper bound on accesses per byte for marking an allocation cold. +cl::opt<float> MemProfAccessesPerByteColdThreshold( + "memprof-accesses-per-byte-cold-threshold", cl::init(10.0), cl::Hidden, + cl::desc("The threshold the accesses per byte must be under to consider " + "an allocation cold")); + +// Lower bound on lifetime to mark an allocation cold (in addition to accesses +// per byte above). This is to avoid pessimizing short lived objects. +cl::opt<unsigned> MemProfMinLifetimeColdThreshold( + "memprof-min-lifetime-cold-threshold", cl::init(200), cl::Hidden, + cl::desc("The minimum lifetime (s) for an allocation to be considered " + "cold")); + +AllocationType llvm::memprof::getAllocType(uint64_t MaxAccessCount, + uint64_t MinSize, + uint64_t MinLifetime) { + if (((float)MaxAccessCount) / MinSize < MemProfAccessesPerByteColdThreshold && + // MinLifetime is expected to be in ms, so convert the threshold to ms. + MinLifetime >= MemProfMinLifetimeColdThreshold * 1000) + return AllocationType::Cold; + return AllocationType::NotCold; +} + +MDNode *llvm::memprof::buildCallstackMetadata(ArrayRef<uint64_t> CallStack, + LLVMContext &Ctx) { + std::vector<Metadata *> StackVals; + for (auto Id : CallStack) { + auto *StackValMD = + ValueAsMetadata::get(ConstantInt::get(Type::getInt64Ty(Ctx), Id)); + StackVals.push_back(StackValMD); + } + return MDNode::get(Ctx, StackVals); +} + +MDNode *llvm::memprof::getMIBStackNode(const MDNode *MIB) { + assert(MIB->getNumOperands() == 2); + // The stack metadata is the first operand of each memprof MIB metadata. + return cast<MDNode>(MIB->getOperand(0)); +} + +AllocationType llvm::memprof::getMIBAllocType(const MDNode *MIB) { + assert(MIB->getNumOperands() == 2); + // The allocation type is currently the second operand of each memprof + // MIB metadata. This will need to change as we add additional allocation + // types that can be applied based on the allocation profile data. 
+ auto *MDS = dyn_cast<MDString>(MIB->getOperand(1)); + assert(MDS); + if (MDS->getString().equals("cold")) + return AllocationType::Cold; + return AllocationType::NotCold; +} + +static std::string getAllocTypeAttributeString(AllocationType Type) { + switch (Type) { + case AllocationType::NotCold: + return "notcold"; + break; + case AllocationType::Cold: + return "cold"; + break; + default: + assert(false && "Unexpected alloc type"); + } + llvm_unreachable("invalid alloc type"); +} + +static void addAllocTypeAttribute(LLVMContext &Ctx, CallBase *CI, + AllocationType AllocType) { + auto AllocTypeString = getAllocTypeAttributeString(AllocType); + auto A = llvm::Attribute::get(Ctx, "memprof", AllocTypeString); + CI->addFnAttr(A); +} + +static bool hasSingleAllocType(uint8_t AllocTypes) { + const unsigned NumAllocTypes = countPopulation(AllocTypes); + assert(NumAllocTypes != 0); + return NumAllocTypes == 1; +} + +void CallStackTrie::addCallStack(AllocationType AllocType, + ArrayRef<uint64_t> StackIds) { + bool First = true; + CallStackTrieNode *Curr = nullptr; + for (auto StackId : StackIds) { + // If this is the first stack frame, add or update alloc node. + if (First) { + First = false; + if (Alloc) { + assert(AllocStackId == StackId); + Alloc->AllocTypes |= static_cast<uint8_t>(AllocType); + } else { + AllocStackId = StackId; + Alloc = new CallStackTrieNode(AllocType); + } + Curr = Alloc; + continue; + } + // Update existing caller node if it exists. + auto Next = Curr->Callers.find(StackId); + if (Next != Curr->Callers.end()) { + Curr = Next->second; + Curr->AllocTypes |= static_cast<uint8_t>(AllocType); + continue; + } + // Otherwise add a new caller node. + auto *New = new CallStackTrieNode(AllocType); + Curr->Callers[StackId] = New; + Curr = New; + } + assert(Curr); +} + +void CallStackTrie::addCallStack(MDNode *MIB) { + MDNode *StackMD = getMIBStackNode(MIB); + assert(StackMD); + std::vector<uint64_t> CallStack; + CallStack.reserve(StackMD->getNumOperands()); + for (auto &MIBStackIter : StackMD->operands()) { + auto *StackId = mdconst::dyn_extract<ConstantInt>(MIBStackIter); + assert(StackId); + CallStack.push_back(StackId->getZExtValue()); + } + addCallStack(getMIBAllocType(MIB), CallStack); +} + +static MDNode *createMIBNode(LLVMContext &Ctx, + std::vector<uint64_t> &MIBCallStack, + AllocationType AllocType) { + std::vector<Metadata *> MIBPayload( + {buildCallstackMetadata(MIBCallStack, Ctx)}); + MIBPayload.push_back( + MDString::get(Ctx, getAllocTypeAttributeString(AllocType))); + return MDNode::get(Ctx, MIBPayload); +} + +// Recursive helper to trim contexts and create metadata nodes. +// Caller should have pushed Node's loc to MIBCallStack. Doing this in the +// caller makes it simpler to handle the many early returns in this method. +bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx, + std::vector<uint64_t> &MIBCallStack, + std::vector<Metadata *> &MIBNodes, + bool CalleeHasAmbiguousCallerContext) { + // Trim context below the first node in a prefix with a single alloc type. + // Add an MIB record for the current call stack prefix. + if (hasSingleAllocType(Node->AllocTypes)) { + MIBNodes.push_back( + createMIBNode(Ctx, MIBCallStack, (AllocationType)Node->AllocTypes)); + return true; + } + + // We don't have a single allocation for all the contexts sharing this prefix, + // so recursively descend into callers in trie. 
+ if (!Node->Callers.empty()) { + bool NodeHasAmbiguousCallerContext = Node->Callers.size() > 1; + bool AddedMIBNodesForAllCallerContexts = true; + for (auto &Caller : Node->Callers) { + MIBCallStack.push_back(Caller.first); + AddedMIBNodesForAllCallerContexts &= + buildMIBNodes(Caller.second, Ctx, MIBCallStack, MIBNodes, + NodeHasAmbiguousCallerContext); + // Remove Caller. + MIBCallStack.pop_back(); + } + if (AddedMIBNodesForAllCallerContexts) + return true; + // We expect that the callers should be forced to add MIBs to disambiguate + // the context in this case (see below). + assert(!NodeHasAmbiguousCallerContext); + } + + // If we reached here, then this node does not have a single allocation type, + // and we didn't add metadata for a longer call stack prefix including any of + // Node's callers. That means we never hit a single allocation type along all + // call stacks with this prefix. This can happen due to recursion collapsing + // or the stack being deeper than tracked by the profiler runtime, leading to + // contexts with different allocation types being merged. In that case, we + // trim the context just below the deepest context split, which is this + // node if the callee has an ambiguous caller context (multiple callers), + // since the recursive calls above returned false. Conservatively give it + // non-cold allocation type. + if (!CalleeHasAmbiguousCallerContext) + return false; + MIBNodes.push_back(createMIBNode(Ctx, MIBCallStack, AllocationType::NotCold)); + return true; +} + +// Build and attach the minimal necessary MIB metadata. If the alloc has a +// single allocation type, add a function attribute instead. Returns true if +// memprof metadata attached, false if not (attribute added). +bool CallStackTrie::buildAndAttachMIBMetadata(CallBase *CI) { + auto &Ctx = CI->getContext(); + if (hasSingleAllocType(Alloc->AllocTypes)) { + addAllocTypeAttribute(Ctx, CI, (AllocationType)Alloc->AllocTypes); + return false; + } + std::vector<uint64_t> MIBCallStack; + MIBCallStack.push_back(AllocStackId); + std::vector<Metadata *> MIBNodes; + assert(!Alloc->Callers.empty() && "addCallStack has not been called yet"); + buildMIBNodes(Alloc, Ctx, MIBCallStack, MIBNodes, + /*CalleeHasAmbiguousCallerContext=*/true); + assert(MIBCallStack.size() == 1 && + "Should only be left with Alloc's location in stack"); + CI->setMetadata(LLVMContext::MD_memprof, MDNode::get(Ctx, MIBNodes)); + return true; +} diff --git a/llvm/lib/Analysis/MemorySSAUpdater.cpp b/llvm/lib/Analysis/MemorySSAUpdater.cpp index eb75118210b9..9ad60f774e9f 100644 --- a/llvm/lib/Analysis/MemorySSAUpdater.cpp +++ b/llvm/lib/Analysis/MemorySSAUpdater.cpp @@ -451,7 +451,7 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) { } // Existing Phi blocks may need renaming too, if an access was previously // optimized and the inserted Defs "covers" the Optimized value. 
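For readers unfamiliar with the new MemoryProfileInfo.cpp file above: CallStackTrie collects the profiled call-stack contexts of a single allocation call and then either attaches !memprof metadata (when the contexts disagree on allocation type) or a single "memprof" function attribute (when they all agree). With the default thresholds defined above, for example, a 1000-byte allocation touched 5000 times (5 accesses per byte, under the 10.0 threshold) with a minimum lifetime of 250 s (over the 200 s threshold) is classified cold by getAllocType. A short usage sketch, with invented stack ids and an assumed allocation call AllocCall; illustration only:

#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/IR/InstrTypes.h"

using namespace llvm;
using namespace llvm::memprof;

// Summarize two profiled contexts for one allocation site and attach the
// result to the call instruction.
static void annotateAllocation(CallBase *AllocCall) {
  CallStackTrie Trie;
  // Both contexts start at the allocation site (id 0x1) and share one caller
  // frame (0x2) before diverging.
  Trie.addCallStack(AllocationType::Cold, {0x1, 0x2, 0x3});
  Trie.addCallStack(AllocationType::NotCold, {0x1, 0x2, 0x4});
  // The shared prefix has mixed allocation types, so this attaches !memprof
  // MIB metadata and returns true; with a single type it would instead add
  // the "memprof" function attribute and return false.
  Trie.buildAndAttachMIBMetadata(AllocCall);
}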
- for (auto &MP : ExistingPhis) { + for (const auto &MP : ExistingPhis) { MemoryPhi *Phi = dyn_cast_or_null<MemoryPhi>(MP); if (Phi) MSSA->renamePass(Phi->getBlock(), nullptr, Visited); @@ -462,7 +462,7 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) { void MemorySSAUpdater::fixupDefs(const SmallVectorImpl<WeakVH> &Vars) { SmallPtrSet<const BasicBlock *, 8> Seen; SmallVector<const BasicBlock *, 16> Worklist; - for (auto &Var : Vars) { + for (const auto &Var : Vars) { MemoryAccess *NewDef = dyn_cast_or_null<MemoryAccess>(Var); if (!NewDef) continue; @@ -744,10 +744,10 @@ void MemorySSAUpdater::updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, cloneUsesAndDefs(BB, NewBlock, VMap, MPhiMap); }; - for (auto BB : llvm::concat<BasicBlock *const>(LoopBlocks, ExitBlocks)) + for (auto *BB : llvm::concat<BasicBlock *const>(LoopBlocks, ExitBlocks)) ProcessBlock(BB); - for (auto BB : llvm::concat<BasicBlock *const>(LoopBlocks, ExitBlocks)) + for (auto *BB : llvm::concat<BasicBlock *const>(LoopBlocks, ExitBlocks)) if (MemoryPhi *MPhi = MSSA->getMemoryAccess(BB)) if (MemoryAccess *NewPhi = MPhiMap.lookup(MPhi)) FixPhiIncomingValues(MPhi, cast<MemoryPhi>(NewPhi)); @@ -811,7 +811,7 @@ void MemorySSAUpdater::applyUpdates(ArrayRef<CFGUpdate> Updates, SmallVector<CFGUpdate, 4> DeleteUpdates; SmallVector<CFGUpdate, 4> RevDeleteUpdates; SmallVector<CFGUpdate, 4> InsertUpdates; - for (auto &Update : Updates) { + for (const auto &Update : Updates) { if (Update.getKind() == DT.Insert) InsertUpdates.push_back({DT.Insert, Update.getFrom(), Update.getTo()}); else { @@ -958,7 +958,7 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates, }; SmallDenseMap<BasicBlock *, PredInfo> PredMap; - for (auto &Edge : Updates) { + for (const auto &Edge : Updates) { BasicBlock *BB = Edge.getTo(); auto &AddedBlockSet = PredMap[BB].Added; AddedBlockSet.insert(Edge.getFrom()); @@ -1003,7 +1003,7 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates, // First create MemoryPhis in all blocks that don't have one. Create in the // order found in Updates, not in PredMap, to get deterministic numbering. 
- for (auto &Edge : Updates) { + for (const auto &Edge : Updates) { BasicBlock *BB = Edge.getTo(); if (PredMap.count(BB) && !MSSA->getMemoryAccess(BB)) InsertedPhis.push_back(MSSA->createMemoryPhi(BB)); @@ -1400,7 +1400,7 @@ void MemorySSAUpdater::removeBlocks( } void MemorySSAUpdater::tryRemoveTrivialPhis(ArrayRef<WeakVH> UpdatedPHIs) { - for (auto &VH : UpdatedPHIs) + for (const auto &VH : UpdatedPHIs) if (auto *MPhi = cast_or_null<MemoryPhi>(VH)) tryRemoveTrivialPhi(MPhi); } diff --git a/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp b/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp index 373aaa48b1d1..756f92e1aac4 100644 --- a/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp +++ b/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp @@ -95,7 +95,7 @@ static void printModuleDebugInfo(raw_ostream &O, const Module *M, O << '\n'; } - for (auto GVU : Finder.global_variables()) { + for (auto *GVU : Finder.global_variables()) { const auto *GV = GVU->getVariable(); O << "Global variable: " << GV->getName(); printFile(O, GV->getFilename(), GV->getDirectory(), GV->getLine()); diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp index 2b98634ef7bf..c52b27a38fe9 100644 --- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -400,7 +400,7 @@ static void computeFunctionSummary( // to enable importing for subsequent indirect call promotion and // inlining. if (auto *MD = I.getMetadata(LLVMContext::MD_callees)) { - for (auto &Op : MD->operands()) { + for (const auto &Op : MD->operands()) { Function *Callee = mdconst::extract_or_null<Function>(Op); if (Callee) CallGraphEdges[Index.getOrInsertValueInfo(Callee)]; @@ -412,7 +412,7 @@ static void computeFunctionSummary( auto CandidateProfileData = ICallAnalysis.getPromotionCandidatesForInstruction( &I, NumVals, TotalCount, NumCandidates); - for (auto &Candidate : CandidateProfileData) + for (const auto &Candidate : CandidateProfileData) CallGraphEdges[Index.getOrInsertValueInfo(Candidate.Value)] .updateHotness(getHotness(Candidate.Count, PSI)); } @@ -451,7 +451,7 @@ static void computeFunctionSummary( // If both load and store instruction reference the same variable // we won't be able to optimize it. Add all such reference edges // to RefEdges set. - for (auto &VI : StoreRefEdges) + for (const auto &VI : StoreRefEdges) if (LoadRefEdges.remove(VI)) RefEdges.insert(VI); @@ -459,11 +459,11 @@ static void computeFunctionSummary( // All new reference edges inserted in two loops below are either // read or write only. They will be grouped in the end of RefEdges // vector, so we can use a single integer value to identify them. - for (auto &VI : LoadRefEdges) + for (const auto &VI : LoadRefEdges) RefEdges.insert(VI); unsigned FirstWORef = RefEdges.size(); - for (auto &VI : StoreRefEdges) + for (const auto &VI : StoreRefEdges) RefEdges.insert(VI); Refs = RefEdges.takeVector(); @@ -646,15 +646,18 @@ static void computeVariableSummary(ModuleSummaryIndex &Index, Index.addGlobalValueSummary(V, std::move(GVarSummary)); } -static void -computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A, - DenseSet<GlobalValue::GUID> &CantBePromoted) { +static void computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A, + DenseSet<GlobalValue::GUID> &CantBePromoted) { + // Skip summary for indirect function aliases as summary for aliasee will not + // be emitted. 
+ const GlobalObject *Aliasee = A.getAliaseeObject(); + if (isa<GlobalIFunc>(Aliasee)) + return; bool NonRenamableLocal = isNonRenamableLocal(A); GlobalValueSummary::GVFlags Flags( A.getLinkage(), A.getVisibility(), NonRenamableLocal, /* Live = */ false, A.isDSOLocal(), A.canBeOmittedFromSymbolTable()); auto AS = std::make_unique<AliasSummary>(Flags); - auto *Aliasee = A.getAliaseeObject(); auto AliaseeVI = Index.getValueInfo(Aliasee->getGUID()); assert(AliaseeVI && "Alias expects aliasee summary to be available"); assert(AliaseeVI.getSummaryList().size() == 1 && @@ -668,7 +671,7 @@ computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A, // Set LiveRoot flag on entries matching the given value name. static void setLiveRoot(ModuleSummaryIndex &Index, StringRef Name) { if (ValueInfo VI = Index.getValueInfo(GlobalValue::getGUID(Name))) - for (auto &Summary : VI.getSummaryList()) + for (const auto &Summary : VI.getSummaryList()) Summary->setLive(true); } @@ -776,7 +779,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( // Compute summaries for all functions defined in module, and save in the // index. - for (auto &F : M) { + for (const auto &F : M) { if (F.isDeclaration()) continue; @@ -811,6 +814,13 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( for (const GlobalAlias &A : M.aliases()) computeAliasSummary(Index, A, CantBePromoted); + // Iterate through ifuncs, set their resolvers all alive. + for (const GlobalIFunc &I : M.ifuncs()) { + I.applyAlongResolverPath([&Index](const GlobalValue &GV) { + Index.getGlobalValueSummary(GV)->setLive(true); + }); + } + for (auto *V : LocalsUsed) { auto *Summary = Index.getGlobalValueSummary(*V); assert(Summary && "Missing summary for global value"); diff --git a/llvm/lib/Analysis/MustExecute.cpp b/llvm/lib/Analysis/MustExecute.cpp index ad8322d7bd79..ac6590c1d8a2 100644 --- a/llvm/lib/Analysis/MustExecute.cpp +++ b/llvm/lib/Analysis/MustExecute.cpp @@ -81,7 +81,7 @@ void ICFLoopSafetyInfo::computeLoopSafetyInfo(const Loop *CurLoop) { MW.clear(); MayThrow = false; // Figure out the fact that at least one block may throw. - for (auto &BB : CurLoop->blocks()) + for (const auto &BB : CurLoop->blocks()) if (ICF.hasICF(&*BB)) { MayThrow = true; break; @@ -164,7 +164,7 @@ static void collectTransitivePredecessors( if (BB == CurLoop->getHeader()) return; SmallVector<const BasicBlock *, 4> WorkList; - for (auto *Pred : predecessors(BB)) { + for (const auto *Pred : predecessors(BB)) { Predecessors.insert(Pred); WorkList.push_back(Pred); } @@ -180,7 +180,7 @@ static void collectTransitivePredecessors( // @nested and @nested_no_throw in test/Analysis/MustExecute/loop-header.ll. // We can ignore backedge of all loops containing BB to get a sligtly more // optimistic result. - for (auto *PredPred : predecessors(Pred)) + for (const auto *PredPred : predecessors(Pred)) if (Predecessors.insert(PredPred).second) WorkList.push_back(PredPred); } @@ -207,7 +207,7 @@ bool LoopSafetyInfo::allLoopPathsLeadToBlock(const Loop *CurLoop, // 3) Exit blocks which are not taken on 1st iteration. // Memoize blocks we've already checked. SmallPtrSet<const BasicBlock *, 4> CheckedSuccessors; - for (auto *Pred : Predecessors) { + for (const auto *Pred : Predecessors) { // Predecessor block may throw, so it has a side exit. 
if (blockMayThrow(Pred)) return false; @@ -217,7 +217,7 @@ bool LoopSafetyInfo::allLoopPathsLeadToBlock(const Loop *CurLoop, if (DT->dominates(BB, Pred)) continue; - for (auto *Succ : successors(Pred)) + for (const auto *Succ : successors(Pred)) if (CheckedSuccessors.insert(Succ).second && Succ != BB && !Predecessors.count(Succ)) // By discharging conditions that are not executed on the 1st iteration, @@ -285,7 +285,7 @@ bool ICFLoopSafetyInfo::doesNotWriteMemoryBefore(const BasicBlock *BB, collectTransitivePredecessors(CurLoop, BB, Predecessors); // Find if there any instruction in either predecessor that could write // to memory. - for (auto *Pred : Predecessors) + for (const auto *Pred : Predecessors) if (MW.mayWriteToMemory(Pred)) return false; return true; @@ -413,7 +413,7 @@ class MustExecuteAnnotatedWriter : public AssemblyAnnotationWriter { public: MustExecuteAnnotatedWriter(const Function &F, DominatorTree &DT, LoopInfo &LI) { - for (auto &I: instructions(F)) { + for (const auto &I: instructions(F)) { Loop *L = LI.getLoopFor(I.getParent()); while (L) { if (isMustExecuteIn(I, L, &DT)) { @@ -425,8 +425,8 @@ public: } MustExecuteAnnotatedWriter(const Module &M, DominatorTree &DT, LoopInfo &LI) { - for (auto &F : M) - for (auto &I: instructions(F)) { + for (const auto &F : M) + for (const auto &I: instructions(F)) { Loop *L = LI.getLoopFor(I.getParent()); while (L) { if (isMustExecuteIn(I, L, &DT)) { diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index f61806bd1dad..d46248aa3889 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -1158,7 +1158,7 @@ const SCEV *ScalarEvolution::getLosslessPtrToIntExpr(const SCEV *Op, const SCEV *visitAddExpr(const SCEVAddExpr *Expr) { SmallVector<const SCEV *, 2> Operands; bool Changed = false; - for (auto *Op : Expr->operands()) { + for (const auto *Op : Expr->operands()) { Operands.push_back(visit(Op)); Changed |= Op != Operands.back(); } @@ -1168,7 +1168,7 @@ const SCEV *ScalarEvolution::getLosslessPtrToIntExpr(const SCEV *Op, const SCEV *visitMulExpr(const SCEVMulExpr *Expr) { SmallVector<const SCEV *, 2> Operands; bool Changed = false; - for (auto *Op : Expr->operands()) { + for (const auto *Op : Expr->operands()) { Operands.push_back(visit(Op)); Changed |= Op != Operands.back(); } @@ -4662,7 +4662,7 @@ ScalarEvolution::getUMinFromMismatchedTypes(SmallVectorImpl<const SCEV *> &Ops, // Find the max type first. Type *MaxType = nullptr; - for (auto *S : Ops) + for (const auto *S : Ops) if (MaxType) MaxType = getWiderType(MaxType, S->getType()); else @@ -4671,7 +4671,7 @@ ScalarEvolution::getUMinFromMismatchedTypes(SmallVectorImpl<const SCEV *> &Ops, // Extend all ops to max type. SmallVector<const SCEV *, 2> PromotedOps; - for (auto *S : Ops) + for (const auto *S : Ops) PromotedOps.push_back(getNoopOrZeroExtend(S, MaxType)); // Generate umin. @@ -6636,7 +6636,7 @@ ScalarEvolution::getRangeRef(const SCEV *S, // Make sure that we do not run over cycled Phis. if (PendingPhiRanges.insert(Phi).second) { ConstantRange RangeFromOps(BitWidth, /*isFullSet=*/false); - for (auto &Op : Phi->operands()) { + for (const auto &Op : Phi->operands()) { auto OpRange = getRangeRef(getSCEV(Op), SignHint); RangeFromOps = RangeFromOps.unionWith(OpRange); // No point to continue if we already have a full set. 
@@ -6651,6 +6651,13 @@ ScalarEvolution::getRangeRef(const SCEV *S, } } + // vscale can't be equal to zero + if (const auto *II = dyn_cast<IntrinsicInst>(U->getValue())) + if (II->getIntrinsicID() == Intrinsic::vscale) { + ConstantRange Disallowed = APInt::getZero(BitWidth); + ConservativeResult = ConservativeResult.difference(Disallowed); + } + return setRange(U, SignHint, std::move(ConservativeResult)); } @@ -6973,13 +6980,13 @@ static void collectUniqueOps(const SCEV *S, Ops.push_back(S); }; if (auto *S2 = dyn_cast<SCEVCastExpr>(S)) - for (auto *Op : S2->operands()) + for (const auto *Op : S2->operands()) InsertUnique(Op); else if (auto *S2 = dyn_cast<SCEVNAryExpr>(S)) - for (auto *Op : S2->operands()) + for (const auto *Op : S2->operands()) InsertUnique(Op); else if (auto *S2 = dyn_cast<SCEVUDivExpr>(S)) - for (auto *Op : S2->operands()) + for (const auto *Op : S2->operands()) InsertUnique(Op); } @@ -7001,7 +7008,7 @@ ScalarEvolution::getDefiningScopeBound(ArrayRef<const SCEV *> Ops, Worklist.push_back(S); }; - for (auto *S : Ops) + for (const auto *S : Ops) pushOp(S); const Instruction *Bound = nullptr; @@ -7013,7 +7020,7 @@ ScalarEvolution::getDefiningScopeBound(ArrayRef<const SCEV *> Ops, } else { SmallVector<const SCEV *, 4> Ops; collectUniqueOps(S, Ops); - for (auto *Op : Ops) + for (const auto *Op : Ops) pushOp(Op); } } @@ -7117,7 +7124,7 @@ bool ScalarEvolution::isAddRecNeverPoison(const Instruction *I, const Loop *L) { while (!PoisonStack.empty() && !LatchControlDependentOnPoison) { const Instruction *Poison = PoisonStack.pop_back_val(); - for (auto *PoisonUser : Poison->users()) { + for (const auto *PoisonUser : Poison->users()) { if (propagatesPoison(cast<Operator>(PoisonUser))) { if (Pushed.insert(cast<Instruction>(PoisonUser)).second) PoisonStack.push_back(cast<Instruction>(PoisonUser)); @@ -7242,7 +7249,7 @@ ScalarEvolution::getOperandsToCreate(Value *V, SmallVectorImpl<Value *> &Ops) { Operator *U = cast<Operator>(V); if (auto BO = MatchBinaryOp(U, DT)) { bool IsConstArg = isa<ConstantInt>(BO->RHS); - switch (U->getOpcode()) { + switch (BO->Opcode) { case Instruction::Add: { // For additions and multiplications, traverse add/mul chains for which we // can potentially create a single SCEV, to reduce the number of @@ -7284,7 +7291,10 @@ ScalarEvolution::getOperandsToCreate(Value *V, SmallVectorImpl<Value *> &Ops) { } while (true); return nullptr; } - + case Instruction::Sub: + case Instruction::UDiv: + case Instruction::URem: + break; case Instruction::AShr: case Instruction::Shl: case Instruction::Xor: @@ -7296,7 +7306,10 @@ ScalarEvolution::getOperandsToCreate(Value *V, SmallVectorImpl<Value *> &Ops) { if (!IsConstArg && BO->LHS->getType()->isIntegerTy(1)) return nullptr; break; + case Instruction::LShr: + return getUnknown(V); default: + llvm_unreachable("Unhandled binop"); break; } @@ -7340,12 +7353,34 @@ ScalarEvolution::getOperandsToCreate(Value *V, SmallVectorImpl<Value *> &Ops) { // Keep constructing SCEVs' for phis recursively for now. return nullptr; - case Instruction::Select: + case Instruction::Select: { + // Check if U is a select that can be simplified to a SCEVUnknown. 
+ auto CanSimplifyToUnknown = [this, U]() { + if (U->getType()->isIntegerTy(1) || isa<ConstantInt>(U->getOperand(0))) + return false; + + auto *ICI = dyn_cast<ICmpInst>(U->getOperand(0)); + if (!ICI) + return false; + Value *LHS = ICI->getOperand(0); + Value *RHS = ICI->getOperand(1); + if (ICI->getPredicate() == CmpInst::ICMP_EQ || + ICI->getPredicate() == CmpInst::ICMP_NE) { + if (!(isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero())) + return true; + } else if (getTypeSizeInBits(LHS->getType()) > + getTypeSizeInBits(U->getType())) + return true; + return false; + }; + if (CanSimplifyToUnknown()) + return getUnknown(U); + for (Value *Inc : U->operands()) Ops.push_back(Inc); return nullptr; break; - + } case Instruction::Call: case Instruction::Invoke: if (Value *RV = cast<CallBase>(U)->getReturnedArgOperand()) { @@ -8338,7 +8373,7 @@ ScalarEvolution::BackedgeTakenInfo::getExact(const Loop *L, ScalarEvolution *SE, // All exiting blocks we have gathered dominate loop's latch, so exact trip // count is simply a minimum out of all these calculated exit counts. SmallVector<const SCEV *, 2> Ops; - for (auto &ENT : ExitNotTaken) { + for (const auto &ENT : ExitNotTaken) { const SCEV *BECount = ENT.ExactNotTaken; assert(BECount != SE->getCouldNotCompute() && "Bad exit SCEV!"); assert(SE->DT.dominates(ENT.ExitingBlock, Latch) && @@ -8348,7 +8383,7 @@ ScalarEvolution::BackedgeTakenInfo::getExact(const Loop *L, ScalarEvolution *SE, Ops.push_back(BECount); if (Preds) - for (auto *P : ENT.Predicates) + for (const auto *P : ENT.Predicates) Preds->push_back(P); assert((Preds || ENT.hasAlwaysTruePredicate()) && @@ -8365,7 +8400,7 @@ ScalarEvolution::BackedgeTakenInfo::getExact(const Loop *L, ScalarEvolution *SE, const SCEV * ScalarEvolution::BackedgeTakenInfo::getExact(const BasicBlock *ExitingBlock, ScalarEvolution *SE) const { - for (auto &ENT : ExitNotTaken) + for (const auto &ENT : ExitNotTaken) if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePredicate()) return ENT.ExactNotTaken; @@ -8374,7 +8409,7 @@ ScalarEvolution::BackedgeTakenInfo::getExact(const BasicBlock *ExitingBlock, const SCEV *ScalarEvolution::BackedgeTakenInfo::getConstantMax( const BasicBlock *ExitingBlock, ScalarEvolution *SE) const { - for (auto &ENT : ExitNotTaken) + for (const auto &ENT : ExitNotTaken) if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePredicate()) return ENT.MaxNotTaken; @@ -8433,8 +8468,8 @@ ScalarEvolution::ExitLimit::ExitLimit( assert((isa<SCEVCouldNotCompute>(MaxNotTaken) || isa<SCEVConstant>(MaxNotTaken)) && "No point in having a non-constant max backedge taken count!"); - for (auto *PredSet : PredSetList) - for (auto *P : *PredSet) + for (const auto *PredSet : PredSetList) + for (const auto *P : *PredSet) addPredicate(P); assert((isa<SCEVCouldNotCompute>(E) || !E->getType()->isPointerTy()) && "Backedge count should be int"); @@ -10522,8 +10557,8 @@ bool ScalarEvolution::isKnownViaInduction(ICmpInst::Predicate Pred, // Domination relationship must be a linear order on collected loops. 
#ifndef NDEBUG - for (auto *L1 : LoopsUsed) - for (auto *L2 : LoopsUsed) + for (const auto *L1 : LoopsUsed) + for (const auto *L2 : LoopsUsed) assert((DT.dominates(L1->getHeader(), L2->getHeader()) || DT.dominates(L2->getHeader(), L1->getHeader())) && "Domination relationship is not a linear order"); @@ -10977,8 +11012,10 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { // Interpret a null as meaning no loop, where there is obviously no guard - // (interprocedural conditions notwithstanding). - if (!L) return true; + // (interprocedural conditions notwithstanding). Do not bother about + // unreachable loops. + if (!L || !DT.isReachableFromEntry(L->getHeader())) + return true; if (VerifyIR) assert(!verifyFunction(*L->getHeader()->getParent(), &dbgs()) && @@ -11035,12 +11072,6 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L, return true; } - // If the loop is not reachable from the entry block, we risk running into an - // infinite loop as we walk up into the dom tree. These loops do not matter - // anyway, so we just return a conservative answer when we see them. - if (!DT.isReachableFromEntry(L->getHeader())) - return false; - if (isImpliedViaGuard(Latch, Pred, LHS, RHS)) return true; @@ -11086,6 +11117,9 @@ bool ScalarEvolution::isBasicBlockEntryGuardedByCond(const BasicBlock *BB, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { + // Do not bother proving facts for unreachable code. + if (!DT.isReachableFromEntry(BB)) + return true; if (VerifyIR) assert(!verifyFunction(*BB->getParent(), &dbgs()) && "This cannot be done on broken IR!"); @@ -11162,14 +11196,13 @@ bool ScalarEvolution::isBasicBlockEntryGuardedByCond(const BasicBlock *BB, if (ProveViaGuard(Pair.first)) return true; - const BranchInst *LoopEntryPredicate = + const BranchInst *BlockEntryPredicate = dyn_cast<BranchInst>(Pair.first->getTerminator()); - if (!LoopEntryPredicate || - LoopEntryPredicate->isUnconditional()) + if (!BlockEntryPredicate || BlockEntryPredicate->isUnconditional()) continue; - if (ProveViaCond(LoopEntryPredicate->getCondition(), - LoopEntryPredicate->getSuccessor(0) != Pair.second)) + if (ProveViaCond(BlockEntryPredicate->getCondition(), + BlockEntryPredicate->getSuccessor(0) != Pair.second)) return true; } @@ -13179,7 +13212,7 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE, if (!isa<SCEVCouldNotCompute>(PBT)) { OS << "Predicated backedge-taken count is " << *PBT << "\n"; OS << " Predicates:\n"; - for (auto *P : Preds) + for (const auto *P : Preds) P->print(OS, 4); } else { OS << "Unpredictable predicated backedge-taken count. "; @@ -13256,7 +13289,7 @@ void ScalarEvolution::print(raw_ostream &OS) const { } bool First = true; - for (auto *Iter = L; Iter; Iter = Iter->getParentLoop()) { + for (const auto *Iter = L; Iter; Iter = Iter->getParentLoop()) { if (First) { OS << "\t\t" "LoopDispositions: { "; First = false; @@ -13268,7 +13301,7 @@ void ScalarEvolution::print(raw_ostream &OS) const { OS << ": " << loopDispositionToStr(SE.getLoopDisposition(SV, Iter)); } - for (auto *InnerL : depth_first(L)) { + for (const auto *InnerL : depth_first(L)) { if (InnerL == L) continue; if (First) { @@ -13348,7 +13381,7 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) { // This recurrence is variant w.r.t. L if any of its operands // are variant. 
- for (auto *Op : AR->operands()) + for (const auto *Op : AR->operands()) if (!isLoopInvariant(Op, L)) return LoopVariant; @@ -13363,7 +13396,7 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) { case scSMinExpr: case scSequentialUMinExpr: { bool HasVarying = false; - for (auto *Op : cast<SCEVNAryExpr>(S)->operands()) { + for (const auto *Op : cast<SCEVNAryExpr>(S)->operands()) { LoopDisposition D = getLoopDisposition(Op, L); if (D == LoopVariant) return LoopVariant; @@ -13529,12 +13562,12 @@ void ScalarEvolution::forgetMemoizedResults(ArrayRef<const SCEV *> SCEVs) { const SCEV *Curr = Worklist.pop_back_val(); auto Users = SCEVUsers.find(Curr); if (Users != SCEVUsers.end()) - for (auto *User : Users->second) + for (const auto *User : Users->second) if (ToForget.insert(User).second) Worklist.push_back(User); } - for (auto *S : ToForget) + for (const auto *S : ToForget) forgetMemoizedResultsImpl(S); for (auto I = PredicatedSCEVRewrites.begin(); @@ -13747,7 +13780,7 @@ void ScalarEvolution::verify() const { if (ValidLoops.insert(L).second) Worklist.append(L->begin(), L->end()); } - for (auto &KV : ValueExprMap) { + for (const auto &KV : ValueExprMap) { #ifndef NDEBUG // Check for SCEV expressions referencing invalid/deleted loops. if (auto *AR = dyn_cast<SCEVAddRecExpr>(KV.second)) { @@ -14018,7 +14051,7 @@ public: const SCEV *visitUnknown(const SCEVUnknown *Expr) { if (Pred) { if (auto *U = dyn_cast<SCEVUnionPredicate>(Pred)) { - for (auto *Pred : U->getPredicates()) + for (const auto *Pred : U->getPredicates()) if (const auto *IPred = dyn_cast<SCEVComparePredicate>(Pred)) if (IPred->getLHS() == Expr && IPred->getPredicate() == ICmpInst::ICMP_EQ) @@ -14098,7 +14131,7 @@ private: PredicatedRewrite = SE.createAddRecFromPHIWithCasts(Expr); if (!PredicatedRewrite) return Expr; - for (auto *P : PredicatedRewrite->second){ + for (const auto *P : PredicatedRewrite->second){ // Wrap predicates from outer loops are not supported. if (auto *WP = dyn_cast<const SCEVWrapPredicate>(P)) { if (L != WP->getExpr()->getLoop()) @@ -14135,7 +14168,7 @@ const SCEVAddRecExpr *ScalarEvolution::convertSCEVToAddRecWithPredicates( // Since the transformation was successful, we can now transfer the SCEV // predicates. - for (auto *P : TransformPreds) + for (const auto *P : TransformPreds) Preds.insert(P); return AddRec; @@ -14234,7 +14267,7 @@ SCEVWrapPredicate::getImpliedFlags(const SCEVAddRecExpr *AR, /// Union predicates don't get cached so create a dummy set ID for it. 
SCEVUnionPredicate::SCEVUnionPredicate(ArrayRef<const SCEVPredicate *> Preds) : SCEVPredicate(FoldingSetNodeIDRef(nullptr, 0), P_Union) { - for (auto *P : Preds) + for (const auto *P : Preds) add(P); } @@ -14253,13 +14286,13 @@ bool SCEVUnionPredicate::implies(const SCEVPredicate *N) const { } void SCEVUnionPredicate::print(raw_ostream &OS, unsigned Depth) const { - for (auto Pred : Preds) + for (const auto *Pred : Preds) Pred->print(OS, Depth); } void SCEVUnionPredicate::add(const SCEVPredicate *N) { if (const auto *Set = dyn_cast<SCEVUnionPredicate>(N)) { - for (auto Pred : Set->Preds) + for (const auto *Pred : Set->Preds) add(Pred); return; } @@ -14276,7 +14309,7 @@ PredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE, void ScalarEvolution::registerUser(const SCEV *User, ArrayRef<const SCEV *> Ops) { - for (auto *Op : Ops) + for (const auto *Op : Ops) // We do not expect that forgetting cached data for SCEVConstants will ever // open any prospects for sharpening or introduce any correctness issues, // so we don't bother storing their dependencies. @@ -14307,7 +14340,7 @@ const SCEV *PredicatedScalarEvolution::getBackedgeTakenCount() { if (!BackedgeCount) { SmallVector<const SCEVPredicate *, 4> Preds; BackedgeCount = SE.getPredicatedBackedgeTakenCount(&L, Preds); - for (auto *P : Preds) + for (const auto *P : Preds) addPredicate(*P); } return BackedgeCount; @@ -14378,7 +14411,7 @@ const SCEVAddRecExpr *PredicatedScalarEvolution::getAsAddRec(Value *V) { if (!New) return nullptr; - for (auto *P : NewPreds) + for (const auto *P : NewPreds) addPredicate(*P); RewriteMap[SE.getSCEV(V)] = {Generation, New}; diff --git a/llvm/lib/Analysis/StackLifetime.cpp b/llvm/lib/Analysis/StackLifetime.cpp index 52e8566aca3c..162fd75c73e0 100644 --- a/llvm/lib/Analysis/StackLifetime.cpp +++ b/llvm/lib/Analysis/StackLifetime.cpp @@ -182,7 +182,7 @@ void StackLifetime::calculateLocalLiveness() { // Compute LiveIn by unioning together the LiveOut sets of all preds. BitVector LocalLiveIn; - for (auto *PredBB : predecessors(BB)) { + for (const auto *PredBB : predecessors(BB)) { LivenessMap::const_iterator I = BlockLiveness.find(PredBB); // If a predecessor is unreachable, ignore it. 
if (I == BlockLiveness.end()) diff --git a/llvm/lib/Analysis/StackSafetyAnalysis.cpp b/llvm/lib/Analysis/StackSafetyAnalysis.cpp index 94b646ab7c06..9698af3ca85c 100644 --- a/llvm/lib/Analysis/StackSafetyAnalysis.cpp +++ b/llvm/lib/Analysis/StackSafetyAnalysis.cpp @@ -206,7 +206,7 @@ template <typename CalleeTy> struct FunctionInfo { O << " allocas uses:\n"; if (F) { - for (auto &I : instructions(F)) { + for (const auto &I : instructions(F)) { if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) { auto &AS = Allocas.find(AI)->second; O << " " << AI->getName() << "[" @@ -763,7 +763,7 @@ const ConstantRange *findParamAccess(const FunctionSummary &FS, uint32_t ParamNo) { assert(FS.isLive()); assert(FS.isDSOLocal()); - for (auto &PS : FS.paramAccesses()) + for (const auto &PS : FS.paramAccesses()) if (ParamNo == PS.ParamNo) return &PS.Use; return nullptr; @@ -823,7 +823,7 @@ GVToSSI createGlobalStackSafetyInfo( Copy.begin()->first->getParent()->getDataLayout().getPointerSizeInBits(); StackSafetyDataFlowAnalysis<GlobalValue> SSDFA(PointerSize, std::move(Copy)); - for (auto &F : SSDFA.run()) { + for (const auto &F : SSDFA.run()) { auto FI = F.second; auto &SrcF = Functions[F.first]; for (auto &KV : FI.Allocas) { @@ -922,7 +922,7 @@ StackSafetyInfo::getParamAccesses(ModuleSummaryIndex &Index) const { FunctionSummary::ParamAccess &Param = ParamAccesses.back(); Param.Calls.reserve(PS.Calls.size()); - for (auto &C : PS.Calls) { + for (const auto &C : PS.Calls) { // Parameter forwarded into another function by any or unknown offset // will make ParamAccess::Range as FullSet anyway. So we can drop the // entire parameter like we did above. @@ -978,7 +978,7 @@ void StackSafetyGlobalInfo::print(raw_ostream &O) const { if (SSI.empty()) return; const Module &M = *SSI.begin()->first->getParent(); - for (auto &F : M.functions()) { + for (const auto &F : M.functions()) { if (!F.isDeclaration()) { SSI.find(&F)->second.print(O, F.getName(), &F); O << " safe accesses:" @@ -1094,7 +1094,7 @@ bool StackSafetyGlobalInfoWrapperPass::runOnModule(Module &M) { bool llvm::needsParamAccessSummary(const Module &M) { if (StackSafetyRun) return true; - for (auto &F : M.functions()) + for (const auto &F : M.functions()) if (F.hasFnAttribute(Attribute::SanitizeMemTag)) return true; return false; @@ -1126,13 +1126,13 @@ void llvm::generateParamAccessSummary(ModuleSummaryIndex &Index) { continue; if (FS->isLive() && FS->isDSOLocal()) { FunctionInfo<FunctionSummary> FI; - for (auto &PS : FS->paramAccesses()) { + for (const auto &PS : FS->paramAccesses()) { auto &US = FI.Params .emplace(PS.ParamNo, FunctionSummary::ParamAccess::RangeWidth) .first->second; US.Range = PS.Use; - for (auto &Call : PS.Calls) { + for (const auto &Call : PS.Calls) { assert(!Call.Offsets.isFullSet()); FunctionSummary *S = findCalleeFunctionSummary(Call.Callee, FS->modulePath()); @@ -1158,10 +1158,10 @@ void llvm::generateParamAccessSummary(ModuleSummaryIndex &Index) { NumCombinedDataFlowNodes += Functions.size(); StackSafetyDataFlowAnalysis<FunctionSummary> SSDFA( FunctionSummary::ParamAccess::RangeWidth, std::move(Functions)); - for (auto &KV : SSDFA.run()) { + for (const auto &KV : SSDFA.run()) { std::vector<FunctionSummary::ParamAccess> NewParams; NewParams.reserve(KV.second.Params.size()); - for (auto &Param : KV.second.Params) { + for (const auto &Param : KV.second.Params) { // It's not needed as FullSet is processed the same as a missing value. 
if (Param.second.Range.isFullSet()) continue; diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 6e34a8303c08..cfa6e3a97626 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -58,14 +58,16 @@ bool HardwareLoopInfo::canAnalyze(LoopInfo &LI) { } IntrinsicCostAttributes::IntrinsicCostAttributes( - Intrinsic::ID Id, const CallBase &CI, InstructionCost ScalarizationCost) + Intrinsic::ID Id, const CallBase &CI, InstructionCost ScalarizationCost, + bool TypeBasedOnly) : II(dyn_cast<IntrinsicInst>(&CI)), RetTy(CI.getType()), IID(Id), ScalarizationCost(ScalarizationCost) { if (const auto *FPMO = dyn_cast<FPMathOperator>(&CI)) FMF = FPMO->getFastMathFlags(); - Arguments.insert(Arguments.begin(), CI.arg_begin(), CI.arg_end()); + if (!TypeBasedOnly) + Arguments.insert(Arguments.begin(), CI.arg_begin(), CI.arg_end()); FunctionType *FTy = CI.getCalledFunction()->getFunctionType(); ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end()); } @@ -294,8 +296,8 @@ bool TargetTransformInfo::isHardwareLoopProfitable( bool TargetTransformInfo::preferPredicateOverEpilogue( Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, - const LoopAccessInfo *LAI) const { - return TTIImpl->preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI); + LoopVectorizationLegality *LVL) const { + return TTIImpl->preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LVL); } PredicationStyle TargetTransformInfo::emitGetActiveLaneMask() const { diff --git a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp index 560f46d39d0d..216027778fab 100644 --- a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -303,7 +303,7 @@ public: /// given offset. Update the offset to be relative to the field type. TBAAStructTypeNode getField(uint64_t &Offset) const { bool NewFormat = isNewFormat(); - const ArrayRef<MDOperand> Operands(Node->op_begin(), Node->op_end()); + const ArrayRef<MDOperand> Operands = Node->operands(); const unsigned NumOperands = Operands.size(); if (NewFormat) { @@ -811,7 +811,8 @@ MDNode *AAMDNodes::extendToTBAA(MDNode *MD, ssize_t Len) { return nullptr; // Otherwise, create TBAA with the new Len - SmallVector<Metadata *, 4> NextNodes(MD->operands()); + ArrayRef<MDOperand> MDOperands = MD->operands(); + SmallVector<Metadata *, 4> NextNodes(MDOperands.begin(), MDOperands.end()); ConstantInt *PreviousSize = mdconst::extract<ConstantInt>(NextNodes[3]); // Don't create a new MDNode if it is the same length. 
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index add2d427e05b..1f3798d1338e 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -2297,7 +2297,7 @@ static bool isKnownNonNullFromDominatingCondition(const Value *V, return false; unsigned NumUsesExplored = 0; - for (auto *U : V->users()) { + for (const auto *U : V->users()) { // Avoid massive lists if (NumUsesExplored >= DomConditionsMaxUses) break; @@ -2338,7 +2338,7 @@ static bool isKnownNonNullFromDominatingCondition(const Value *V, SmallVector<const User *, 4> WorkList; SmallPtrSet<const User *, 4> Visited; - for (auto *CmpU : U->users()) { + for (const auto *CmpU : U->users()) { assert(WorkList.empty() && "Should be!"); if (Visited.insert(CmpU).second) WorkList.push_back(CmpU); @@ -2352,7 +2352,7 @@ static bool isKnownNonNullFromDominatingCondition(const Value *V, // TODO: Support similar logic of OR and EQ predicate? if (NonNullIfTrue) if (match(Curr, m_LogicalAnd(m_Value(), m_Value()))) { - for (auto *CurrU : Curr->users()) + for (const auto *CurrU : Curr->users()) if (Visited.insert(CurrU).second) WorkList.push_back(CurrU); continue; @@ -5073,7 +5073,7 @@ bool llvm::isOverflowIntrinsicNoWrap(const WithOverflowInst *WO, if (DT.dominates(NoWrapEdge, Result->getParent())) continue; - for (auto &RU : Result->uses()) + for (const auto &RU : Result->uses()) if (!DT.dominates(NoWrapEdge, RU)) return false; } @@ -5645,7 +5645,7 @@ static bool programUndefinedIfUndefOrPoison(const Value *V, // whether a value is directly passed to an instruction that must take // well-defined operands. - for (auto &I : make_range(Begin, End)) { + for (const auto &I : make_range(Begin, End)) { if (isa<DbgInfoIntrinsic>(I)) continue; if (--ScanLimit == 0) @@ -5676,7 +5676,7 @@ static bool programUndefinedIfUndefOrPoison(const Value *V, Visited.insert(BB); while (true) { - for (auto &I : make_range(Begin, End)) { + for (const auto &I : make_range(Begin, End)) { if (isa<DbgInfoIntrinsic>(I)) continue; if (--ScanLimit == 0) diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index 894680cda1fc..c4795a80ead2 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -762,7 +762,7 @@ static void addToAccessGroupList(ListT &List, MDNode *AccGroups) { return; } - for (auto &AccGroupListOp : AccGroups->operands()) { + for (const auto &AccGroupListOp : AccGroups->operands()) { auto *Item = cast<MDNode>(AccGroupListOp.get()); assert(isValidAsAccessGroup(Item) && "List item must be an access group"); List.insert(Item); @@ -1497,7 +1497,7 @@ void VFABI::getVectorVariantNames( SmallVector<StringRef, 8> ListAttr; S.split(ListAttr, ","); - for (auto &S : SetVector<StringRef>(ListAttr.begin(), ListAttr.end())) { + for (const auto &S : SetVector<StringRef>(ListAttr.begin(), ListAttr.end())) { #ifndef NDEBUG LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << S << "'\n"); Optional<VFInfo> Info = VFABI::tryDemangleForVFABI(S, *(CI.getModule())); diff --git a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp index 1613e7e42a0a..c5ab35d94860 100644 --- a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp +++ b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp @@ -260,6 +260,9 @@ bool MetadataVerifier::verifyKernel(msgpack::DocNode &Node) { return false; if (!verifyIntegerEntry(KernelMap, ".private_segment_fixed_size", true)) return false; + if (!verifyScalarEntry(KernelMap, 
".uses_dynamic_stack", false, + msgpack::Type::Boolean)) + return false; if (!verifyIntegerEntry(KernelMap, ".kernarg_segment_align", true)) return false; if (!verifyIntegerEntry(KernelMap, ".wavefront_size", true)) diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 8d5a2555f9af..1d6c21bd66d1 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -5510,6 +5510,61 @@ Error BitcodeReader::parseFunctionBody(Function *F) { if (!OperandBundles.empty()) UpgradeOperandBundles(OperandBundles); + if (auto *IA = dyn_cast<InlineAsm>(Callee)) { + InlineAsm::ConstraintInfoVector ConstraintInfo = IA->ParseConstraints(); + auto IsLabelConstraint = [](const InlineAsm::ConstraintInfo &CI) { + return CI.Type == InlineAsm::isLabel; + }; + if (none_of(ConstraintInfo, IsLabelConstraint)) { + // Upgrade explicit blockaddress arguments to label constraints. + // Verify that the last arguments are blockaddress arguments that + // match the indirect destinations. Clang always generates callbr + // in this form. We could support reordering with more effort. + unsigned FirstBlockArg = Args.size() - IndirectDests.size(); + for (unsigned ArgNo = FirstBlockArg; ArgNo < Args.size(); ++ArgNo) { + unsigned LabelNo = ArgNo - FirstBlockArg; + auto *BA = dyn_cast<BlockAddress>(Args[ArgNo]); + if (!BA || BA->getFunction() != F || + LabelNo > IndirectDests.size() || + BA->getBasicBlock() != IndirectDests[LabelNo]) + return error("callbr argument does not match indirect dest"); + } + + // Remove blockaddress arguments. + Args.erase(Args.begin() + FirstBlockArg, Args.end()); + ArgTyIDs.erase(ArgTyIDs.begin() + FirstBlockArg, ArgTyIDs.end()); + + // Recreate the function type with less arguments. + SmallVector<Type *> ArgTys; + for (Value *Arg : Args) + ArgTys.push_back(Arg->getType()); + FTy = + FunctionType::get(FTy->getReturnType(), ArgTys, FTy->isVarArg()); + + // Update constraint string to use label constraints. + std::string Constraints = IA->getConstraintString(); + unsigned ArgNo = 0; + size_t Pos = 0; + for (const auto &CI : ConstraintInfo) { + if (CI.hasArg()) { + if (ArgNo >= FirstBlockArg) + Constraints.insert(Pos, "!"); + ++ArgNo; + } + + // Go to next constraint in string. 
+ Pos = Constraints.find(',', Pos); + if (Pos == std::string::npos) + break; + ++Pos; + } + + Callee = InlineAsm::get(FTy, IA->getAsmString(), Constraints, + IA->hasSideEffects(), IA->isAlignStack(), + IA->getDialect(), IA->canThrow()); + } + } + I = CallBrInst::Create(FTy, Callee, DefaultDest, IndirectDests, Args, OperandBundles); ResTypeID = getContainedTypeID(FTyID); diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp index 0d57ae4ef9df..13d53a35084d 100644 --- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp @@ -1226,10 +1226,12 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( break; } - MetadataList.assignValue( - LocalAsMetadata::get(ValueList.getValueFwdRef( - Record[1], Ty, TyID, /*ConstExprInsertBB*/ nullptr)), - NextMetadataNo); + Value *V = ValueList.getValueFwdRef(Record[1], Ty, TyID, + /*ConstExprInsertBB*/ nullptr); + if (!V) + return error("Invalid value reference from old fn metadata"); + + MetadataList.assignValue(LocalAsMetadata::get(V), NextMetadataNo); NextMetadataNo++; break; } @@ -1248,8 +1250,11 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( if (Ty->isMetadataTy()) Elts.push_back(getMD(Record[i + 1])); else if (!Ty->isVoidTy()) { - auto *MD = ValueAsMetadata::get(ValueList.getValueFwdRef( - Record[i + 1], Ty, TyID, /*ConstExprInsertBB*/ nullptr)); + Value *V = ValueList.getValueFwdRef(Record[i + 1], Ty, TyID, + /*ConstExprInsertBB*/ nullptr); + if (!V) + return error("Invalid value reference from old metadata"); + auto *MD = ValueAsMetadata::get(V); assert(isa<ConstantAsMetadata>(MD) && "Expected non-function-local metadata"); Elts.push_back(MD); @@ -1269,10 +1274,12 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( if (Ty->isMetadataTy() || Ty->isVoidTy()) return error("Invalid record"); - MetadataList.assignValue( - ValueAsMetadata::get(ValueList.getValueFwdRef( - Record[1], Ty, TyID, /*ConstExprInsertBB*/ nullptr)), - NextMetadataNo); + Value *V = ValueList.getValueFwdRef(Record[1], Ty, TyID, + /*ConstExprInsertBB*/ nullptr); + if (!V) + return error("Invalid value reference from metadata"); + + MetadataList.assignValue(ValueAsMetadata::get(V), NextMetadataNo); NextMetadataNo++; break; } diff --git a/llvm/lib/Bitcode/Reader/ValueList.h b/llvm/lib/Bitcode/Reader/ValueList.h index 995d46f01f75..a5b3f6e20707 100644 --- a/llvm/lib/Bitcode/Reader/ValueList.h +++ b/llvm/lib/Bitcode/Reader/ValueList.h @@ -21,7 +21,6 @@ namespace llvm { -class Constant; class Error; class Type; class Value; diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 590562ce2796..d7e012fb6a9e 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -4104,8 +4104,9 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() { for (const GlobalAlias &A : M.aliases()) { auto *Aliasee = A.getAliaseeObject(); - if (!Aliasee->hasName()) - // Nameless function don't have an entry in the summary, skip it. + // Skip ifunc and nameless functions which don't have an entry in the + // summary. 
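Returning to the callbr auto-upgrade in BitcodeReader.cpp above: the loop over ConstraintInfo rewrites the constraint string so that the constraints consumed by the trailing blockaddress arguments become label ("!") constraints. A standalone sketch of that rewrite on a plain string; it is simplified by assuming every comma-separated constraint consumes exactly one call argument, so it illustrates only the string manipulation:

#include <cassert>
#include <string>

// Insert "!" in front of every constraint whose argument index falls in the
// trailing blockaddress-argument range [FirstBlockArg, NumArgs).
static std::string upgradeConstraints(std::string Constraints, unsigned NumArgs,
                                      unsigned FirstBlockArg) {
  size_t Pos = 0;
  for (unsigned ArgNo = 0; ArgNo < NumArgs; ++ArgNo) {
    if (ArgNo >= FirstBlockArg)
      Constraints.insert(Pos, "!"); // turn it into a label constraint
    // Advance to the start of the next comma-separated constraint.
    Pos = Constraints.find(',', Pos);
    if (Pos == std::string::npos)
      break;
    ++Pos;
  }
  return Constraints;
}

int main() {
  // One ordinary argument followed by two blockaddress arguments:
  // "r,i,i" becomes "r,!i,!i".
  assert(upgradeConstraints("r,i,i", 3, 1) == "r,!i,!i");
  return 0;
}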
+ if (!Aliasee->hasName() || isa<GlobalIFunc>(Aliasee)) continue; auto AliasId = VE.getValueID(&A); auto AliaseeId = VE.getValueID(Aliasee); diff --git a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp index 727ec2e02cc2..998f629aaa4e 100644 --- a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -802,7 +802,7 @@ void ValueEnumerator::organizeMetadata() { // - by function, then // - by isa<MDString> // and then sort by the original/current ID. Since the IDs are guaranteed to - // be unique, the result of std::sort will be deterministic. There's no need + // be unique, the result of llvm::sort will be deterministic. There's no need // for std::stable_sort. llvm::sort(Order, [this](MDIndex LHS, MDIndex RHS) { return std::make_tuple(LHS.F, getMetadataTypeOrder(LHS.get(MDs)), LHS.ID) < diff --git a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp index b10d79f4b5a6..9526bf7610b4 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp @@ -245,8 +245,8 @@ public: void AccelTableWriter::emitHashes() const { uint64_t PrevHash = std::numeric_limits<uint64_t>::max(); unsigned BucketIdx = 0; - for (auto &Bucket : Contents.getBuckets()) { - for (auto &Hash : Bucket) { + for (const auto &Bucket : Contents.getBuckets()) { + for (const auto &Hash : Bucket) { uint32_t HashValue = Hash->HashValue; if (SkipIdenticalHashes && PrevHash == HashValue) continue; @@ -327,7 +327,7 @@ void AppleAccelTableWriter::emitData() const { const auto &Buckets = Contents.getBuckets(); for (const AccelTableBase::HashList &Bucket : Buckets) { uint64_t PrevHash = std::numeric_limits<uint64_t>::max(); - for (auto &Hash : Bucket) { + for (const auto &Hash : Bucket) { // Terminate the previous entry if there is no hash collision with the // current one. if (PrevHash != std::numeric_limits<uint64_t>::max() && @@ -667,12 +667,12 @@ void AccelTableBase::print(raw_ostream &OS) const { } OS << "Buckets and Hashes: \n"; - for (auto &Bucket : Buckets) - for (auto &Hash : Bucket) + for (const auto &Bucket : Buckets) + for (const auto &Hash : Bucket) Hash->print(OS); OS << "Data: \n"; - for (auto &E : Entries) + for (const auto &E : Entries) E.second.print(OS); } diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 94612a51d2e1..e0050a47a6f6 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -488,7 +488,7 @@ bool AsmPrinter::doInitialization(Module &M) { GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); assert(MI && "AsmPrinter didn't require GCModuleInfo?"); - for (auto &I : *MI) + for (const auto &I : *MI) if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I)) MP->beginAssembly(M, *MI, *this); @@ -1731,7 +1731,7 @@ static unsigned getNumGlobalVariableUses(const Constant *C) { return 1; unsigned NumUses = 0; - for (auto *CU : C->users()) + for (const auto *CU : C->users()) NumUses += getNumGlobalVariableUses(dyn_cast<Constant>(CU)); return NumUses; @@ -1754,7 +1754,7 @@ static bool isGOTEquivalentCandidate(const GlobalVariable *GV, // To be a got equivalent, at least one of its users need to be a constant // expression used by another global variable. 
- for (auto *U : GV->users()) + for (const auto *U : GV->users()) NumGOTEquivUsers += getNumGlobalVariableUses(dyn_cast<Constant>(U)); return NumGOTEquivUsers > 0; @@ -1797,7 +1797,7 @@ void AsmPrinter::emitGlobalGOTEquivs() { } GlobalGOTEquivs.clear(); - for (auto *GV : FailedCandidates) + for (const auto *GV : FailedCandidates) emitGlobalVariable(GV); } @@ -2731,6 +2731,8 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { // to represent relocations on supported targets. Expressions involving only // constant addresses are constant folded instead. switch (CE->getOpcode()) { + default: + break; // Error case Instruction::AddrSpaceCast: { const Constant *Op = CE->getOperand(0); unsigned DstAS = CE->getType()->getPointerAddressSpace(); @@ -2738,24 +2740,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { if (TM.isNoopAddrSpaceCast(SrcAS, DstAS)) return lowerConstant(Op); - // Fallthrough to error. - LLVM_FALLTHROUGH; - } - default: { - // If the code isn't optimized, there may be outstanding folding - // opportunities. Attempt to fold the expression using DataLayout as a - // last resort before giving up. - Constant *C = ConstantFoldConstant(CE, getDataLayout()); - if (C != CE) - return lowerConstant(C); - - // Otherwise report the problem to the user. - std::string S; - raw_string_ostream OS(S); - OS << "Unsupported expression in static initializer: "; - CE->printAsOperand(OS, /*PrintType=*/false, - !MF ? nullptr : MF->getFunction().getParent()); - report_fatal_error(Twine(OS.str())); + break; // Error } case Instruction::GetElementPtr: { // Generate a symbolic expression for the byte address @@ -2860,6 +2845,21 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { return MCBinaryExpr::createAdd(LHS, RHS, Ctx); } } + + // If the code isn't optimized, there may be outstanding folding + // opportunities. Attempt to fold the expression using DataLayout as a + // last resort before giving up. + Constant *C = ConstantFoldConstant(CE, getDataLayout()); + if (C != CE) + return lowerConstant(C); + + // Otherwise report the problem to the user. + std::string S; + raw_string_ostream OS(S); + OS << "Unsupported expression in static initializer: "; + CE->printAsOperand(OS, /*PrintType=*/false, + !MF ? nullptr : MF->getFunction().getParent()); + report_fatal_error(Twine(OS.str())); } static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *C, @@ -3359,9 +3359,12 @@ void AsmPrinter::emitGlobalConstant(const DataLayout &DL, const Constant *CV, } if (!AliasList) return; - for (const auto &AliasPair : *AliasList) - report_fatal_error("Aliases with offset " + Twine(AliasPair.first) + - " were not emitted."); + // TODO: These remaining aliases are not emitted in the correct location. Need + // to handle the case where the alias offset doesn't refer to any sub-element. + for (auto &AliasPair : *AliasList) { + for (const GlobalAlias *GA : AliasPair.second) + OutStreamer->emitLabel(getSymbol(GA)); + } } void AsmPrinter::emitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { @@ -3717,7 +3720,7 @@ void AsmPrinter::emitStackMaps(StackMaps &SM) { // No GC strategy, use the default format. 
NeedsDefault = true; else - for (auto &I : *MI) { + for (const auto &I : *MI) { if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I)) if (MP->emitStackMaps(SM, *this)) continue; diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 719fec06aa33..bfa53f5b9374 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -309,7 +309,7 @@ void AsmPrinter::emitDwarfDIE(const DIE &Die) const { // Emit the DIE children if any. if (Die.hasChildren()) { - for (auto &Child : Die.children()) + for (const auto &Child : Die.children()) emitDwarfDIE(Child); OutStreamer->AddComment("End Of Children Mark"); diff --git a/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp index 5da50d7aab9f..1d546e5fd72e 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -374,7 +374,7 @@ void DIEHash::computeHash(const DIE &Die) { addAttributes(Die); // Then hash each of the children of the DIE. - for (auto &C : Die.children()) { + for (const auto &C : Die.children()) { // 7.27 Step 7 // If C is a nested type entry or a member function entry, ... if (isType(C.getTag()) || (C.getTag() == dwarf::DW_TAG_subprogram && isType(C.getParent()->getTag()))) { diff --git a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp index 1358f4d25990..dabbfb45f687 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp @@ -340,11 +340,11 @@ static void clobberRegEntries(InlinedEntity Var, unsigned RegNo, if (Entry.getInstr()->hasDebugOperandForReg(RegNo)) { IndicesToErase.push_back(Index); Entry.endEntry(ClobberIndex); - for (auto &MO : Entry.getInstr()->debug_operands()) + for (const auto &MO : Entry.getInstr()->debug_operands()) if (MO.isReg() && MO.getReg() && MO.getReg() != RegNo) MaybeRemovedRegisters.insert(MO.getReg()); } else { - for (auto &MO : Entry.getInstr()->debug_operands()) + for (const auto &MO : Entry.getInstr()->debug_operands()) if (MO.isReg() && MO.getReg()) KeepRegisters.insert(MO.getReg()); } diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp index 660a064687d3..8ebbed974abb 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp @@ -304,7 +304,7 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) { LabelsBeforeInsn[Entries.front().getInstr()] = Asm->getFunctionBegin(); if (Entries.front().getInstr()->getDebugExpression()->isFragment()) { // Mark all non-overlapping initial fragments. 
- for (auto I = Entries.begin(); I != Entries.end(); ++I) { + for (const auto *I = Entries.begin(); I != Entries.end(); ++I) { if (!I->isDbgValue()) continue; const DIExpression *Fragment = I->getInstr()->getDebugExpression(); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index b3f99d346faa..b26960cdebb8 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -848,7 +848,7 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, Optional<unsigned> NVPTXAddressSpace; DIELoc *Loc = new (DIEValueAllocator) DIELoc; DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); - for (auto &Fragment : DV.getFrameIndexExprs()) { + for (const auto &Fragment : DV.getFrameIndexExprs()) { Register FrameReg; const DIExpression *Expr = Fragment.Expr; const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering(); @@ -970,7 +970,7 @@ sortLocalVars(SmallVectorImpl<DbgVariable *> &Input) { SmallDenseSet<DbgVariable *, 8> Visiting; // Initialize the worklist and the DIVariable lookup table. - for (auto Var : reverse(Input)) { + for (auto *Var : reverse(Input)) { DbgVar.insert({Var->getVariable(), Var}); WorkList.push_back({Var, 0}); } @@ -1005,7 +1005,7 @@ sortLocalVars(SmallVectorImpl<DbgVariable *> &Input) { // Push dependencies and this node onto the worklist, so that this node is // visited again after all of its dependencies are handled. WorkList.push_back({Var, 1}); - for (auto *Dependency : dependencies(Var)) { + for (const auto *Dependency : dependencies(Var)) { // Don't add dependency if it is in a different lexical scope or a global. if (const auto *Dep = dyn_cast<const DILocalVariable>(Dependency)) if (DbgVariable *Var = DbgVar.lookup(Dep)) diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 866338a949f3..54af14429907 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -819,7 +819,7 @@ static void collectCallSiteParameters(const MachineInstr *CallMI, } // Do not emit CSInfo for undef forwarding registers. - for (auto &MO : CallMI->uses()) + for (const auto &MO : CallMI->uses()) if (MO.isReg() && MO.isUndef()) ForwardedRegWorklist.erase(MO.getReg()); @@ -2235,7 +2235,7 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) { #endif // Construct abstract scopes. for (LexicalScope *AScope : LScopes.getAbstractScopesList()) { - auto *SP = cast<DISubprogram>(AScope->getScopeNode()); + const auto *SP = cast<DISubprogram>(AScope->getScopeNode()); for (const DINode *DN : SP->getRetainedNodes()) { if (!Processed.insert(InlinedEntity(DN, nullptr)).second) continue; @@ -2527,7 +2527,7 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer, using Encoding = DWARFExpression::Operation::Encoding; uint64_t Offset = 0; - for (auto &Op : Expr) { + for (const auto &Op : Expr) { assert(Op.getCode() != dwarf::DW_OP_const_type && "3 operand ops not yet supported"); Streamer.emitInt8(Op.getCode(), Comment != End ? 
*(Comment++) : ""); diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index ad9dc517539a..f21c1bf4e914 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -187,7 +187,7 @@ bool AtomicExpand::runOnFunction(Function &F) { AtomicInsts.push_back(&I); bool MadeChange = false; - for (auto I : AtomicInsts) { + for (auto *I : AtomicInsts) { auto LI = dyn_cast<LoadInst>(I); auto SI = dyn_cast<StoreInst>(I); auto RMWI = dyn_cast<AtomicRMWInst>(I); @@ -1371,7 +1371,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // Look for any users of the cmpxchg that are just comparing the loaded value // against the desired one, and replace them with the CFG-derived version. SmallVector<ExtractValueInst *, 2> PrunedInsts; - for (auto User : CI->users()) { + for (auto *User : CI->users()) { ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User); if (!EV) continue; @@ -1388,7 +1388,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { } // We can remove the instructions now we're no longer iterating through them. - for (auto EV : PrunedInsts) + for (auto *EV : PrunedInsts) EV->eraseFromParent(); if (!CI->use_empty()) { diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp index f05f5b9f9947..958212a0e448 100644 --- a/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -268,8 +268,8 @@ void llvm::sortBasicBlocksAndUpdateBranches( // If the exception section begins with a landing pad, that landing pad will // assume a zero offset (relative to @LPStart) in the LSDA. However, a value of // zero implies "no landing pad." This function inserts a NOP just before the EH -// pad label to ensure a nonzero offset. Returns true if padding is not needed. -static bool avoidZeroOffsetLandingPad(MachineFunction &MF) { +// pad label to ensure a nonzero offset. 
+void llvm::avoidZeroOffsetLandingPad(MachineFunction &MF) { for (auto &MBB : MF) { if (MBB.isBeginSection() && MBB.isEHPad()) { MachineBasicBlock::iterator MI = MBB.begin(); @@ -278,10 +278,8 @@ static bool avoidZeroOffsetLandingPad(MachineFunction &MF) { MCInst Nop = MF.getSubtarget().getInstrInfo()->getNop(); BuildMI(MBB, MI, DebugLoc(), MF.getSubtarget().getInstrInfo()->get(Nop.getOpcode())); - return false; } } - return true; } // This checks if the source of this function has drifted since this binary was @@ -297,7 +295,7 @@ static bool hasInstrProfHashMismatch(MachineFunction &MF) { auto *Existing = MF.getFunction().getMetadata(LLVMContext::MD_annotation); if (Existing) { MDTuple *Tuple = cast<MDTuple>(Existing); - for (auto &N : Tuple->operands()) + for (const auto &N : Tuple->operands()) if (cast<MDString>(N.get())->getString() == MetadataName) return true; } diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp index 689e49978d43..519b24c21d7a 100644 --- a/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -121,7 +121,7 @@ bool VirtRegAuxInfo::isRematerializable(const LiveInterval &LI, assert(MI && "Dead valno in interval"); } - if (!TII.isTriviallyReMaterializable(*MI, LIS.getAliasAnalysis())) + if (!TII.isTriviallyReMaterializable(*MI)) return false; } return true; @@ -279,7 +279,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start, MRI.clearSimpleHint(LI.reg()); std::set<Register> HintedRegs; - for (auto &Hint : CopyHints) { + for (const auto &Hint : CopyHints) { if (!HintedRegs.insert(Hint.Reg).second || (TargetHint.first != 0 && Hint.Reg == TargetHint.second)) // Don't add the same reg twice or the target-type hint again. diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 6778af22f532..b6c762b93ca5 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -730,7 +730,7 @@ bool CodeGenPrepare::eliminateFallThrough(Function &F) { // (Repeatedly) merging blocks into their predecessors can create redundant // debug intrinsics. - for (auto &Pred : Preds) + for (const auto &Pred : Preds) if (auto *BB = cast_or_null<BasicBlock>(Pred)) RemoveRedundantDbgInstrs(BB); @@ -3684,7 +3684,7 @@ private: // Phi we added (subject to match) and both of them is in the same basic // block then we can match our pair if values match. So we state that // these values match and add it to work list to verify that. - for (auto B : Item.first->blocks()) { + for (auto *B : Item.first->blocks()) { Value *FirstValue = Item.first->getIncomingValueForBlock(B); Value *SecondValue = Item.second->getIncomingValueForBlock(B); if (FirstValue == SecondValue) @@ -5227,18 +5227,31 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, WeakTrackingVH SunkAddrVH = SunkAddrs[Addr]; Value * SunkAddr = SunkAddrVH.pointsToAliveValue() ? 
SunkAddrVH : nullptr; + Type *IntPtrTy = DL->getIntPtrType(Addr->getType()); if (SunkAddr) { LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " << *MemoryInst << "\n"); - if (SunkAddr->getType() != Addr->getType()) - SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType()); + if (SunkAddr->getType() != Addr->getType()) { + if (SunkAddr->getType()->getPointerAddressSpace() != + Addr->getType()->getPointerAddressSpace() && + !DL->isNonIntegralPointerType(Addr->getType())) { + // There are two reasons the address spaces might not match: a no-op + // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a + // ptrtoint/inttoptr pair to ensure we match the original semantics. + // TODO: allow bitcast between different address space pointers with the + // same size. + SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr"); + SunkAddr = + Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr"); + } else + SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType()); + } } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() && SubtargetInfo->addrSinkUsingGEPs())) { // By default, we use the GEP-based method when AA is used later. This // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities. LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " << *MemoryInst << "\n"); - Type *IntPtrTy = DL->getIntPtrType(Addr->getType()); Value *ResultPtr = nullptr, *ResultIndex = nullptr; // First, find the pointer. @@ -5361,8 +5374,21 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, AddrMode.InBounds); } - if (SunkAddr->getType() != Addr->getType()) - SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType()); + if (SunkAddr->getType() != Addr->getType()) { + if (SunkAddr->getType()->getPointerAddressSpace() != + Addr->getType()->getPointerAddressSpace() && + !DL->isNonIntegralPointerType(Addr->getType())) { + // There are two reasons the address spaces might not match: a no-op + // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a + // ptrtoint/inttoptr pair to ensure we match the original semantics. + // TODO: allow bitcast between different address space pointers with + // the same size. + SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr"); + SunkAddr = + Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr"); + } else + SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType()); + } } } else { // We'd require a ptrtoint/inttoptr down the line, which we can't do for @@ -7793,9 +7819,11 @@ static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, } // After unmerging, verify that GEPIOp is actually only used in SrcBlock (not // alive on IndirectBr edges). 
- assert(find_if(GEPIOp->users(), [&](User *Usr) { - return cast<Instruction>(Usr)->getParent() != SrcBlock; - }) == GEPIOp->users().end() && "GEPIOp is used outside SrcBlock"); + assert(llvm::none_of(GEPIOp->users(), + [&](User *Usr) { + return cast<Instruction>(Usr)->getParent() != SrcBlock; + }) && + "GEPIOp is used outside SrcBlock"); return true; } diff --git a/llvm/lib/CodeGen/DFAPacketizer.cpp b/llvm/lib/CodeGen/DFAPacketizer.cpp index 42192f41dbda..34fb1d286a58 100644 --- a/llvm/lib/CodeGen/DFAPacketizer.cpp +++ b/llvm/lib/CodeGen/DFAPacketizer.cpp @@ -239,7 +239,7 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, }); if (ResourceAvail && shouldAddToPacket(MI)) { // Dependency check for MI with instructions in CurrentPacketMIs. - for (auto MJ : CurrentPacketMIs) { + for (auto *MJ : CurrentPacketMIs) { SUnit *SUJ = MIToSUnit[MJ]; assert(SUJ && "Missing SUnit Info!"); diff --git a/llvm/lib/CodeGen/EarlyIfConversion.cpp b/llvm/lib/CodeGen/EarlyIfConversion.cpp index 32858d043383..c108f0088d43 100644 --- a/llvm/lib/CodeGen/EarlyIfConversion.cpp +++ b/llvm/lib/CodeGen/EarlyIfConversion.cpp @@ -576,7 +576,7 @@ static bool hasSameValue(const MachineRegisterInfo &MRI, // If the instruction could modify memory, or there may be some intervening // store between the two, we can't consider them to be equal. - if (TDef->mayLoadOrStore() && !TDef->isDereferenceableInvariantLoad(nullptr)) + if (TDef->mayLoadOrStore() && !TDef->isDereferenceableInvariantLoad()) return false; // We also can't guarantee that they are the same if, for example, the @@ -808,7 +808,7 @@ void updateDomTree(MachineDominatorTree *DomTree, const SSAIfConv &IfConv, // TBB and FBB should not dominate any blocks. // Tail children should be transferred to Head. MachineDomTreeNode *HeadNode = DomTree->getNode(IfConv.Head); - for (auto B : Removed) { + for (auto *B : Removed) { MachineDomTreeNode *Node = DomTree->getNode(B); assert(Node != HeadNode && "Cannot erase the head node"); while (Node->getNumChildren()) { @@ -826,7 +826,7 @@ void updateLoops(MachineLoopInfo *Loops, return; // If-conversion doesn't change loop structure, and it doesn't mess with back // edges, so updating LoopInfo is simply removing the dead blocks. - for (auto B : Removed) + for (auto *B : Removed) Loops->removeBlock(B); } } // namespace @@ -1065,7 +1065,7 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { // if-conversion in a single pass. The tryConvertIf() function may erase // blocks, but only blocks dominated by the head block. This makes it safe to // update the dominator tree while the post-order iterator is still active. - for (auto DomNode : post_order(DomTree)) + for (auto *DomNode : post_order(DomTree)) if (tryConvertIf(DomNode->getBlock())) Changed = true; @@ -1198,7 +1198,7 @@ bool EarlyIfPredicator::runOnMachineFunction(MachineFunction &MF) { // if-conversion in a single pass. The tryConvertIf() function may erase // blocks, but only blocks dominated by the head block. This makes it safe to // update the dominator tree while the post-order iterator is still active. 
- for (auto DomNode : post_order(DomTree)) + for (auto *DomNode : post_order(DomTree)) if (tryConvertIf(DomNode->getBlock())) Changed = true; diff --git a/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/llvm/lib/CodeGen/ExpandVectorPredication.cpp index 59932a542bbc..db4d42bf3ca4 100644 --- a/llvm/lib/CodeGen/ExpandVectorPredication.cpp +++ b/llvm/lib/CodeGen/ExpandVectorPredication.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" @@ -82,8 +83,11 @@ STATISTIC(NumLoweredVPOps, "Number of folded vector predication operations"); /// \returns Whether the vector mask \p MaskVal has all lane bits set. static bool isAllTrueMask(Value *MaskVal) { - auto *ConstVec = dyn_cast<ConstantVector>(MaskVal); - return ConstVec && ConstVec->isAllOnesValue(); + if (Value *SplattedVal = getSplatValue(MaskVal)) + if (auto *ConstValue = dyn_cast<Constant>(SplattedVal)) + return ConstValue->isAllOnesValue(); + + return false; } /// \returns A non-excepting divisor constant for this type. @@ -171,6 +175,10 @@ struct CachingVPExpander { Value *expandPredicationInReduction(IRBuilder<> &Builder, VPReductionIntrinsic &PI); + /// \brief Lower this VP memory operation to a non-VP intrinsic. + Value *expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder, + VPIntrinsic &VPI); + /// \brief Query TTI and expand the vector predication in \p P accordingly. Value *expandPredication(VPIntrinsic &PI); @@ -389,6 +397,71 @@ CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder, return Reduction; } +Value * +CachingVPExpander::expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder, + VPIntrinsic &VPI) { + assert(VPI.canIgnoreVectorLengthParam()); + + const auto &DL = F.getParent()->getDataLayout(); + + Value *MaskParam = VPI.getMaskParam(); + Value *PtrParam = VPI.getMemoryPointerParam(); + Value *DataParam = VPI.getMemoryDataParam(); + bool IsUnmasked = isAllTrueMask(MaskParam); + + MaybeAlign AlignOpt = VPI.getPointerAlignment(); + + Value *NewMemoryInst = nullptr; + switch (VPI.getIntrinsicID()) { + default: + llvm_unreachable("Not a VP memory intrinsic"); + case Intrinsic::vp_store: + if (IsUnmasked) { + StoreInst *NewStore = + Builder.CreateStore(DataParam, PtrParam, /*IsVolatile*/ false); + if (AlignOpt.has_value()) + NewStore->setAlignment(AlignOpt.value()); + NewMemoryInst = NewStore; + } else + NewMemoryInst = Builder.CreateMaskedStore( + DataParam, PtrParam, AlignOpt.valueOrOne(), MaskParam); + + break; + case Intrinsic::vp_load: + if (IsUnmasked) { + LoadInst *NewLoad = + Builder.CreateLoad(VPI.getType(), PtrParam, /*IsVolatile*/ false); + if (AlignOpt.has_value()) + NewLoad->setAlignment(AlignOpt.value()); + NewMemoryInst = NewLoad; + } else + NewMemoryInst = Builder.CreateMaskedLoad( + VPI.getType(), PtrParam, AlignOpt.valueOrOne(), MaskParam); + + break; + case Intrinsic::vp_scatter: { + auto *ElementType = + cast<VectorType>(DataParam->getType())->getElementType(); + NewMemoryInst = Builder.CreateMaskedScatter( + DataParam, PtrParam, + AlignOpt.value_or(DL.getPrefTypeAlign(ElementType)), MaskParam); + break; + } + case Intrinsic::vp_gather: { + auto *ElementType = cast<VectorType>(VPI.getType())->getElementType(); + NewMemoryInst = Builder.CreateMaskedGather( + VPI.getType(), PtrParam, + AlignOpt.value_or(DL.getPrefTypeAlign(ElementType)), MaskParam, nullptr, + VPI.getName()); 
+ break; + } + } + + assert(NewMemoryInst); + replaceOperation(*NewMemoryInst, VPI); + return NewMemoryInst; +} + void CachingVPExpander::discardEVLParameter(VPIntrinsic &VPI) { LLVM_DEBUG(dbgs() << "Discard EVL parameter in " << VPI << "\n"); @@ -465,6 +538,16 @@ Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) { if (auto *VPRI = dyn_cast<VPReductionIntrinsic>(&VPI)) return expandPredicationInReduction(Builder, *VPRI); + switch (VPI.getIntrinsicID()) { + default: + break; + case Intrinsic::vp_load: + case Intrinsic::vp_store: + case Intrinsic::vp_gather: + case Intrinsic::vp_scatter: + return expandPredicationInMemoryIntrinsic(Builder, VPI); + } + return &VPI; } diff --git a/llvm/lib/CodeGen/FaultMaps.cpp b/llvm/lib/CodeGen/FaultMaps.cpp index 3ec666227651..3f8fe2402d65 100644 --- a/llvm/lib/CodeGen/FaultMaps.cpp +++ b/llvm/lib/CodeGen/FaultMaps.cpp @@ -85,7 +85,7 @@ void FaultMaps::emitFunctionInfo(const MCSymbol *FnLabel, OS.emitInt32(0); // Reserved - for (auto &Fault : FFI) { + for (const auto &Fault : FFI) { LLVM_DEBUG(dbgs() << WFMP << " fault type: " << faultTypeToString(Fault.Kind) << "\n"); OS.emitInt32(Fault.Kind); diff --git a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp index ac140e745600..6a0d1c33d3e3 100644 --- a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp @@ -319,7 +319,7 @@ const GISelInstProfileBuilder & GISelInstProfileBuilder::addNodeID(const MachineInstr *MI) const { addNodeIDMBB(MI->getParent()); addNodeIDOpcode(MI->getOpcode()); - for (auto &Op : MI->operands()) + for (const auto &Op : MI->operands()) addNodeIDMachineOperand(Op); addNodeIDFlag(MI->getFlags()); return *this; diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index b06043fb4c31..6c36c6445c65 100644 --- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -116,7 +116,7 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB, // we'll pass to the assigner function. unsigned i = 0; unsigned NumFixedArgs = CB.getFunctionType()->getNumParams(); - for (auto &Arg : CB.args()) { + for (const auto &Arg : CB.args()) { ArgInfo OrigArg{ArgRegs[i], *Arg.get(), i, getAttributesForArgIdx(CB, i), i < NumFixedArgs}; setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CB); @@ -960,7 +960,7 @@ bool CallLowering::parametersInCSRMatch( const SmallVectorImpl<CCValAssign> &OutLocs, const SmallVectorImpl<ArgInfo> &OutArgs) const { for (unsigned i = 0; i < OutLocs.size(); ++i) { - auto &ArgLoc = OutLocs[i]; + const auto &ArgLoc = OutLocs[i]; // If it's not a register, it's fine. if (!ArgLoc.isRegLoc()) continue; diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index ad0c0c8315dc..da054b9c14fb 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -2385,7 +2385,7 @@ bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1, // loading from. To be safe, let's just assume that all loads and stores // are different (unless we have something which is guaranteed to not // change.) 
- if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad(nullptr)) + if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad()) return false; // If both instructions are loads or stores, they are equal only if both @@ -2396,7 +2396,7 @@ bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1, if (!LS1 || !LS2) return false; - if (!I2->isDereferenceableInvariantLoad(nullptr) || + if (!I2->isDereferenceableInvariantLoad() || (LS1->getMemSizeInBits() != LS2->getMemSizeInBits())) return false; } @@ -4800,24 +4800,22 @@ MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) { auto BuildUDIVPattern = [&](const Constant *C) { auto *CI = cast<ConstantInt>(C); const APInt &Divisor = CI->getValue(); - UnsignedDivisonByConstantInfo magics = - UnsignedDivisonByConstantInfo::get(Divisor); + UnsignedDivisionByConstantInfo magics = + UnsignedDivisionByConstantInfo::get(Divisor); unsigned PreShift = 0, PostShift = 0; // If the divisor is even, we can avoid using the expensive fixup by // shifting the divided value upfront. - if (magics.IsAdd != 0 && !Divisor[0]) { + if (magics.IsAdd && !Divisor[0]) { PreShift = Divisor.countTrailingZeros(); // Get magic number for the shifted divisor. magics = - UnsignedDivisonByConstantInfo::get(Divisor.lshr(PreShift), PreShift); - assert(magics.IsAdd == 0 && "Should use cheap fixup now"); + UnsignedDivisionByConstantInfo::get(Divisor.lshr(PreShift), PreShift); + assert(!magics.IsAdd && "Should use cheap fixup now"); } - APInt Magic = magics.Magic; - unsigned SelNPQ; - if (magics.IsAdd == 0 || Divisor.isOneValue()) { + if (!magics.IsAdd || Divisor.isOneValue()) { assert(magics.ShiftAmount < Divisor.getBitWidth() && "We shouldn't generate an undefined shift!"); PostShift = magics.ShiftAmount; @@ -4829,7 +4827,7 @@ MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) { PreShifts.push_back( MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0)); - MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0)); + MagicFactors.push_back(MIB.buildConstant(ScalarTy, magics.Magic).getReg(0)); NPQFactors.push_back( MIB.buildConstant(ScalarTy, SelNPQ ? 
APInt::getOneBitSet(EltBits, EltBits - 1) diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 947facc87b71..dbdcfe0b6f0b 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ValueTracking.h" @@ -166,8 +167,10 @@ void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<StackProtector>(); AU.addRequired<TargetPassConfig>(); AU.addRequired<GISelCSEAnalysisWrapperPass>(); - if (OptLevel != CodeGenOpt::None) + if (OptLevel != CodeGenOpt::None) { AU.addRequired<BranchProbabilityInfoWrapperPass>(); + AU.addRequired<AAResultsWrapperPass>(); + } AU.addRequired<TargetLibraryInfoWrapperPass>(); AU.addPreserved<TargetLibraryInfoWrapperPass>(); getSelectionDAGFallbackAnalysisUsage(AU); @@ -684,7 +687,7 @@ bool IRTranslator::translateSwitch(const User &U, MachineIRBuilder &MIB) { BranchProbabilityInfo *BPI = FuncInfo.BPI; CaseClusterVector Clusters; Clusters.reserve(SI.getNumCases()); - for (auto &I : SI.cases()) { + for (const auto &I : SI.cases()) { MachineBasicBlock *Succ = &getMBB(*I.getCaseSuccessor()); assert(Succ && "Could not find successor mbb in mapping"); const ConstantInt *CaseVal = I.getCaseValue(); @@ -1275,26 +1278,41 @@ static bool isSwiftError(const Value *V) { bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { const LoadInst &LI = cast<LoadInst>(U); - if (DL->getTypeStoreSize(LI.getType()) == 0) + + unsigned StoreSize = DL->getTypeStoreSize(LI.getType()); + if (StoreSize == 0) return true; ArrayRef<Register> Regs = getOrCreateVRegs(LI); ArrayRef<uint64_t> Offsets = *VMap.getOffsets(LI); Register Base = getOrCreateVReg(*LI.getPointerOperand()); + AAMDNodes AAInfo = LI.getAAMetadata(); - Type *OffsetIRTy = DL->getIntPtrType(LI.getPointerOperandType()); + const Value *Ptr = LI.getPointerOperand(); + Type *OffsetIRTy = DL->getIntPtrType(Ptr->getType()); LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL); - if (CLI->supportSwiftError() && isSwiftError(LI.getPointerOperand())) { + if (CLI->supportSwiftError() && isSwiftError(Ptr)) { assert(Regs.size() == 1 && "swifterror should be single pointer"); - Register VReg = SwiftError.getOrCreateVRegUseAt(&LI, &MIRBuilder.getMBB(), - LI.getPointerOperand()); + Register VReg = + SwiftError.getOrCreateVRegUseAt(&LI, &MIRBuilder.getMBB(), Ptr); MIRBuilder.buildCopy(Regs[0], VReg); return true; } auto &TLI = *MF->getSubtarget().getTargetLowering(); MachineMemOperand::Flags Flags = TLI.getLoadMemOperandFlags(LI, *DL); + if (AA && !(Flags & MachineMemOperand::MOInvariant)) { + if (AA->pointsToConstantMemory( + MemoryLocation(Ptr, LocationSize::precise(StoreSize), AAInfo))) { + Flags |= MachineMemOperand::MOInvariant; + + // FIXME: pointsToConstantMemory probably does not imply dereferenceable, + // but the previous usage implied it did. Probably should check + // isDereferenceableAndAlignedPointer. + Flags |= MachineMemOperand::MODereferenceable; + } + } const MDNode *Ranges = Regs.size() == 1 ? 
LI.getMetadata(LLVMContext::MD_range) : nullptr; @@ -1306,7 +1324,7 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { Align BaseAlign = getMemOpAlign(LI); auto MMO = MF->getMachineMemOperand( Ptr, Flags, MRI->getType(Regs[i]), - commonAlignment(BaseAlign, Offsets[i] / 8), LI.getAAMetadata(), Ranges, + commonAlignment(BaseAlign, Offsets[i] / 8), AAInfo, Ranges, LI.getSyncScopeID(), LI.getOrdering()); MIRBuilder.buildLoad(Regs[i], Addr, *MMO); } @@ -1400,7 +1418,7 @@ bool IRTranslator::translateInsertValue(const User &U, ArrayRef<uint64_t> DstOffsets = *VMap.getOffsets(U); ArrayRef<Register> SrcRegs = getOrCreateVRegs(*Src); ArrayRef<Register> InsertedRegs = getOrCreateVRegs(*U.getOperand(1)); - auto InsertedIt = InsertedRegs.begin(); + auto *InsertedIt = InsertedRegs.begin(); for (unsigned i = 0; i < DstRegs.size(); ++i) { if (DstOffsets[i] >= Offset && InsertedIt != InsertedRegs.end()) @@ -1563,9 +1581,9 @@ bool IRTranslator::translateGetElementPtr(const User &U, bool IRTranslator::translateMemFunc(const CallInst &CI, MachineIRBuilder &MIRBuilder, unsigned Opcode) { - + const Value *SrcPtr = CI.getArgOperand(1); // If the source is undef, then just emit a nop. - if (isa<UndefValue>(CI.getArgOperand(1))) + if (isa<UndefValue>(SrcPtr)) return true; SmallVector<Register, 3> SrcRegs; @@ -1595,15 +1613,20 @@ bool IRTranslator::translateMemFunc(const CallInst &CI, unsigned IsVol = cast<ConstantInt>(CI.getArgOperand(CI.arg_size() - 1))->getZExtValue(); + ConstantInt *CopySize = nullptr; + if (auto *MCI = dyn_cast<MemCpyInst>(&CI)) { DstAlign = MCI->getDestAlign().valueOrOne(); SrcAlign = MCI->getSourceAlign().valueOrOne(); + CopySize = dyn_cast<ConstantInt>(MCI->getArgOperand(2)); } else if (auto *MCI = dyn_cast<MemCpyInlineInst>(&CI)) { DstAlign = MCI->getDestAlign().valueOrOne(); SrcAlign = MCI->getSourceAlign().valueOrOne(); + CopySize = dyn_cast<ConstantInt>(MCI->getArgOperand(2)); } else if (auto *MMI = dyn_cast<MemMoveInst>(&CI)) { DstAlign = MMI->getDestAlign().valueOrOne(); SrcAlign = MMI->getSourceAlign().valueOrOne(); + CopySize = dyn_cast<ConstantInt>(MMI->getArgOperand(2)); } else { auto *MSI = cast<MemSetInst>(&CI); DstAlign = MSI->getDestAlign().valueOrOne(); @@ -1617,14 +1640,31 @@ bool IRTranslator::translateMemFunc(const CallInst &CI, } // Create mem operands to store the alignment and volatile info. - auto VolFlag = IsVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone; - ICall.addMemOperand(MF->getMachineMemOperand( - MachinePointerInfo(CI.getArgOperand(0)), - MachineMemOperand::MOStore | VolFlag, 1, DstAlign)); + MachineMemOperand::Flags LoadFlags = MachineMemOperand::MOLoad; + MachineMemOperand::Flags StoreFlags = MachineMemOperand::MOStore; + if (IsVol) { + LoadFlags |= MachineMemOperand::MOVolatile; + StoreFlags |= MachineMemOperand::MOVolatile; + } + + AAMDNodes AAInfo = CI.getAAMetadata(); + if (AA && CopySize && + AA->pointsToConstantMemory(MemoryLocation( + SrcPtr, LocationSize::precise(CopySize->getZExtValue()), AAInfo))) { + LoadFlags |= MachineMemOperand::MOInvariant; + + // FIXME: pointsToConstantMemory probably does not imply dereferenceable, + // but the previous usage implied it did. Probably should check + // isDereferenceableAndAlignedPointer. 
+ LoadFlags |= MachineMemOperand::MODereferenceable; + } + + ICall.addMemOperand( + MF->getMachineMemOperand(MachinePointerInfo(CI.getArgOperand(0)), + StoreFlags, 1, DstAlign, AAInfo)); if (Opcode != TargetOpcode::G_MEMSET) ICall.addMemOperand(MF->getMachineMemOperand( - MachinePointerInfo(CI.getArgOperand(1)), - MachineMemOperand::MOLoad | VolFlag, 1, SrcAlign)); + MachinePointerInfo(SrcPtr), LoadFlags, 1, SrcAlign, AAInfo)); return true; } @@ -1785,7 +1825,7 @@ bool IRTranslator::translateSimpleIntrinsic(const CallInst &CI, // Yes. Let's translate it. SmallVector<llvm::SrcOp, 4> VRegs; - for (auto &Arg : CI.args()) + for (const auto &Arg : CI.args()) VRegs.push_back(getOrCreateVReg(*Arg)); MIRBuilder.buildInstr(Op, {getOrCreateVReg(CI)}, VRegs, @@ -2305,7 +2345,7 @@ bool IRTranslator::translateCallBase(const CallBase &CB, SmallVector<ArrayRef<Register>, 8> Args; Register SwiftInVReg = 0; Register SwiftErrorVReg = 0; - for (auto &Arg : CB.args()) { + for (const auto &Arg : CB.args()) { if (CLI->supportSwiftError() && isSwiftError(Arg)) { assert(SwiftInVReg == 0 && "Expected only one swift error argument"); LLT Ty = getLLTForType(*Arg->getType(), *DL); @@ -2394,7 +2434,7 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { if (isa<FPMathOperator>(CI)) MIB->copyIRFlags(CI); - for (auto &Arg : enumerate(CI.args())) { + for (const auto &Arg : enumerate(CI.args())) { // If this is required to be an immediate, don't materialize it in a // register. if (CI.paramHasAttr(Arg.index(), Attribute::ImmArg)) { @@ -2947,7 +2987,7 @@ void IRTranslator::finishPendingPhis() { for (unsigned i = 0; i < PI->getNumIncomingValues(); ++i) { auto IRPred = PI->getIncomingBlock(i); ArrayRef<Register> ValRegs = getOrCreateVRegs(*PI->getIncomingValue(i)); - for (auto Pred : getMachinePredBBs({IRPred, PI->getParent()})) { + for (auto *Pred : getMachinePredBBs({IRPred, PI->getParent()})) { if (SeenPreds.count(Pred) || !PhiMBB->isPredecessor(Pred)) continue; SeenPreds.insert(Pred); @@ -3347,10 +3387,13 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { TM.resetTargetOptions(F); EnableOpts = OptLevel != CodeGenOpt::None && !skipFunction(F); FuncInfo.MF = MF; - if (EnableOpts) + if (EnableOpts) { + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); FuncInfo.BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI(); - else + } else { + AA = nullptr; FuncInfo.BPI = nullptr; + } FuncInfo.CanLowerReturn = CLI->checkReturnTypeForCallConv(*MF); diff --git a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp index 95ae8383b6fa..e0357c50e555 100644 --- a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp @@ -332,6 +332,8 @@ bool InlineAsmLowering::lowerInlineAsm( } ++ResNo; } else { + assert(OpInfo.Type != InlineAsm::isLabel && + "GlobalISel currently doesn't support callbr"); OpInfo.ConstraintVT = MVT::Other; } @@ -427,7 +429,8 @@ bool InlineAsmLowering::lowerInlineAsm( } break; - case InlineAsm::isInput: { + case InlineAsm::isInput: + case InlineAsm::isLabel: { if (OpInfo.isMatchingInputConstraint()) { unsigned DefIdx = OpInfo.getMatchedOperand(); // Find operand with register def that corresponds to DefIdx. 
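Illustrative aside, not part of the imported patch: the BitcodeReader upgrade and the InlineAsmLowering change above both deal with the label-constraint ("!i") form of callbr, in which indirect destinations are no longer passed as trailing blockaddress arguments. The following is a minimal sketch of building that form through the C++ API; the module, function, and block names, the "r,!i" constraint string, and the empty asm body are assumptions made for the example, while InlineAsm::get and the callbr construction mirror the calls visible in the reader code above.

// Minimal sketch (assumed example, not from the patch): construct a callbr
// whose inline asm uses a label constraint instead of a blockaddress argument.
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("callbr_label_sketch", Ctx);
  IRBuilder<> B(Ctx);

  // void @f(i32 %x) with one fallthrough block and one indirect destination.
  auto *FnTy = FunctionType::get(Type::getVoidTy(Ctx), {Type::getInt32Ty(Ctx)},
                                 /*isVarArg=*/false);
  Function *F = Function::Create(FnTy, Function::ExternalLinkage, "f", &M);
  BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
  BasicBlock *Fallthrough = BasicBlock::Create(Ctx, "fallthrough", F);
  BasicBlock *Indirect = BasicBlock::Create(Ctx, "indirect", F);

  // "r" is an ordinary register input; "!i" marks a label constraint, so the
  // indirect destination is carried by the callbr itself rather than by a
  // trailing blockaddress argument (the pre-upgrade encoding handled above).
  auto *AsmTy = FunctionType::get(Type::getVoidTy(Ctx), {Type::getInt32Ty(Ctx)},
                                  /*isVarArg=*/false);
  InlineAsm *IA = InlineAsm::get(AsmTy, /*AsmString=*/"", "r,!i",
                                 /*hasSideEffects=*/true);

  B.SetInsertPoint(Entry);
  B.CreateCallBr(AsmTy, IA, Fallthrough, {Indirect}, {F->getArg(0)});
  B.SetInsertPoint(Fallthrough);
  B.CreateRetVoid();
  B.SetInsertPoint(Indirect);
  B.CreateRetVoid();

  // The printed callbr should read "to label %fallthrough [label %indirect]"
  // with no blockaddress operand among its arguments.
  verifyModule(M, &errs());
  M.print(outs(), nullptr);
  return 0;
}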
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index fb046d519ac8..52ee13757f27 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -2393,30 +2393,14 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { return Legalized; } case TargetOpcode::G_FCONSTANT: { + // To avoid changing the bits of the constant due to extension to a larger + // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT. MachineOperand &SrcMO = MI.getOperand(1); - LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); - APFloat Val = SrcMO.getFPImm()->getValueAPF(); - bool LosesInfo; - switch (WideTy.getSizeInBits()) { - case 32: - Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, - &LosesInfo); - break; - case 64: - Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, - &LosesInfo); - break; - default: - return UnableToLegalize; - } - - assert(!LosesInfo && "extend should always be lossless"); - - Observer.changingInstr(MI); - SrcMO.setFPImm(ConstantFP::get(Ctx, Val)); - - widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); - Observer.changedInstr(MI); + APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt(); + MIRBuilder.setInstrAndDebugLoc(MI); + auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val); + widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC); + MI.eraseFromParent(); return Legalized; } case TargetOpcode::G_IMPLICIT_DEF: { diff --git a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp index d4fbf7d15089..be1bc865d1e1 100644 --- a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp @@ -298,7 +298,7 @@ bool LoadStoreOpt::mergeStores(SmallVectorImpl<GStore *> &StoresToMerge) { const auto &LegalSizes = LegalStoreSizes[AS]; #ifndef NDEBUG - for (auto StoreMI : StoresToMerge) + for (auto *StoreMI : StoresToMerge) assert(MRI->getType(StoreMI->getValueReg()) == OrigTy); #endif @@ -366,7 +366,7 @@ bool LoadStoreOpt::doSingleStoreMerge(SmallVectorImpl<GStore *> &Stores) { // directly. Otherwise, we need to generate some instructions to merge the // existing values together into a wider type. 
SmallVector<APInt, 8> ConstantVals; - for (auto Store : Stores) { + for (auto *Store : Stores) { auto MaybeCst = getIConstantVRegValWithLookThrough(Store->getValueReg(), *MRI); if (!MaybeCst) { @@ -415,7 +415,7 @@ bool LoadStoreOpt::doSingleStoreMerge(SmallVectorImpl<GStore *> &Stores) { return R; }); - for (auto MI : Stores) + for (auto *MI : Stores) InstsToErase.insert(MI); return true; } diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 0d9580e25606..2e22dae35e5a 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -646,7 +646,7 @@ MachineIRBuilder::buildBuildVectorConstant(const DstOp &Res, SmallVector<SrcOp> TmpVec; TmpVec.reserve(Ops.size()); LLT EltTy = Res.getLLTTy(*getMRI()).getElementType(); - for (auto &Op : Ops) + for (const auto &Op : Ops) TmpVec.push_back(buildConstant(EltTy, Op)); return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec); } diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 7781761bc131..013c8700e8ae 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -228,7 +228,7 @@ bool llvm::isTriviallyDead(const MachineInstr &MI, return false; // Instructions without side-effects are dead iff they only define dead vregs. - for (auto &MO : MI.operands()) { + for (const auto &MO : MI.operands()) { if (!MO.isReg() || !MO.isDef()) continue; diff --git a/llvm/lib/CodeGen/HardwareLoops.cpp b/llvm/lib/CodeGen/HardwareLoops.cpp index 67d6a3df7807..258ad1931b12 100644 --- a/llvm/lib/CodeGen/HardwareLoops.cpp +++ b/llvm/lib/CodeGen/HardwareLoops.cpp @@ -332,7 +332,7 @@ void HardwareLoop::Create() { // Run through the basic blocks of the loop and see if any of them have dead // PHIs that can be removed. - for (auto I : L->blocks()) + for (auto *I : L->blocks()) DeleteDeadPHIs(I); } @@ -407,13 +407,13 @@ Value *HardwareLoop::InitLoopCount() { BasicBlock *Predecessor = BB->getSinglePredecessor(); // If it's not safe to create a while loop then don't force it and create a // do-while loop instead - if (!isSafeToExpandAt(ExitCount, Predecessor->getTerminator(), SE)) + if (!SCEVE.isSafeToExpandAt(ExitCount, Predecessor->getTerminator())) UseLoopGuard = false; else BB = Predecessor; } - if (!isSafeToExpandAt(ExitCount, BB->getTerminator(), SE)) { + if (!SCEVE.isSafeToExpandAt(ExitCount, BB->getTerminator())) { LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount " << *ExitCount << "\n"); return nullptr; diff --git a/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/llvm/lib/CodeGen/ImplicitNullChecks.cpp index fc97938ccd3e..da6ec76bd770 100644 --- a/llvm/lib/CodeGen/ImplicitNullChecks.cpp +++ b/llvm/lib/CodeGen/ImplicitNullChecks.cpp @@ -758,7 +758,7 @@ void ImplicitNullChecks::rewriteNullChecks( ArrayRef<ImplicitNullChecks::NullCheck> NullCheckList) { DebugLoc DL; - for (auto &NC : NullCheckList) { + for (const auto &NC : NullCheckList) { // Remove the conditional branch dependent on the null check. 
unsigned BranchesRemoved = TII->removeBranch(*NC.getCheckBlock()); (void)BranchesRemoved; diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp index 06c660807c5c..3ea1d6c7f1ef 100644 --- a/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/llvm/lib/CodeGen/InlineSpiller.cpp @@ -86,7 +86,6 @@ class HoistSpillHelper : private LiveRangeEdit::Delegate { MachineFunction &MF; LiveIntervals &LIS; LiveStacks &LSS; - AliasAnalysis *AA; MachineDominatorTree &MDT; MachineLoopInfo &Loops; VirtRegMap &VRM; @@ -140,7 +139,6 @@ public: VirtRegMap &vrm) : MF(mf), LIS(pass.getAnalysis<LiveIntervals>()), LSS(pass.getAnalysis<LiveStacks>()), - AA(&pass.getAnalysis<AAResultsWrapperPass>().getAAResults()), MDT(pass.getAnalysis<MachineDominatorTree>()), Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm), MRI(mf.getRegInfo()), TII(*mf.getSubtarget().getInstrInfo()), @@ -159,7 +157,6 @@ class InlineSpiller : public Spiller { MachineFunction &MF; LiveIntervals &LIS; LiveStacks &LSS; - AliasAnalysis *AA; MachineDominatorTree &MDT; MachineLoopInfo &Loops; VirtRegMap &VRM; @@ -200,7 +197,6 @@ public: VirtRegAuxInfo &VRAI) : MF(MF), LIS(Pass.getAnalysis<LiveIntervals>()), LSS(Pass.getAnalysis<LiveStacks>()), - AA(&Pass.getAnalysis<AAResultsWrapperPass>().getAAResults()), MDT(Pass.getAnalysis<MachineDominatorTree>()), Loops(Pass.getAnalysis<MachineLoopInfo>()), VRM(VRM), MRI(MF.getRegInfo()), TII(*MF.getSubtarget().getInstrInfo()), @@ -659,7 +655,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) { /// reMaterializeAll - Try to rematerialize as many uses as possible, /// and trim the live ranges after. void InlineSpiller::reMaterializeAll() { - if (!Edit->anyRematerializable(AA)) + if (!Edit->anyRematerializable()) return; UsedValues.clear(); @@ -702,7 +698,7 @@ void InlineSpiller::reMaterializeAll() { if (DeadDefs.empty()) return; LLVM_DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n"); - Edit->eliminateDeadDefs(DeadDefs, RegsToSpill, AA); + Edit->eliminateDeadDefs(DeadDefs, RegsToSpill); // LiveRangeEdit::eliminateDeadDef is used to remove dead define instructions // after rematerialization. To remove a VNI for a vreg from its LiveInterval, @@ -1180,7 +1176,7 @@ void InlineSpiller::spillAll() { // Hoisted spills may cause dead code. if (!DeadDefs.empty()) { LLVM_DEBUG(dbgs() << "Eliminating " << DeadDefs.size() << " dead defs\n"); - Edit->eliminateDeadDefs(DeadDefs, RegsToSpill, AA); + Edit->eliminateDeadDefs(DeadDefs, RegsToSpill); } // Finally delete the SnippetCopies. @@ -1298,7 +1294,7 @@ void HoistSpillHelper::rmRedundantSpills( // For each spill saw, check SpillBBToSpill[] and see if its BB already has // another spill inside. If a BB contains more than one spill, only keep the // earlier spill with smaller SlotIndex. 
- for (const auto CurrentSpill : Spills) { + for (auto *const CurrentSpill : Spills) { MachineBasicBlock *Block = CurrentSpill->getParent(); MachineDomTreeNode *Node = MDT.getBase().getNode(Block); MachineInstr *PrevSpill = SpillBBToSpill[Node]; @@ -1313,7 +1309,7 @@ void HoistSpillHelper::rmRedundantSpills( SpillBBToSpill[MDT.getBase().getNode(Block)] = CurrentSpill; } } - for (const auto SpillToRm : SpillsToRm) + for (auto *const SpillToRm : SpillsToRm) Spills.erase(SpillToRm); } @@ -1347,7 +1343,7 @@ void HoistSpillHelper::getVisitOrders( // the path starting from the first node with non-redundant spill to the Root // node will be added to the WorkSet, which will contain all the possible // locations where spills may be hoisted to after the loop below is done. - for (const auto Spill : Spills) { + for (auto *const Spill : Spills) { MachineBasicBlock *Block = Spill->getParent(); MachineDomTreeNode *Node = MDT[Block]; MachineInstr *SpillToRm = nullptr; @@ -1492,7 +1488,7 @@ void HoistSpillHelper::runHoistSpills( : BranchProbability(1, 1); if (SubTreeCost > MBFI.getBlockFreq(Block) * MarginProb) { // Hoist: Move spills to current Block. - for (const auto SpillBB : SpillsInSubTree) { + for (auto *const SpillBB : SpillsInSubTree) { // When SpillBB is a BB contains original spill, insert the spill // to SpillsToRm. if (SpillsToKeep.find(SpillBB) != SpillsToKeep.end() && @@ -1609,7 +1605,7 @@ void HoistSpillHelper::hoistAllSpills() { // Remove redundant spills or change them to dead instructions. NumSpills -= SpillsToRm.size(); - for (auto const RMEnt : SpillsToRm) { + for (auto *const RMEnt : SpillsToRm) { RMEnt->setDesc(TII.get(TargetOpcode::KILL)); for (unsigned i = RMEnt->getNumOperands(); i; --i) { MachineOperand &MO = RMEnt->getOperand(i - 1); @@ -1617,7 +1613,7 @@ void HoistSpillHelper::hoistAllSpills() { RMEnt->removeOperand(i - 1); } } - Edit.eliminateDeadDefs(SpillsToRm, None, AA); + Edit.eliminateDeadDefs(SpillsToRm, None); } } diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp index 55f3ad796291..0582378be4cd 100644 --- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -541,7 +541,7 @@ bool InterleavedAccess::runOnFunction(Function &F) { Changed |= lowerInterleavedStore(SI, DeadInsts); } - for (auto I : DeadInsts) + for (auto *I : DeadInsts) I->eraseFromParent(); return Changed; diff --git a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp index 43858071025a..a0f304659bca 100644 --- a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp +++ b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp @@ -528,8 +528,8 @@ public: if (B.size() != o.B.size()) return false; - auto ob = o.B.begin(); - for (auto &b : B) { + auto *ob = o.B.begin(); + for (const auto &b : B) { if (b != *ob) return false; ob++; @@ -1154,7 +1154,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad, // Test if all participating instruction will be dead after the // transformation. If intermediate results are used, no performance gain can // be expected. Also sum the cost of the Instructions beeing left dead. - for (auto &I : Is) { + for (const auto &I : Is) { // Compute the old cost InstructionCost += TTI.getInstructionCost(I, CostKind); @@ -1182,7 +1182,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad, // that the corresponding defining access dominates first LI. 
This guarantees // that there are no aliasing stores in between the loads. auto FMA = MSSA.getMemoryAccess(First); - for (auto LI : LIs) { + for (auto *LI : LIs) { auto MADef = MSSA.getMemoryAccess(LI)->getDefiningAccess(); if (!MSSA.dominates(MADef, FMA)) return false; diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp index 43c12c67939e..ef49d3888f2b 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp @@ -284,7 +284,7 @@ public: // Initialized the preferred-location map with illegal locations, to be // filled in later. - for (auto &VLoc : VLocs) + for (const auto &VLoc : VLocs) if (VLoc.second.Kind == DbgValue::Def) ValueToLoc.insert({VLoc.second.ID, LocIdx::MakeIllegalLoc()}); @@ -507,7 +507,7 @@ public: // date. Wipe old tracking data for the location if it's been clobbered in // the meantime. if (MTracker->readMLoc(NewLoc) != VarLocs[NewLoc.asU64()]) { - for (auto &P : ActiveMLocs[NewLoc]) { + for (const auto &P : ActiveMLocs[NewLoc]) { ActiveVLocs.erase(P); } ActiveMLocs[NewLoc.asU64()].clear(); @@ -560,7 +560,7 @@ public: // explicitly undef, then stop here. if (!NewLoc && !MakeUndef) { // Try and recover a few more locations with entry values. - for (auto &Var : ActiveMLocIt->second) { + for (const auto &Var : ActiveMLocIt->second) { auto &Prop = ActiveVLocs.find(Var)->second.Properties; recoverAsEntryValue(Var, Prop, OldValue); } @@ -570,7 +570,7 @@ public: // Examine all the variables based on this location. DenseSet<DebugVariable> NewMLocs; - for (auto &Var : ActiveMLocIt->second) { + for (const auto &Var : ActiveMLocIt->second) { auto ActiveVLocIt = ActiveVLocs.find(Var); // Re-state the variable location: if there's no replacement then NewLoc // is None and a $noreg DBG_VALUE will be created. Otherwise, a DBG_VALUE @@ -623,7 +623,7 @@ public: VarLocs[Dst.asU64()] = VarLocs[Src.asU64()]; // For each variable based on Src; create a location at Dst. - for (auto &Var : MovingVars) { + for (const auto &Var : MovingVars) { auto ActiveVLocIt = ActiveVLocs.find(Var); assert(ActiveVLocIt != ActiveVLocs.end()); ActiveVLocIt->second.Loc = Dst; @@ -1224,7 +1224,7 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI, // FIXME: no index for this? Register Reg = MTracker->LocIdxToLocID[L]; const TargetRegisterClass *TRC = nullptr; - for (auto *TRCI : TRI->regclasses()) + for (const auto *TRCI : TRI->regclasses()) if (TRCI->contains(Reg)) TRC = TRCI; assert(TRC && "Couldn't find target register class?"); @@ -1454,7 +1454,7 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) { for (uint32_t DeadReg : DeadRegs) MTracker->defReg(DeadReg, CurBB, CurInst); - for (auto *MO : RegMaskPtrs) + for (const auto *MO : RegMaskPtrs) MTracker->writeRegMask(MO, CurBB, CurInst); // If this instruction writes to a spill slot, def that slot. @@ -1493,7 +1493,7 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) { if (IgnoreSPAlias(Reg)) continue; - for (auto *MO : RegMaskPtrs) + for (const auto *MO : RegMaskPtrs) if (MO->clobbersPhysReg(Reg)) TTracker->clobberMloc(L.Idx, MI.getIterator(), false); } @@ -1822,7 +1822,7 @@ void InstrRefBasedLDV::accumulateFragmentMap(MachineInstr &MI) { // Otherwise, examine all other seen fragments for this variable, with "this" // fragment being a previously unseen fragment. Record any pair of // overlapping fragments. 
- for (auto &ASeenFragment : AllSeenFragments) { + for (const auto &ASeenFragment : AllSeenFragments) { // Does this previously seen fragment overlap? if (DIExpression::fragmentsOverlap(ThisFragment, ASeenFragment)) { // Yes: Mark the current fragment as being overlapped. @@ -1993,7 +1993,7 @@ bool InstrRefBasedLDV::mlocJoin( // redundant PHI that we can eliminate. SmallVector<const MachineBasicBlock *, 8> BlockOrders; - for (auto Pred : MBB.predecessors()) + for (auto *Pred : MBB.predecessors()) BlockOrders.push_back(Pred); // Visit predecessors in RPOT order. @@ -2313,7 +2313,7 @@ void InstrRefBasedLDV::buildMLocValueMap( // All successors should be visited: put any back-edges on the pending // list for the next pass-through, and any other successors to be // visited this pass, if they're not going to be already. - for (auto s : MBB->successors()) { + for (auto *s : MBB->successors()) { // Does branching to this successor represent a back-edge? if (BBToOrder[s] > BBToOrder[MBB]) { // No: visit it during this dataflow iteration. @@ -2367,7 +2367,7 @@ Optional<ValueIDNum> InstrRefBasedLDV::pickVPHILoc( if (BlockOrders.empty()) return None; - for (auto p : BlockOrders) { + for (const auto *p : BlockOrders) { unsigned ThisBBNum = p->getNumber(); auto OutValIt = LiveOuts.find(p); if (OutValIt == LiveOuts.end()) @@ -2422,7 +2422,7 @@ Optional<ValueIDNum> InstrRefBasedLDV::pickVPHILoc( // Check that all properties are the same. We can't pick a location if they're // not. const DbgValueProperties *Properties0 = Properties[0]; - for (auto *Prop : Properties) + for (const auto *Prop : Properties) if (*Prop != *Properties0) return None; @@ -2472,7 +2472,7 @@ bool InstrRefBasedLDV::vlocJoin( SmallVector<InValueT, 8> Values; bool Bail = false; int BackEdgesStart = 0; - for (auto p : BlockOrders) { + for (auto *p : BlockOrders) { // If the predecessor isn't in scope / to be explored, we'll never be // able to join any locations. if (!BlocksToExplore.contains(p)) { @@ -2577,7 +2577,7 @@ void InstrRefBasedLDV::getBlocksForScope( // instructions in scope at all. To accurately replicate VarLoc // LiveDebugValues, this means exploring all artificial successors too. // Perform a depth-first-search to enumerate those blocks. - for (auto *MBB : BlocksToExplore) { + for (const auto *MBB : BlocksToExplore) { // Depth-first-search state: each node is a block and which successor // we're currently exploring. SmallVector<std::pair<const MachineBasicBlock *, @@ -2662,7 +2662,7 @@ void InstrRefBasedLDV::buildVLocValueMap( MutBlocksToExplore.insert(const_cast<MachineBasicBlock *>(MBB)); // Picks out relevants blocks RPO order and sort them. - for (auto *MBB : BlocksToExplore) + for (const auto *MBB : BlocksToExplore) BlockOrders.push_back(const_cast<MachineBasicBlock *>(MBB)); llvm::sort(BlockOrders, Cmp); @@ -2696,7 +2696,7 @@ void InstrRefBasedLDV::buildVLocValueMap( // between blocks. This keeps the locality of working on one lexical scope at // at time, but avoids re-processing variable values because some other // variable has been assigned. - for (auto &Var : VarsWeCareAbout) { + for (const auto &Var : VarsWeCareAbout) { // Re-initialize live-ins and live-outs, to clear the remains of previous // variables live-ins / live-outs. for (unsigned int I = 0; I < NumBlocks; ++I) { @@ -2823,7 +2823,7 @@ void InstrRefBasedLDV::buildVLocValueMap( // We should visit all successors. Ensure we'll visit any non-backedge // successors during this dataflow iteration; book backedge successors // to be visited next time around. 
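The buildMLocValueMap / buildVLocValueMap loops above follow the usual RPO dataflow discipline: successors later in reverse post-order are revisited during the current sweep, while back-edge successors are parked on a Pending worklist for the next sweep. A minimal sketch of that discipline on a toy CFG, assuming blocks are already numbered in reverse post-order; the graph, join (max) and transfer functions are illustrative, not the pass's own:

#include <algorithm>
#include <cstdio>
#include <functional>
#include <queue>
#include <set>
#include <utility>
#include <vector>

int main() {
  // Succ[b] = successors of block b; the edge 2 -> 1 is a back edge.
  std::vector<std::vector<int>> Succ = {{1}, {2}, {1, 3}, {}};
  std::vector<int> Gen = {1, 0, 2, 0}; // per-block "gen" value
  std::vector<int> In(Succ.size(), 0), Out(Succ.size(), 0);

  using MinQ = std::priority_queue<int, std::vector<int>, std::greater<int>>;
  MinQ Worklist, Pending;
  std::set<int> OnWorklist, OnPending;
  for (int B = 0; B < (int)Succ.size(); ++B) {
    Worklist.push(B);
    OnWorklist.insert(B);
  }

  while (!Worklist.empty() || !Pending.empty()) {
    while (!Worklist.empty()) {
      int B = Worklist.top();
      Worklist.pop();
      OnWorklist.erase(B);
      int NewOut = std::max(In[B], Gen[B]); // transfer function
      if (NewOut == Out[B])
        continue; // no change, nothing to propagate
      Out[B] = NewOut;
      for (int S : Succ[B]) {
        In[S] = std::max(In[S], Out[B]); // join
        if (S > B) {
          // Forward edge: (re)visit the successor during this sweep.
          if (OnWorklist.insert(S).second)
            Worklist.push(S);
        } else {
          // Back edge: defer the successor to the next sweep.
          if (OnPending.insert(S).second)
            Pending.push(S);
        }
      }
    }
    std::swap(Worklist, Pending);
    std::swap(OnWorklist, OnPending);
  }
  std::printf("value reaching the exit block: %d\n", Out[3]); // expect 2
  return 0;
}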
- for (auto s : MBB->successors()) { + for (auto *s : MBB->successors()) { // Ignore out of scope / not-to-be-explored successors. if (LiveInIdx.find(s) == LiveInIdx.end()) continue; @@ -2906,7 +2906,7 @@ void InstrRefBasedLDV::placePHIsForSingleVarDefinition( #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void InstrRefBasedLDV::dump_mloc_transfer( const MLocTransferMap &mloc_transfer) const { - for (auto &P : mloc_transfer) { + for (const auto &P : mloc_transfer) { std::string foo = MTracker->LocIdxToName(P.first); std::string bar = MTracker->IDAsString(P.second); dbgs() << "Loc " << foo << " --> " << bar << "\n"; @@ -2993,7 +2993,7 @@ void InstrRefBasedLDV::makeDepthFirstEjectionMap( if (DILocationIt != ScopeToDILocation.end()) { getBlocksForScope(DILocationIt->second, BlocksToExplore, ScopeToAssignBlocks.find(WS)->second); - for (auto *MBB : BlocksToExplore) { + for (const auto *MBB : BlocksToExplore) { unsigned BBNum = MBB->getNumber(); if (EjectionMap[BBNum] == 0) EjectionMap[BBNum] = WS->getDFSOut(); @@ -3100,7 +3100,7 @@ bool InstrRefBasedLDV::depthFirstVLocAndEmit( getBlocksForScope(DILocationIt->second, BlocksToExplore, ScopeToAssignBlocks.find(WS)->second); - for (auto *MBB : BlocksToExplore) + for (const auto *MBB : BlocksToExplore) if (WS->getDFSOut() == EjectionMap[MBB->getNumber()]) EjectBlock(const_cast<MachineBasicBlock &>(*MBB)); @@ -3709,10 +3709,9 @@ Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIsImpl( for (auto &PHI : CreatedPHIs) SortedPHIs.push_back(PHI); - std::sort( - SortedPHIs.begin(), SortedPHIs.end(), [&](LDVSSAPhi *A, LDVSSAPhi *B) { - return BBToOrder[&A->getParent()->BB] < BBToOrder[&B->getParent()->BB]; - }); + llvm::sort(SortedPHIs, [&](LDVSSAPhi *A, LDVSSAPhi *B) { + return BBToOrder[&A->getParent()->BB] < BBToOrder[&B->getParent()->BB]; + }); for (auto &PHI : SortedPHIs) { ValueIDNum ThisBlockValueNum = diff --git a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp index 24c00b8a10ec..32e07eb77efe 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp @@ -1874,7 +1874,7 @@ void VarLocBasedLDV::accumulateFragmentMap(MachineInstr &MI, // Otherwise, examine all other seen fragments for this variable, with "this" // fragment being a previously unseen fragment. Record any pair of // overlapping fragments. - for (auto &ASeenFragment : AllSeenFragments) { + for (const auto &ASeenFragment : AllSeenFragments) { // Does this previously seen fragment overlap? if (DIExpression::fragmentsOverlap(ThisFragment, ASeenFragment)) { // Yes: Mark the current fragment as being overlapped. @@ -1922,7 +1922,7 @@ bool VarLocBasedLDV::join( // For all predecessors of this MBB, find the set of VarLocs that // can be joined. int NumVisited = 0; - for (auto p : MBB.predecessors()) { + for (auto *p : MBB.predecessors()) { // Ignore backedges if we have not visited the predecessor yet. 
As the // predecessor hasn't yet had locations propagated into it, most locations // will not yet be valid, so treat them as all being uninitialized and @@ -2246,7 +2246,7 @@ bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF, if (OLChanged) { OLChanged = false; - for (auto s : MBB->successors()) + for (auto *s : MBB->successors()) if (OnPending.insert(s).second) { Pending.push(BBToOrder[s]); } diff --git a/llvm/lib/CodeGen/LiveDebugVariables.cpp b/llvm/lib/CodeGen/LiveDebugVariables.cpp index 35cf25330186..574c0f98161e 100644 --- a/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -1891,7 +1891,7 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) { // insert position, insert all instructions at the same SlotIdx. They are // guaranteed to appear in-sequence in StashedDebugInstrs because we insert // them in order. - for (auto StashIt = StashedDebugInstrs.begin(); + for (auto *StashIt = StashedDebugInstrs.begin(); StashIt != StashedDebugInstrs.end(); ++StashIt) { SlotIndex Idx = StashIt->Idx; MachineBasicBlock *MBB = StashIt->MBB; diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp index 1242ce20b732..8a76048bb8c4 100644 --- a/llvm/lib/CodeGen/LiveIntervals.cpp +++ b/llvm/lib/CodeGen/LiveIntervals.cpp @@ -19,7 +19,6 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervalCalc.h" #include "llvm/CodeGen/LiveVariables.h" @@ -60,9 +59,8 @@ using namespace llvm; char LiveIntervals::ID = 0; char &llvm::LiveIntervalsID = LiveIntervals::ID; -INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals", - "Live Interval Analysis", false, false) -INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals", "Live Interval Analysis", + false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_END(LiveIntervals, "liveintervals", @@ -87,8 +85,6 @@ cl::opt<bool> UseSegmentSetForPhysRegs( void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addRequired<AAResultsWrapperPass>(); - AU.addPreserved<AAResultsWrapperPass>(); AU.addPreserved<LiveVariables>(); AU.addPreservedID(MachineLoopInfoID); AU.addRequiredTransitiveID(MachineDominatorsID); @@ -126,7 +122,6 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { MRI = &MF->getRegInfo(); TRI = MF->getSubtarget().getRegisterInfo(); TII = MF->getSubtarget().getInstrInfo(); - AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); Indexes = &getAnalysis<SlotIndexes>(); DomTree = &getAnalysis<MachineDominatorTree>(); @@ -1417,7 +1412,7 @@ private: NewIdxDef.getRegSlot(), (NewIdxOut + 1)->end, OldIdxVNI); OldIdxVNI->def = NewIdxDef; // Modify subsequent segments to be defined by the moved def OldIdxVNI. - for (auto Idx = NewIdxOut + 2; Idx <= OldIdxOut; ++Idx) + for (auto *Idx = NewIdxOut + 2; Idx <= OldIdxOut; ++Idx) Idx->valno = OldIdxVNI; // Aggressively remove all dead flags from the former dead definition. // Kill/dead flags shouldn't be used while live intervals exist; they @@ -1662,7 +1657,7 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, ArrayRef<Register> OrigRegs) { // Find anchor points, which are at the beginning/end of blocks or at // instructions that already have indexes. 
- while (Begin != MBB->begin() && !Indexes->hasIndex(*Begin)) + while (Begin != MBB->begin() && !Indexes->hasIndex(*std::prev(Begin))) --Begin; while (End != MBB->end() && !Indexes->hasIndex(*End)) ++End; diff --git a/llvm/lib/CodeGen/LiveRangeEdit.cpp b/llvm/lib/CodeGen/LiveRangeEdit.cpp index 58eb4110f153..2aafb746aa2c 100644 --- a/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -68,17 +68,16 @@ Register LiveRangeEdit::createFrom(Register OldReg) { } bool LiveRangeEdit::checkRematerializable(VNInfo *VNI, - const MachineInstr *DefMI, - AAResults *aa) { + const MachineInstr *DefMI) { assert(DefMI && "Missing instruction"); ScannedRemattable = true; - if (!TII.isTriviallyReMaterializable(*DefMI, aa)) + if (!TII.isTriviallyReMaterializable(*DefMI)) return false; Remattable.insert(VNI); return true; } -void LiveRangeEdit::scanRemattable(AAResults *aa) { +void LiveRangeEdit::scanRemattable() { for (VNInfo *VNI : getParent().valnos) { if (VNI->isUnused()) continue; @@ -90,14 +89,14 @@ void LiveRangeEdit::scanRemattable(AAResults *aa) { MachineInstr *DefMI = LIS.getInstructionFromIndex(OrigVNI->def); if (!DefMI) continue; - checkRematerializable(OrigVNI, DefMI, aa); + checkRematerializable(OrigVNI, DefMI); } ScannedRemattable = true; } -bool LiveRangeEdit::anyRematerializable(AAResults *aa) { +bool LiveRangeEdit::anyRematerializable() { if (!ScannedRemattable) - scanRemattable(aa); + scanRemattable(); return !Remattable.empty(); } @@ -274,8 +273,7 @@ bool LiveRangeEdit::useIsKill(const LiveInterval &LI, } /// Find all live intervals that need to shrink, then remove the instruction. -void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink, - AAResults *AA) { +void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) { assert(MI->allDefsAreDead() && "Def isn't really dead"); SlotIndex Idx = LIS.getInstructionIndex(*MI).getRegSlot(); @@ -384,7 +382,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink, // register uses. That may provoke RA to split an interval at the KILL // and later result in an invalid live segment end. if (isOrigDef && DeadRemats && !HasLiveVRegUses && - TII.isTriviallyReMaterializable(*MI, AA)) { + TII.isTriviallyReMaterializable(*MI)) { LiveInterval &NewLI = createEmptyIntervalFrom(Dest, false); VNInfo *VNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator()); NewLI.addSegment(LiveInterval::Segment(Idx, Idx.getDeadSlot(), VNI)); @@ -414,14 +412,13 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink, } void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead, - ArrayRef<Register> RegsBeingSpilled, - AAResults *AA) { + ArrayRef<Register> RegsBeingSpilled) { ToShrinkSet ToShrink; for (;;) { // Erase all dead defs. while (!Dead.empty()) - eliminateDeadDef(Dead.pop_back_val(), ToShrink, AA); + eliminateDeadDef(Dead.pop_back_val(), ToShrink); if (ToShrink.empty()) break; diff --git a/llvm/lib/CodeGen/LiveVariables.cpp b/llvm/lib/CodeGen/LiveVariables.cpp index 94bdfab5e5e0..40250171fe1e 100644 --- a/llvm/lib/CodeGen/LiveVariables.cpp +++ b/llvm/lib/CodeGen/LiveVariables.cpp @@ -758,8 +758,7 @@ void LiveVariables::replaceKillInstruction(Register Reg, MachineInstr &OldMI, /// removeVirtualRegistersKilled - Remove all killed info for the specified /// instruction. 
void LiveVariables::removeVirtualRegistersKilled(MachineInstr &MI) { - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); + for (MachineOperand &MO : MI.operands()) { if (MO.isReg() && MO.isKill()) { MO.setIsKill(false); Register Reg = MO.getReg(); diff --git a/llvm/lib/CodeGen/LowerEmuTLS.cpp b/llvm/lib/CodeGen/LowerEmuTLS.cpp index 984dc452fbfd..a517ee3794ca 100644 --- a/llvm/lib/CodeGen/LowerEmuTLS.cpp +++ b/llvm/lib/CodeGen/LowerEmuTLS.cpp @@ -78,7 +78,7 @@ bool LowerEmuTLS::runOnModule(Module &M) { if (G.isThreadLocal()) TlsVars.append({&G}); } - for (const auto G : TlsVars) + for (const auto *const G : TlsVars) Changed |= addEmuTlsVar(M, G); return Changed; } diff --git a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp index eea24d8e9353..3e7b4dbc9d71 100644 --- a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp +++ b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp @@ -129,7 +129,7 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount, // Calculates the distance of MI from the beginning of its parent BB. auto getInstrIdx = [](const MachineInstr &MI) { unsigned i = 0; - for (auto &CurMI : *MI.getParent()) { + for (const auto &CurMI : *MI.getParent()) { if (&CurMI == &MI) return i; i++; @@ -416,7 +416,7 @@ bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; MachineRegisterInfo &MRI = MF.getRegInfo(); VRegRenamer Renamer(MRI); - for (auto MBB : RPOList) + for (auto *MBB : RPOList) Changed |= runOnBasicBlock(MBB, BBNum++, Renamer); return Changed; diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 0c94e1f7e474..e3d6b59c5077 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -3383,7 +3383,7 @@ static void initSlots2BasicBlocks( DenseMap<unsigned, const BasicBlock *> &Slots2BasicBlocks) { ModuleSlotTracker MST(F.getParent(), /*ShouldInitializeAllMetadata=*/false); MST.incorporateFunction(F); - for (auto &BB : F) { + for (const auto &BB : F) { if (BB.hasName()) continue; int Slot = MST.getLocalSlot(&BB); diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp index 4944cb46c5b5..aa9522bc3459 100644 --- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -437,7 +437,7 @@ void MIRParserImpl::setupDebugValueTracking( MF.setDebugInstrNumberingCount(MaxInstrNum); // Load any substitutions. 
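The removeVirtualRegistersKilled change above is one of several that swap an index-based operand walk for a range-for over MI.operands(). The same clean-up outside LLVM, with stub Operand/Instr types standing in for MachineOperand/MachineInstr:

#include <vector>

struct Operand { bool IsReg = false, IsKill = false; };
struct Instr { std::vector<Operand> Ops; };

// Before: index loop that re-reads the count and indexes every iteration.
void clearKillsIndexed(Instr &MI) {
  for (unsigned i = 0, e = MI.Ops.size(); i != e; ++i) {
    Operand &MO = MI.Ops[i];
    if (MO.IsReg && MO.IsKill)
      MO.IsKill = false;
  }
}

// After: range-for over the operand list; same behaviour, less noise, and no
// index/limit pair to drift apart in later edits.
void clearKills(Instr &MI) {
  for (Operand &MO : MI.Ops)
    if (MO.IsReg && MO.IsKill)
      MO.IsKill = false;
}

int main() {
  Instr MI{{{true, true}, {false, false}, {true, true}}};
  clearKills(MI);
  return (MI.Ops[0].IsKill || MI.Ops[2].IsKill) ? 1 : 0; // expect 0
}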
- for (auto &Sub : YamlMF.DebugValueSubstitutions) { + for (const auto &Sub : YamlMF.DebugValueSubstitutions) { MF.makeDebugValueSubstitution({Sub.SrcInst, Sub.SrcOp}, {Sub.DstInst, Sub.DstOp}, Sub.Subreg); } @@ -975,7 +975,7 @@ bool MIRParserImpl::parseMachineMetadata(PerFunctionMIParsingState &PFS, bool MIRParserImpl::parseMachineMetadataNodes( PerFunctionMIParsingState &PFS, MachineFunction &MF, const yaml::MachineFunction &YMF) { - for (auto &MDS : YMF.MachineMetadataNodes) { + for (const auto &MDS : YMF.MachineMetadataNodes) { if (parseMachineMetadata(PFS, MDS)) return true; } diff --git a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp index 7daf9025d303..d21d552227cf 100644 --- a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp +++ b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp @@ -13,10 +13,9 @@ #include "AllocationOrder.h" #include "RegAllocEvictionAdvisor.h" #include "RegAllocGreedy.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MLModelRunner.h" #include "llvm/Analysis/TensorSpec.h" -#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL) || defined(LLVM_HAVE_TF_API) +#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL) || defined(LLVM_HAVE_TF_API) #include "llvm/Analysis/ModelUnderTrainingRunner.h" #include "llvm/Analysis/NoInferenceModelRunner.h" #endif @@ -91,7 +90,6 @@ public: AU.setPreservesAll(); AU.addRequired<RegAllocEvictionAdvisorAnalysis>(); AU.addRequired<MachineBlockFrequencyInfo>(); - AU.addRequired<AAResultsWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -891,9 +889,7 @@ bool RegAllocScoring::runOnMachineFunction(MachineFunction &MF) { &getAnalysis<RegAllocEvictionAdvisorAnalysis>())) if (auto *Log = DevModeAnalysis->getLogger(MF)) Log->logFloatFinalReward(static_cast<float>( - calculateRegAllocScore( - MF, getAnalysis<MachineBlockFrequencyInfo>(), - getAnalysis<AAResultsWrapperPass>().getAAResults()) + calculateRegAllocScore(MF, getAnalysis<MachineBlockFrequencyInfo>()) .getScore())); return false; diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index 02c44fa85cd9..7381c7e6b09c 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -1436,7 +1436,7 @@ MachineBasicBlock::getSuccProbability(const_succ_iterator Succ) const { // ditribute the complemental of the sum to each unknown probability. unsigned KnownProbNum = 0; auto Sum = BranchProbability::getZero(); - for (auto &P : Probs) { + for (const auto &P : Probs) { if (!P.isUnknown()) { Sum += P; KnownProbNum++; diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index 4cc84f22bdde..9ff5c37627b4 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -965,7 +965,7 @@ bool MachineBlockPlacement::isTrellis( for (MachineBasicBlock *Succ : ViableSuccs) { int PredCount = 0; - for (auto SuccPred : Succ->predecessors()) { + for (auto *SuccPred : Succ->predecessors()) { // Allow triangle successors, but don't count them. if (Successors.count(SuccPred)) { // Make sure that it is actually a triangle. @@ -1063,7 +1063,7 @@ MachineBlockPlacement::getBestTrellisSuccessor( // Collect the edge frequencies of all edges that form the trellis. 
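MachineBasicBlock::getSuccProbability, touched above, sums the known successor probabilities and distributes the remaining probability mass evenly over the successors whose probability is unknown. A sketch of that policy with plain doubles; the real code uses fixed-point BranchProbability arithmetic, and the negative entries here merely mark "unknown":

#include <cstdio>
#include <vector>

// Successors with a recorded probability keep it; the complement of their sum
// is split evenly among the unknown ones.
static std::vector<double> fillUnknownProbs(std::vector<double> Probs) {
  double KnownSum = 0.0;
  unsigned UnknownCount = 0;
  for (double P : Probs) {
    if (P < 0.0)
      ++UnknownCount;
    else
      KnownSum += P;
  }
  if (UnknownCount == 0)
    return Probs;
  double Share = (KnownSum < 1.0 ? 1.0 - KnownSum : 0.0) / UnknownCount;
  for (double &P : Probs)
    if (P < 0.0)
      P = Share;
  return Probs;
}

int main() {
  // Two known edges (0.5 and 0.25) and two unknown edges -> 0.125 each.
  for (double P : fillUnknownProbs({0.5, -1.0, 0.25, -1.0}))
    std::printf("%.3f ", P);
  std::printf("\n");
  return 0;
}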
SmallVector<WeightedEdge, 8> Edges[2]; int SuccIndex = 0; - for (auto Succ : ViableSuccs) { + for (auto *Succ : ViableSuccs) { for (MachineBasicBlock *SuccPred : Succ->predecessors()) { // Skip any placed predecessors that are not BB if (SuccPred != BB) @@ -2451,7 +2451,7 @@ void MachineBlockPlacement::rotateLoopWithProfile( // as the sum of frequencies of exit edges we collect here, excluding the exit // edge from the tail of the loop chain. SmallVector<std::pair<MachineBasicBlock *, BlockFrequency>, 4> ExitsWithFreq; - for (auto BB : LoopChain) { + for (auto *BB : LoopChain) { auto LargestExitEdgeProb = BranchProbability::getZero(); for (auto *Succ : BB->successors()) { BlockChain *SuccChain = BlockToChain[Succ]; @@ -2561,7 +2561,7 @@ MachineBlockPlacement::collectLoopBlockSet(const MachineLoop &L) { // profile data is available. if (F->getFunction().hasProfileData() || ForceLoopColdBlock) { BlockFrequency LoopFreq(0); - for (auto LoopPred : L.getHeader()->predecessors()) + for (auto *LoopPred : L.getHeader()->predecessors()) if (!L.contains(LoopPred)) LoopFreq += MBFI->getBlockFreq(LoopPred) * MBPI->getEdgeProbability(LoopPred, L.getHeader()); diff --git a/llvm/lib/CodeGen/MachineCSE.cpp b/llvm/lib/CodeGen/MachineCSE.cpp index e60fd9f7883a..c6756b1d3737 100644 --- a/llvm/lib/CodeGen/MachineCSE.cpp +++ b/llvm/lib/CodeGen/MachineCSE.cpp @@ -415,7 +415,7 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) { // Okay, this instruction does a load. As a refinement, we allow the target // to decide whether the loaded value is actually a constant. If so, we can // actually use it as a load. - if (!MI->isDereferenceableInvariantLoad(AA)) + if (!MI->isDereferenceableInvariantLoad()) // FIXME: we should be able to hoist loads with no other side effects if // there are no other instructions which can change memory in this loop. // This is a trivial form of alias analysis. diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp index 722a709af240..57e2cd20bdd0 100644 --- a/llvm/lib/CodeGen/MachineCombiner.cpp +++ b/llvm/lib/CodeGen/MachineCombiner.cpp @@ -92,6 +92,7 @@ private: bool doSubstitute(unsigned NewSize, unsigned OldSize, bool OptForSize); bool combineInstructions(MachineBasicBlock *); MachineInstr *getOperandDef(const MachineOperand &MO); + bool isTransientMI(const MachineInstr *MI); unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs, DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, MachineTraceMetrics::Trace BlockTrace); @@ -158,6 +159,43 @@ MachineInstr *MachineCombiner::getOperandDef(const MachineOperand &MO) { return DefInstr; } +/// Return true if MI is unlikely to generate an actual target instruction. +bool MachineCombiner::isTransientMI(const MachineInstr *MI) { + if (!MI->isCopy()) + return MI->isTransient(); + + // If MI is a COPY, check if its src and dst registers can be coalesced. + Register Dst = MI->getOperand(0).getReg(); + Register Src = MI->getOperand(1).getReg(); + + if (!MI->isFullCopy()) { + // If src RC contains super registers of dst RC, it can also be coalesced. 
+ if (MI->getOperand(0).getSubReg() || Src.isPhysical() || Dst.isPhysical()) + return false; + + auto SrcSub = MI->getOperand(1).getSubReg(); + auto SrcRC = MRI->getRegClass(Src); + auto DstRC = MRI->getRegClass(Dst); + return TRI->getMatchingSuperRegClass(SrcRC, DstRC, SrcSub) != nullptr; + } + + if (Src.isPhysical() && Dst.isPhysical()) + return Src == Dst; + + if (Src.isVirtual() && Dst.isVirtual()) { + auto SrcRC = MRI->getRegClass(Src); + auto DstRC = MRI->getRegClass(Dst); + return SrcRC->hasSuperClassEq(DstRC) || SrcRC->hasSubClassEq(DstRC); + } + + if (Src.isVirtual()) + std::swap(Src, Dst); + + // Now Src is physical register, Dst is virtual register. + auto DstRC = MRI->getRegClass(Dst); + return DstRC->contains(Src); +} + /// Computes depth of instructions in vector \InsInstr. /// /// \param InsInstrs is a vector of machine instructions @@ -204,9 +242,10 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs, MachineInstr *DefInstr = getOperandDef(MO); if (DefInstr) { DepthOp = BlockTrace.getInstrCycles(*DefInstr).Depth; - LatencyOp = TSchedModel.computeOperandLatency( - DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()), - InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg())); + if (!isTransientMI(DefInstr)) + LatencyOp = TSchedModel.computeOperandLatency( + DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()), + InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg())); } } IDepth = std::max(IDepth, DepthOp + LatencyOp); @@ -305,7 +344,7 @@ std::pair<unsigned, unsigned> MachineCombiner::getLatenciesForInstrSequences( NewRootLatency += getLatency(&MI, NewRoot, BlockTrace); unsigned RootLatency = 0; - for (auto I : DelInstrs) + for (auto *I : DelInstrs) RootLatency += TSchedModel.computeInstrLatency(I); return {NewRootLatency, RootLatency}; @@ -488,7 +527,7 @@ static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI, for (auto *InstrPtr : DelInstrs) { InstrPtr->eraseFromParent(); // Erase all LiveRegs defined by the removed instruction - for (auto I = RegUnits.begin(); I != RegUnits.end(); ) { + for (auto *I = RegUnits.begin(); I != RegUnits.end();) { if (I->MI == InstrPtr) I = RegUnits.erase(I); else diff --git a/llvm/lib/CodeGen/MachineFrameInfo.cpp b/llvm/lib/CodeGen/MachineFrameInfo.cpp index ca5936a14779..f0190812389f 100644 --- a/llvm/lib/CodeGen/MachineFrameInfo.cpp +++ b/llvm/lib/CodeGen/MachineFrameInfo.cpp @@ -127,7 +127,7 @@ BitVector MachineFrameInfo::getPristineRegs(const MachineFunction &MF) const { BV.set(*CSR); // Saved CSRs are not pristine. 
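The MachineCombiner change above introduces isTransientMI so that getDepth stops charging operand latency for defs that are unlikely to become real instructions, typically COPYs expected to be coalesced away. A toy depth computation showing the effect; Inst and its fields are stand-ins, not MachineInstr:

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <vector>

// An instruction's depth is the max over its operands of (depth of the
// defining instruction + its latency), except that "transient" defs
// contribute no latency.
struct Inst {
  std::vector<int> DefIdx; // indices of the instructions defining the operands
  unsigned Latency = 1;
  bool Transient = false;
};

static std::vector<unsigned> computeDepths(const std::vector<Inst> &Insts) {
  std::vector<unsigned> Depth(Insts.size(), 0);
  for (std::size_t I = 0; I < Insts.size(); ++I)
    for (int D : Insts[I].DefIdx) {
      unsigned OpLatency = Insts[D].Transient ? 0 : Insts[D].Latency;
      Depth[I] = std::max(Depth[I], Depth[D] + OpLatency);
    }
  return Depth;
}

int main() {
  // 0: load (latency 4); 1: COPY of 0 (transient); 2: add using the COPY.
  std::vector<Inst> Insts = {{{}, 4, false}, {{0}, 1, true}, {{1}, 1, false}};
  std::vector<unsigned> Depth = computeDepths(Insts);
  // With the transient COPY skipped, the add sits at depth 4 instead of 5.
  std::printf("depth of add: %u\n", Depth[2]);
  return 0;
}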
- for (auto &I : getCalleeSavedInfo()) + for (const auto &I : getCalleeSavedInfo()) for (MCSubRegIterator S(I.getReg(), TRI, true); S.isValid(); ++S) BV.reset(*S); diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index f58996ea90c6..6b481a374382 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -911,8 +911,8 @@ static const MachineInstr *getCallInstr(const MachineInstr *MI) { if (!MI->isBundle()) return MI; - for (auto &BMI : make_range(getBundleStart(MI->getIterator()), - getBundleEnd(MI->getIterator()))) + for (const auto &BMI : make_range(getBundleStart(MI->getIterator()), + getBundleEnd(MI->getIterator()))) if (BMI.isCandidateForCallSiteEntry()) return &BMI; diff --git a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp index 867a7ed584b2..3e1aace855a5 100644 --- a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp +++ b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp @@ -146,7 +146,7 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) { return X.getSectionID().Type < Y.getSectionID().Type; }; llvm::sortBasicBlocksAndUpdateBranches(MF, Comparator); - + llvm::avoidZeroOffsetLandingPad(MF); return true; } diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index 31f45e194a97..e92dec5bea48 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -1203,7 +1203,7 @@ bool MachineInstr::isSafeToMove(AAResults *AA, bool &SawStore) const { // destination. The check for isInvariantLoad gives the target the chance to // classify the load as always returning a constant, e.g. a constant pool // load. - if (mayLoad() && !isDereferenceableInvariantLoad(AA)) + if (mayLoad() && !isDereferenceableInvariantLoad()) // Otherwise, this is a real load. If there is a store between the load and // end of block, we can't move it. return !SawStore; @@ -1348,7 +1348,7 @@ bool MachineInstr::hasOrderedMemoryRef() const { /// isDereferenceableInvariantLoad - Return true if this instruction will never /// trap and is loading from a location whose value is invariant across a run of /// this function. -bool MachineInstr::isDereferenceableInvariantLoad(AAResults *AA) const { +bool MachineInstr::isDereferenceableInvariantLoad() const { // If the instruction doesn't load at all, it isn't an invariant load. if (!mayLoad()) return false; @@ -1374,12 +1374,6 @@ bool MachineInstr::isDereferenceableInvariantLoad(AAResults *AA) const { if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) { if (PSV->isConstant(&MFI)) continue; - } else if (const Value *V = MMO->getValue()) { - // If we have an AliasAnalysis, ask it whether the memory is constant. - if (AA && - AA->pointsToConstantMemory( - MemoryLocation(V, MMO->getSize(), MMO->getAAInfo()))) - continue; } // Otherwise assume conservatively. 
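isDereferenceableInvariantLoad above loses its AAResults parameter and, with it, the pointsToConstantMemory escape hatch: a load now counts as invariant only through its own memory-operand flags or a constant pseudo source value. A toy model of the remaining policy, using stub types rather than the real MachineMemOperand API:

#include <vector>

// Stand-ins for MachineMemOperand / MachineInstr, for illustration only.
struct MemOperand {
  bool IsInvariant = false;
  bool IsDereferenceable = false;
  bool IsConstantPseudoSource = false; // e.g. a constant-pool entry
};

struct Inst {
  bool MayLoad = false;
  bool HasOrderedMemoryRef = false;
  std::vector<MemOperand> MMOs;
};

// Mirrors the shape of the updated check: no alias-analysis fallback, every
// memory operand must be provably invariant and dereferenceable on its own.
static bool isDereferenceableInvariantLoad(const Inst &MI) {
  if (!MI.MayLoad)
    return false;
  if (MI.HasOrderedMemoryRef || MI.MMOs.empty())
    return false;
  for (const MemOperand &MMO : MI.MMOs) {
    if (MMO.IsConstantPseudoSource)
      continue;
    if (MMO.IsInvariant && MMO.IsDereferenceable)
      continue;
    return false; // otherwise assume conservatively
  }
  return true;
}

int main() {
  Inst ConstPoolLoad{true, false, {{false, false, true}}};
  Inst PlainLoad{true, false, {{false, false, false}}};
  return (isDereferenceableInvariantLoad(ConstPoolLoad) &&
          !isDereferenceableInvariantLoad(PlainLoad))
             ? 0
             : 1;
}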
@@ -2273,7 +2267,7 @@ using MMOList = SmallVector<const MachineMemOperand *, 2>; static unsigned getSpillSlotSize(const MMOList &Accesses, const MachineFrameInfo &MFI) { unsigned Size = 0; - for (auto A : Accesses) + for (const auto *A : Accesses) if (MFI.isSpillSlotObjectIndex( cast<FixedStackPseudoSourceValue>(A->getPseudoValue()) ->getFrameIndex())) diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp index 00d75f8231c7..df7b6c782b91 100644 --- a/llvm/lib/CodeGen/MachineLICM.cpp +++ b/llvm/lib/CodeGen/MachineLICM.cpp @@ -230,8 +230,7 @@ namespace { bool IsGuaranteedToExecute(MachineBasicBlock *BB); - bool isTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA) const; + bool isTriviallyReMaterializable(const MachineInstr &MI) const; void EnterScope(MachineBasicBlock *MBB); @@ -666,9 +665,9 @@ bool MachineLICMBase::IsGuaranteedToExecute(MachineBasicBlock *BB) { /// virtual register uses. Even though rematerializable RA might not actually /// rematerialize it in this scenario. In that case we do not want to hoist such /// instruction out of the loop in a belief RA will sink it back if needed. -bool MachineLICMBase::isTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA) const { - if (!TII->isTriviallyReMaterializable(MI, AA)) +bool MachineLICMBase::isTriviallyReMaterializable( + const MachineInstr &MI) const { + if (!TII->isTriviallyReMaterializable(MI)) return false; for (const MachineOperand &MO : MI.operands()) { @@ -1174,7 +1173,7 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) { // Rematerializable instructions should always be hoisted providing the // register allocator can just pull them down again when needed. - if (isTriviallyReMaterializable(MI, AA)) + if (isTriviallyReMaterializable(MI)) return true; // FIXME: If there are long latency loop-invariant instructions inside the @@ -1227,8 +1226,8 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) { // High register pressure situation, only hoist if the instruction is going // to be remat'ed. - if (!isTriviallyReMaterializable(MI, AA) && - !MI.isDereferenceableInvariantLoad(AA)) { + if (!isTriviallyReMaterializable(MI) && + !MI.isDereferenceableInvariantLoad()) { LLVM_DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI); return false; } @@ -1247,7 +1246,7 @@ MachineInstr *MachineLICMBase::ExtractHoistableLoad(MachineInstr *MI) { // If not, we may be able to unfold a load and hoist that. // First test whether the instruction is loading from an amenable // memory location. - if (!MI->isDereferenceableInvariantLoad(AA)) + if (!MI->isDereferenceableInvariantLoad()) return nullptr; // Next determine the register class for a temporary register. diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index 8d500398f55e..52501ca7c871 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -219,7 +219,7 @@ bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) { TII = MF->getSubtarget().getInstrInfo(); RegClassInfo.runOnMachineFunction(*MF); - for (auto &L : *MLI) + for (const auto &L : *MLI) scheduleLoop(*L); return false; @@ -231,7 +231,7 @@ bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) { /// the loop. 
bool MachinePipeliner::scheduleLoop(MachineLoop &L) { bool Changed = false; - for (auto &InnerLoop : L) + for (const auto &InnerLoop : L) Changed |= scheduleLoop(*InnerLoop); #ifndef NDEBUG @@ -689,7 +689,7 @@ static bool isSuccOrder(SUnit *SUa, SUnit *SUb) { Worklist.push_back(SUa); while (!Worklist.empty()) { const SUnit *SU = Worklist.pop_back_val(); - for (auto &SI : SU->Succs) { + for (const auto &SI : SU->Succs) { SUnit *SuccSU = SI.getSUnit(); if (SI.getKind() == SDep::Order) { if (Visited.count(SuccSU)) @@ -706,11 +706,11 @@ static bool isSuccOrder(SUnit *SUa, SUnit *SUb) { /// Return true if the instruction causes a chain between memory /// references before and after it. -static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA) { +static bool isDependenceBarrier(MachineInstr &MI) { return MI.isCall() || MI.mayRaiseFPException() || MI.hasUnmodeledSideEffects() || (MI.hasOrderedMemoryRef() && - (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad(AA))); + (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad())); } /// Return the underlying objects for the memory references of an instruction. @@ -743,14 +743,14 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) { UndefValue::get(Type::getVoidTy(MF.getFunction().getContext())); for (auto &SU : SUnits) { MachineInstr &MI = *SU.getInstr(); - if (isDependenceBarrier(MI, AA)) + if (isDependenceBarrier(MI)) PendingLoads.clear(); else if (MI.mayLoad()) { SmallVector<const Value *, 4> Objs; ::getUnderlyingObjects(&MI, Objs); if (Objs.empty()) Objs.push_back(UnknownValue); - for (auto V : Objs) { + for (const auto *V : Objs) { SmallVector<SUnit *, 4> &SUs = PendingLoads[V]; SUs.push_back(&SU); } @@ -759,12 +759,12 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) { ::getUnderlyingObjects(&MI, Objs); if (Objs.empty()) Objs.push_back(UnknownValue); - for (auto V : Objs) { + for (const auto *V : Objs) { MapVector<const Value *, SmallVector<SUnit *, 4>>::iterator I = PendingLoads.find(V); if (I == PendingLoads.end()) continue; - for (auto Load : I->second) { + for (auto *Load : I->second) { if (isSuccOrder(Load, &SU)) continue; MachineInstr &LdMI = *Load->getInstr(); @@ -1407,8 +1407,8 @@ void SwingSchedulerDAG::CopyToPhiMutation::apply(ScheduleDAGInstrs *DAG) { SwingSchedulerDAG *SDAG = cast<SwingSchedulerDAG>(DAG); // Add the artificial dependencies if it does not form a cycle. 
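addLoopCarriedDependences, shown above, keeps a PendingLoads map keyed by underlying object: loads register themselves, a dependence barrier (a call, or an ordered memory reference that is not an invariant load) clears the map, and a store is ordered against every pending load on the same object. A toy replay of that bookkeeping; the op list and string-keyed objects are illustrative only:

#include <cstddef>
#include <cstdio>
#include <map>
#include <string>
#include <vector>

enum class Kind { Load, Store, Barrier };
struct Op {
  Kind K;
  std::string Object; // underlying object; empty for barriers
};

int main() {
  std::vector<Op> Ops = {{Kind::Load, "A"},
                         {Kind::Load, "B"},
                         {Kind::Store, "A"},
                         {Kind::Barrier, ""},
                         {Kind::Store, "B"}}; // no edge: the barrier cleared "B"
  std::map<std::string, std::vector<std::size_t>> PendingLoads;
  for (std::size_t I = 0; I < Ops.size(); ++I) {
    const Op &O = Ops[I];
    if (O.K == Kind::Barrier) {
      PendingLoads.clear(); // forget everything across a barrier
    } else if (O.K == Kind::Load) {
      PendingLoads[O.Object].push_back(I); // remember the load per object
    } else {
      auto It = PendingLoads.find(O.Object);
      if (It == PendingLoads.end())
        continue;
      for (std::size_t LoadIdx : It->second)
        std::printf("loop-carried order: load #%zu <-> store #%zu on %s\n",
                    LoadIdx, I, O.Object.c_str());
    }
  }
  return 0;
}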
- for (auto I : UseSUs) { - for (auto Src : SrcSUs) { + for (auto *I : UseSUs) { + for (auto *Src : SrcSUs) { if (!SDAG->Topo.IsReachable(I, Src) && Src != I) { Src->addPred(SDep(I, SDep::Artificial)); SDAG->Topo.AddPred(Src, I); @@ -1878,7 +1878,7 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) { Order = TopDown; LLVM_DEBUG(dbgs() << " Top down (intersect) "); } else if (NodeSets.size() == 1) { - for (auto &N : Nodes) + for (const auto &N : Nodes) if (N->Succs.size() == 0) R.insert(N); Order = BottomUp; diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index 5f80445a5a34..96131dc2983e 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -1698,7 +1698,7 @@ void BaseMemOpClusterMutation::collectMemOpRecords( << ", Width: " << Width << "\n"); } #ifndef NDEBUG - for (auto *Op : BaseOps) + for (const auto *Op : BaseOps) assert(Op); #endif } diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index 006ba9273dfb..0568bc6a4600 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -446,7 +446,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { MadeChange |= ProcessBlock(MBB); // If we have anything we marked as toSplit, split it now. - for (auto &Pair : ToSplit) { + for (const auto &Pair : ToSplit) { auto NewSucc = Pair.first->SplitCriticalEdge(Pair.second, *this); if (NewSucc != nullptr) { LLVM_DEBUG(dbgs() << " *** Splitting critical edge: " diff --git a/llvm/lib/CodeGen/MachineStableHash.cpp b/llvm/lib/CodeGen/MachineStableHash.cpp index a85dbf1de1ee..b546a5082b07 100644 --- a/llvm/lib/CodeGen/MachineStableHash.cpp +++ b/llvm/lib/CodeGen/MachineStableHash.cpp @@ -200,7 +200,7 @@ stable_hash llvm::stableHashValue(const MachineInstr &MI, bool HashVRegs, stable_hash llvm::stableHashValue(const MachineBasicBlock &MBB) { SmallVector<stable_hash> HashComponents; // TODO: Hash more stuff like block alignment and branch probabilities. - for (auto &MI : MBB) + for (const auto &MI : MBB) HashComponents.push_back(stableHashValue(MI)); return stable_hash_combine_range(HashComponents.begin(), HashComponents.end()); @@ -209,7 +209,7 @@ stable_hash llvm::stableHashValue(const MachineBasicBlock &MBB) { stable_hash llvm::stableHashValue(const MachineFunction &MF) { SmallVector<stable_hash> HashComponents; // TODO: Hash lots more stuff like function alignment and stack objects. - for (auto &MBB : MF) + for (const auto &MBB : MF) HashComponents.push_back(stableHashValue(MBB)); return stable_hash_combine_range(HashComponents.begin(), HashComponents.end()); diff --git a/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/llvm/lib/CodeGen/MachineTraceMetrics.cpp index 0a5ff276fedc..715e5da26989 100644 --- a/llvm/lib/CodeGen/MachineTraceMetrics.cpp +++ b/llvm/lib/CodeGen/MachineTraceMetrics.cpp @@ -484,7 +484,7 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { // Run an upwards post-order search for the trace start. Bounds.Downward = false; Bounds.Visited.clear(); - for (auto I : inverse_post_order_ext(MBB, Bounds)) { + for (const auto *I : inverse_post_order_ext(MBB, Bounds)) { LLVM_DEBUG(dbgs() << " pred for " << printMBBReference(*I) << ": "); TraceBlockInfo &TBI = BlockInfo[I->getNumber()]; // All the predecessors have been visited, pick the preferred one. 
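The MachineStableHash hunks above hash a function hierarchically: per-instruction hashes are folded into a block hash, and block hashes into a function hash. A self-contained sketch of the same layering, using a simple FNV-style mix in place of LLVM's stable_hash_combine_range and opcode strings in place of real instructions:

#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

using Hash = std::uint64_t;

static Hash combine(Hash Seed, Hash Value) {
  return (Seed ^ Value) * 0x100000001b3ULL; // FNV-style mixing step
}

static Hash hashInstr(const std::string &Opcode) {
  Hash H = 0xcbf29ce484222325ULL;
  for (char C : Opcode)
    H = combine(H, static_cast<unsigned char>(C));
  return H;
}

static Hash hashBlock(const std::vector<std::string> &Instrs) {
  Hash H = 0xcbf29ce484222325ULL;
  for (const auto &MI : Instrs)
    H = combine(H, hashInstr(MI)); // fold instruction hashes into the block
  return H;
}

static Hash hashFunction(const std::vector<std::vector<std::string>> &Blocks) {
  Hash H = 0xcbf29ce484222325ULL;
  for (const auto &MBB : Blocks)
    H = combine(H, hashBlock(MBB)); // fold block hashes into the function
  return H;
}

int main() {
  std::printf("%llx\n",
              (unsigned long long)hashFunction({{"MOV", "ADD"}, {"RET"}}));
  return 0;
}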
@@ -502,7 +502,7 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { // Run a downwards post-order search for the trace end. Bounds.Downward = true; Bounds.Visited.clear(); - for (auto I : post_order_ext(MBB, Bounds)) { + for (const auto *I : post_order_ext(MBB, Bounds)) { LLVM_DEBUG(dbgs() << " succ for " << printMBBReference(*I) << ": "); TraceBlockInfo &TBI = BlockInfo[I->getNumber()]; // All the successors have been visited, pick the preferred one. diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index 7a008bae726e..93e68918b632 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -2802,8 +2802,8 @@ void MachineVerifier::visitMachineFunctionAfter() { // tracking numbers. if (MF->getFunction().getSubprogram()) { DenseSet<unsigned> SeenNumbers; - for (auto &MBB : *MF) { - for (auto &MI : MBB) { + for (const auto &MBB : *MF) { + for (const auto &MI : MBB) { if (auto Num = MI.peekDebugInstrNum()) { auto Result = SeenNumbers.insert((unsigned)Num); if (!Result.second) diff --git a/llvm/lib/CodeGen/RDFGraph.cpp b/llvm/lib/CodeGen/RDFGraph.cpp index ec383b9b1c65..51de99b81057 100644 --- a/llvm/lib/CodeGen/RDFGraph.cpp +++ b/llvm/lib/CodeGen/RDFGraph.cpp @@ -1395,7 +1395,7 @@ void DataFlowGraph::recordDefsForDF(BlockRefsMap &PhiM, // Finally, add the set of defs to each block in the iterated dominance // frontier. - for (auto DB : IDF) { + for (auto *DB : IDF) { NodeAddr<BlockNode*> DBA = findBlock(DB); PhiM[DBA.Id].insert(Defs.begin(), Defs.end()); } @@ -1657,7 +1657,7 @@ void DataFlowGraph::linkBlockRefs(DefStackMap &DefM, NodeAddr<BlockNode*> BA) { // Recursively process all children in the dominator tree. MachineDomTreeNode *N = MDT.getNode(BA.Addr->getCode()); - for (auto I : *N) { + for (auto *I : *N) { MachineBasicBlock *SB = I->getBlock(); NodeAddr<BlockNode*> SBA = findBlock(SB); linkBlockRefs(DefM, SBA); diff --git a/llvm/lib/CodeGen/RDFLiveness.cpp b/llvm/lib/CodeGen/RDFLiveness.cpp index 2fd947086b4d..d8eac20d16b6 100644 --- a/llvm/lib/CodeGen/RDFLiveness.cpp +++ b/llvm/lib/CodeGen/RDFLiveness.cpp @@ -61,7 +61,7 @@ namespace rdf { raw_ostream &operator<< (raw_ostream &OS, const Print<Liveness::RefMap> &P) { OS << '{'; - for (auto &I : P.Obj) { + for (const auto &I : P.Obj) { OS << ' ' << printReg(I.first, &P.G.getTRI()) << '{'; for (auto J = I.second.begin(), E = I.second.end(); J != E; ) { OS << Print<NodeId>(J->first, P.G) << PrintLaneMaskOpt(J->second); @@ -767,7 +767,7 @@ void Liveness::computeLiveIns() { } for (auto I : IDF) - for (auto S : I.second) + for (auto *S : I.second) IIDF[S].insert(I.first); computePhiInfo(); @@ -926,7 +926,7 @@ void Liveness::resetKills(MachineBasicBlock *B) { BitVector LiveIn(TRI.getNumRegs()), Live(TRI.getNumRegs()); CopyLiveIns(B, LiveIn); - for (auto SI : B->successors()) + for (auto *SI : B->successors()) CopyLiveIns(SI, Live); for (MachineInstr &MI : llvm::reverse(*B)) { @@ -1003,7 +1003,7 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) { // Go up the dominator tree (depth-first). 
MachineDomTreeNode *N = MDT.getNode(B); - for (auto I : *N) { + for (auto *I : *N) { RefMap L; MachineBasicBlock *SB = I->getBlock(); traverse(SB, L); @@ -1015,7 +1015,7 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) { if (Trace) { dbgs() << "\n-- " << printMBBReference(*B) << ": " << __func__ << " after recursion into: {"; - for (auto I : *N) + for (auto *I : *N) dbgs() << ' ' << I->getBlock()->getNumber(); dbgs() << " }\n"; dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n'; @@ -1155,7 +1155,7 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) { dbgs() << " Local: " << Print<RegisterAggr>(Local, DFG) << '\n'; } - for (auto C : IIDF[B]) { + for (auto *C : IIDF[B]) { RegisterAggr &LiveC = LiveMap[C]; for (const std::pair<const RegisterId, NodeRefSet> &S : LiveIn) for (auto R : S.second) diff --git a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp index 69db8bad54f9..d9ced9191fae 100644 --- a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp +++ b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp @@ -635,7 +635,7 @@ ReachingDefAnalysis::isSafeToRemove(MachineInstr *MI, InstSet &Visited, SmallPtrSet<MachineInstr*, 4> Uses; getGlobalUses(MI, MO.getReg(), Uses); - for (auto I : Uses) { + for (auto *I : Uses) { if (Ignore.count(I) || ToRemove.count(I)) continue; if (!isSafeToRemove(I, Visited, ToRemove, Ignore)) diff --git a/llvm/lib/CodeGen/RegAllocBase.cpp b/llvm/lib/CodeGen/RegAllocBase.cpp index 0c18814189eb..990dd84c829d 100644 --- a/llvm/lib/CodeGen/RegAllocBase.cpp +++ b/llvm/lib/CodeGen/RegAllocBase.cpp @@ -166,7 +166,7 @@ void RegAllocBase::allocatePhysRegs() { void RegAllocBase::postOptimization() { spiller().postOptimization(); - for (auto DeadInst : DeadRemats) { + for (auto *DeadInst : DeadRemats) { LIS->RemoveMachineInstrFromMaps(*DeadInst); DeadInst->eraseFromParent(); } diff --git a/llvm/lib/CodeGen/RegAllocBasic.cpp b/llvm/lib/CodeGen/RegAllocBasic.cpp index 7defdf04aec8..91795f3d27fe 100644 --- a/llvm/lib/CodeGen/RegAllocBasic.cpp +++ b/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -135,6 +135,7 @@ INITIALIZE_PASS_DEPENDENCY(LiveIntervals) INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer) INITIALIZE_PASS_DEPENDENCY(MachineScheduler) INITIALIZE_PASS_DEPENDENCY(LiveStacks) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(VirtRegMap) diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp index 72ceaa768803..9e4e26f1392e 100644 --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -1478,7 +1478,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { RegUnitStates.assign(TRI->getNumRegUnits(), regFree); assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?"); - for (auto &LiveReg : MBB.liveouts()) + for (const auto &LiveReg : MBB.liveouts()) setPhysRegState(LiveReg.PhysReg, regPreAssigned); Coalesced.clear(); @@ -1580,8 +1580,7 @@ FunctionPass *llvm::createFastRegisterAllocator() { return new RegAllocFast(); } -FunctionPass *llvm::createFastRegisterAllocator( - std::function<bool(const TargetRegisterInfo &TRI, - const TargetRegisterClass &RC)> Ftor, bool ClearVirtRegs) { +FunctionPass *llvm::createFastRegisterAllocator(RegClassFilterFunc Ftor, + bool ClearVirtRegs) { return new RegAllocFast(Ftor, ClearVirtRegs); } diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index 2efb98ae200d..4a54d7ebf8a9 
100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -180,16 +180,7 @@ FunctionPass* llvm::createGreedyRegisterAllocator() { return new RAGreedy(); } -namespace llvm { -FunctionPass* createGreedyRegisterAllocator( - std::function<bool(const TargetRegisterInfo &TRI, - const TargetRegisterClass &RC)> Ftor); - -} - -FunctionPass* llvm::createGreedyRegisterAllocator( - std::function<bool(const TargetRegisterInfo &TRI, - const TargetRegisterClass &RC)> Ftor) { +FunctionPass *llvm::createGreedyRegisterAllocator(RegClassFilterFunc Ftor) { return new RAGreedy(Ftor); } @@ -202,8 +193,6 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired<MachineBlockFrequencyInfo>(); AU.addPreserved<MachineBlockFrequencyInfo>(); - AU.addRequired<AAResultsWrapperPass>(); - AU.addPreserved<AAResultsWrapperPass>(); AU.addRequired<LiveIntervals>(); AU.addPreserved<LiveIntervals>(); AU.addRequired<SlotIndexes>(); @@ -2530,7 +2519,6 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { Bundles = &getAnalysis<EdgeBundles>(); SpillPlacer = &getAnalysis<SpillPlacement>(); DebugVars = &getAnalysis<LiveDebugVariables>(); - AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); initializeCSRCost(); @@ -2552,7 +2540,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { LLVM_DEBUG(LIS->dump()); SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops)); - SE.reset(new SplitEditor(*SA, *AA, *LIS, *VRM, *DomTree, *MBFI, *VRAI)); + SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree, *MBFI, *VRAI)); IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI); GlobalCand.resize(32); // This will grow as needed. diff --git a/llvm/lib/CodeGen/RegAllocGreedy.h b/llvm/lib/CodeGen/RegAllocGreedy.h index 358e74541a54..316b12d0213b 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.h +++ b/llvm/lib/CodeGen/RegAllocGreedy.h @@ -25,7 +25,6 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveRangeEdit.h" @@ -54,7 +53,6 @@ class MachineLoop; class MachineLoopInfo; class MachineOptimizationRemarkEmitter; class MachineOptimizationRemarkMissed; -class SlotIndex; class SlotIndexes; class TargetInstrInfo; class VirtRegMap; @@ -174,7 +172,6 @@ private: EdgeBundles *Bundles; SpillPlacement *SpillPlacer; LiveDebugVariables *DebugVars; - AliasAnalysis *AA; // state std::unique_ptr<Spiller> SpillerInstance; diff --git a/llvm/lib/CodeGen/RegAllocPBQP.cpp b/llvm/lib/CodeGen/RegAllocPBQP.cpp index 8c262130fb70..b3d926eeb552 100644 --- a/llvm/lib/CodeGen/RegAllocPBQP.cpp +++ b/llvm/lib/CodeGen/RegAllocPBQP.cpp @@ -783,7 +783,7 @@ void RegAllocPBQP::finalizeAlloc(MachineFunction &MF, void RegAllocPBQP::postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS) { VRegSpiller.postOptimization(); /// Remove dead defs because of rematerialization. 
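The register-allocator factories above (createFastRegisterAllocator, createGreedyRegisterAllocator) now take the RegClassFilterFunc alias instead of re-spelling the std::function signature at every declaration. A minimal sketch of the pattern with stub register-info types; countAllocatable and the OnlyVectors filter are hypothetical:

#include <cstdio>
#include <functional>
#include <vector>

struct TargetRegisterInfo {};
struct TargetRegisterClass { bool IsVector = false; };

// Name the filter signature once...
using RegClassFilterFunc =
    std::function<bool(const TargetRegisterInfo &TRI,
                       const TargetRegisterClass &RC)>;

// ...so every function that takes a filter can use the alias.
static int countAllocatable(const std::vector<TargetRegisterClass> &Classes,
                            const TargetRegisterInfo &TRI,
                            RegClassFilterFunc ShouldAllocateClass) {
  int N = 0;
  for (const auto &RC : Classes)
    if (ShouldAllocateClass(TRI, RC))
      ++N;
  return N;
}

int main() {
  TargetRegisterInfo TRI;
  std::vector<TargetRegisterClass> Classes = {{false}, {true}, {true}};
  // Example filter: only hand vector register classes to this allocator run.
  RegClassFilterFunc OnlyVectors = [](const TargetRegisterInfo &,
                                      const TargetRegisterClass &RC) {
    return RC.IsVector;
  };
  std::printf("%d\n", countAllocatable(Classes, TRI, OnlyVectors)); // 2
  return 0;
}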
- for (auto DeadInst : DeadRemats) { + for (auto *DeadInst : DeadRemats) { LIS.RemoveMachineInstrFromMaps(*DeadInst); DeadInst->eraseFromParent(); } diff --git a/llvm/lib/CodeGen/RegAllocScore.cpp b/llvm/lib/CodeGen/RegAllocScore.cpp index 32fa5e07dd16..17e3eeef664b 100644 --- a/llvm/lib/CodeGen/RegAllocScore.cpp +++ b/llvm/lib/CodeGen/RegAllocScore.cpp @@ -74,8 +74,7 @@ double RegAllocScore::getScore() const { RegAllocScore llvm::calculateRegAllocScore(const MachineFunction &MF, - const MachineBlockFrequencyInfo &MBFI, - AAResults &AAResults) { + const MachineBlockFrequencyInfo &MBFI) { return calculateRegAllocScore( MF, [&](const MachineBasicBlock &MBB) { @@ -83,7 +82,7 @@ llvm::calculateRegAllocScore(const MachineFunction &MF, }, [&](const MachineInstr &MI) { return MF.getSubtarget().getInstrInfo()->isTriviallyReMaterializable( - MI, &AAResults); + MI); }); } diff --git a/llvm/lib/CodeGen/RegAllocScore.h b/llvm/lib/CodeGen/RegAllocScore.h index 2bcd0b5895bf..b80adae29f23 100644 --- a/llvm/lib/CodeGen/RegAllocScore.h +++ b/llvm/lib/CodeGen/RegAllocScore.h @@ -19,7 +19,6 @@ namespace llvm { -class AAResults; class MachineBasicBlock; class MachineBlockFrequencyInfo; class MachineFunction; @@ -62,8 +61,7 @@ public: /// different policies, the better policy would have a smaller score. /// The implementation is the overload below (which is also easily unittestable) RegAllocScore calculateRegAllocScore(const MachineFunction &MF, - const MachineBlockFrequencyInfo &MBFI, - AAResults &AAResults); + const MachineBlockFrequencyInfo &MBFI); /// Implementation of the above, which is also more easily unittestable. RegAllocScore calculateRegAllocScore( diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index 930d05324440..8a6f823c8a0c 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -1148,7 +1148,7 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP, // we need to keep the copy of B = A at the end of Pred if we remove // B = A from MBB. 
bool ValB_Changed = false; - for (auto VNI : IntB.valnos) { + for (auto *VNI : IntB.valnos) { if (VNI->isUnused()) continue; if (PVal->def < VNI->def && VNI->def < LIS->getMBBEndIdx(Pred)) { @@ -1306,7 +1306,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, } if (!TII->isAsCheapAsAMove(*DefMI)) return false; - if (!TII->isTriviallyReMaterializable(*DefMI, AA)) + if (!TII->isTriviallyReMaterializable(*DefMI)) return false; if (!definesFullReg(*DefMI, SrcReg)) return false; diff --git a/llvm/lib/CodeGen/RegisterPressure.cpp b/llvm/lib/CodeGen/RegisterPressure.cpp index 62a459fca611..b14a36e4eeb4 100644 --- a/llvm/lib/CodeGen/RegisterPressure.cpp +++ b/llvm/lib/CodeGen/RegisterPressure.cpp @@ -581,7 +581,7 @@ void RegisterOperands::collect(const MachineInstr &MI, void RegisterOperands::detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS) { SlotIndex SlotIdx = LIS.getInstructionIndex(MI); - for (auto RI = Defs.begin(); RI != Defs.end(); /*empty*/) { + for (auto *RI = Defs.begin(); RI != Defs.end(); /*empty*/) { Register Reg = RI->RegUnit; const LiveRange *LR = getLiveRange(LIS, Reg); if (LR != nullptr) { @@ -602,7 +602,7 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI) { - for (auto I = Defs.begin(); I != Defs.end(); ) { + for (auto *I = Defs.begin(); I != Defs.end();) { LaneBitmask LiveAfter = getLiveLanesAt(LIS, MRI, true, I->RegUnit, Pos.getDeadSlot()); // If the def is all that is live after the instruction, then in case @@ -620,7 +620,7 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS, ++I; } } - for (auto I = Uses.begin(); I != Uses.end(); ) { + for (auto *I = Uses.begin(); I != Uses.end();) { LaneBitmask LiveBefore = getLiveLanesAt(LIS, MRI, true, I->RegUnit, Pos.getBaseIndex()); LaneBitmask LaneMask = I->LaneMask & LiveBefore; diff --git a/llvm/lib/CodeGen/SafeStack.cpp b/llvm/lib/CodeGen/SafeStack.cpp index e7116ec3ea28..00a551ade213 100644 --- a/llvm/lib/CodeGen/SafeStack.cpp +++ b/llvm/lib/CodeGen/SafeStack.cpp @@ -340,7 +340,7 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) { // analysis here, which would look at all uses of an argument inside // the function being called. auto B = CS.arg_begin(), E = CS.arg_end(); - for (auto A = B; A != E; ++A) + for (const auto *A = B; A != E; ++A) if (A->get() == V) if (!(CS.doesNotCapture(A - B) && (CS.doesNotAccessMemory(A - B) || CS.doesNotAccessMemory()))) { @@ -498,7 +498,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( if (ClColoring) SSC.run(); - for (auto *I : SSC.getMarkers()) { + for (const auto *I : SSC.getMarkers()) { auto *Op = dyn_cast<Instruction>(I->getOperand(1)); const_cast<IntrinsicInst *>(I)->eraseFromParent(); // Remove the operand bitcast, too, if it has no more uses left. diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index 07dcc34fbf15..4fc9399c2b9e 100644 --- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -530,9 +530,9 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { /// Returns true if MI is an instruction we are unable to reason about /// (like a call or something with unmodeled side effects). 
-static inline bool isGlobalMemoryObject(AAResults *AA, MachineInstr *MI) { +static inline bool isGlobalMemoryObject(MachineInstr *MI) { return MI->isCall() || MI->hasUnmodeledSideEffects() || - (MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad(AA)); + (MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad()); } void ScheduleDAGInstrs::addChainDependency (SUnit *SUa, SUnit *SUb, @@ -880,7 +880,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA, // actual addresses). // This is a barrier event that acts as a pivotal node in the DAG. - if (isGlobalMemoryObject(AA, &MI)) { + if (isGlobalMemoryObject(&MI)) { // Become the barrier chain. if (BarrierChain) @@ -917,7 +917,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA, // If it's not a store or a variant load, we're done. if (!MI.mayStore() && - !(MI.mayLoad() && !MI.isDereferenceableInvariantLoad(AA))) + !(MI.mayLoad() && !MI.isDereferenceableInvariantLoad())) continue; // Always add dependecy edge to BarrierChain if present. diff --git a/llvm/lib/CodeGen/SelectOptimize.cpp b/llvm/lib/CodeGen/SelectOptimize.cpp index d627519a34aa..011f55efce1d 100644 --- a/llvm/lib/CodeGen/SelectOptimize.cpp +++ b/llvm/lib/CodeGen/SelectOptimize.cpp @@ -433,7 +433,7 @@ void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { DebugPseudoINS.push_back(&*DIt); DIt++; } - for (auto DI : DebugPseudoINS) { + for (auto *DI : DebugPseudoINS) { DI->moveBefore(&*EndBlock->getFirstInsertionPt()); } diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 2654c00929d8..edb0756e8c3b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1868,8 +1868,7 @@ SDValue DAGCombiner::combine(SDNode *N) { // If N is a commutative binary node, try to eliminate it if the commuted // version is already present in the DAG. - if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) && - N->getNumValues() == 1) { + if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -4159,6 +4158,10 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags())) return RMUL; + // Simplify the operands using demanded-bits information. + if (SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + return SDValue(); } @@ -5978,44 +5981,64 @@ static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) { if (!TLI.isTypeLegal(VT)) return SDValue(); - // Look through an optional extension and find a 'not'. - // TODO: Should we favor test+set even without the 'not' op? - SDValue Not = And->getOperand(0), And1 = And->getOperand(1); - if (Not.getOpcode() == ISD::ANY_EXTEND) - Not = Not.getOperand(0); - if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1)) + // Look through an optional extension. + SDValue And0 = And->getOperand(0), And1 = And->getOperand(1); + if (And0.getOpcode() == ISD::ANY_EXTEND && And0.hasOneUse()) + And0 = And0.getOperand(0); + if (!isOneConstant(And1) || !And0.hasOneUse()) return SDValue(); - // Look though an optional truncation. The source operand may not be the same - // type as the original 'and', but that is ok because we are masking off - // everything but the low bit. - SDValue Srl = Not.getOperand(0); - if (Srl.getOpcode() == ISD::TRUNCATE) - Srl = Srl.getOperand(0); + SDValue Src = And0; + + // Attempt to find a 'not' op. 
+ // TODO: Should we favor test+set even without the 'not' op? + bool FoundNot = false; + if (isBitwiseNot(Src)) { + FoundNot = true; + Src = Src.getOperand(0); + + // Look though an optional truncation. The source operand may not be the + // same type as the original 'and', but that is ok because we are masking + // off everything but the low bit. + if (Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse()) + Src = Src.getOperand(0); + } // Match a shift-right by constant. - if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() || - !isa<ConstantSDNode>(Srl.getOperand(1))) + if (Src.getOpcode() != ISD::SRL || !Src.hasOneUse()) return SDValue(); // We might have looked through casts that make this transform invalid. // TODO: If the source type is wider than the result type, do the mask and // compare in the source type. - const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1); - unsigned VTBitWidth = VT.getSizeInBits(); - if (ShiftAmt.uge(VTBitWidth)) + unsigned VTBitWidth = VT.getScalarSizeInBits(); + SDValue ShiftAmt = Src.getOperand(1); + auto *ShiftAmtC = dyn_cast<ConstantSDNode>(ShiftAmt); + if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(VTBitWidth)) return SDValue(); - if (!TLI.hasBitTest(Srl.getOperand(0), Srl.getOperand(1))) + // Set source to shift source. + Src = Src.getOperand(0); + + // Try again to find a 'not' op. + // TODO: Should we favor test+set even with two 'not' ops? + if (!FoundNot) { + if (!isBitwiseNot(Src)) + return SDValue(); + Src = Src.getOperand(0); + } + + if (!TLI.hasBitTest(Src, ShiftAmt)) return SDValue(); // Turn this into a bit-test pattern using mask op + setcc: // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0 + // and (srl (not X), C)), 1 --> (and X, 1<<C) == 0 SDLoc DL(And); - SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT); + SDValue X = DAG.getZExtOrTrunc(Src, DL, VT); EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); SDValue Mask = DAG.getConstant( - APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT); + APInt::getOneBitSet(VTBitWidth, ShiftAmtC->getZExtValue()), DL, VT); SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask); SDValue Zero = DAG.getConstant(0, DL, VT); SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ); @@ -6229,7 +6252,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // This can be a pure constant or a vector splat, in which case we treat the // vector as a scalar and use the splat value. APInt Constant = APInt::getZero(1); - if (const ConstantSDNode *C = isConstOrConstSplat(N1)) { + if (const ConstantSDNode *C = isConstOrConstSplat( + N1, /*AllowUndef=*/false, /*AllowTruncation=*/true)) { Constant = C->getAPIntValue(); } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) { APInt SplatValue, SplatUndef; @@ -6339,18 +6363,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // fold (and (load x), 255) -> (zextload x, i8) // fold (and (extload x, i16), 255) -> (zextload x, i8) - // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8) - if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD || - (N0.getOpcode() == ISD::ANY_EXTEND && - N0.getOperand(0).getOpcode() == ISD::LOAD))) { - if (SDValue Res = reduceLoadWidth(N)) { - LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND - ? 
cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0); - AddToWorklist(N); - DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res); - return SDValue(N, 0); - } - } + if (N1C && N0.getOpcode() == ISD::LOAD && !VT.isVector()) + if (SDValue Res = reduceLoadWidth(N)) + return Res; if (LegalTypes) { // Attempt to propagate the AND back up to the leaves which, if they're @@ -6856,20 +6871,23 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) { } /// OR combines for which the commuted variant will be tried as well. -static SDValue visitORCommutative( - SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) { +static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, + SDNode *N) { EVT VT = N0.getValueType(); if (N0.getOpcode() == ISD::AND) { + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y) // TODO: Set AllowUndefs = true. - if (getBitwiseNotOperand(N0.getOperand(1), N0.getOperand(0), + if (getBitwiseNotOperand(N01, N00, /* AllowUndefs */ false) == N1) - return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1); + return DAG.getNode(ISD::OR, SDLoc(N), VT, N00, N1); // fold (or (and (xor Y, -1), X), Y) -> (or X, Y) - if (getBitwiseNotOperand(N0.getOperand(0), N0.getOperand(1), + if (getBitwiseNotOperand(N00, N01, /* AllowUndefs */ false) == N1) - return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1); + return DAG.getNode(ISD::OR, SDLoc(N), VT, N01, N1); } if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG)) @@ -7915,7 +7933,7 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) { int64_t FirstOffset = INT64_MAX; StoreSDNode *FirstStore = nullptr; Optional<BaseIndexOffset> Base; - for (auto Store : Stores) { + for (auto *Store : Stores) { // All the stores store different parts of the CombinedValue. A truncate is // required to get the partial value. SDValue Trunc = Store->getValue(); @@ -8488,28 +8506,6 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { return DAG.getNode(ISD::AND, DL, VT, NotX, N1); } - if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) { - ConstantSDNode *XorC = isConstOrConstSplat(N1); - ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1)); - unsigned BitWidth = VT.getScalarSizeInBits(); - if (XorC && ShiftC) { - // Don't crash on an oversized shift. We can not guarantee that a bogus - // shift has been simplified to undef. - uint64_t ShiftAmt = ShiftC->getLimitedValue(); - if (ShiftAmt < BitWidth) { - APInt Ones = APInt::getAllOnes(BitWidth); - Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt); - if (XorC->getAPIntValue() == Ones) { - // If the xor constant is a shifted -1, do a 'not' before the shift: - // xor (X << ShiftC), XorC --> (not X) << ShiftC - // xor (X >> ShiftC), XorC --> (not X) >> ShiftC - SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT); - return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1)); - } - } - } - } - // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X) if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) { SDValue A = N0Opcode == ISD::ADD ? N0 : N1; @@ -11817,6 +11813,9 @@ SDValue DAGCombiner::foldSextSetcc(SDNode *N) { EVT N00VT = N00.getValueType(); SDLoc DL(N); + // Propagate fast-math-flags. + SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags()); + // On some architectures (such as SSE/NEON/etc) the SETCC result type is // the same size as the compared operands. Try to optimize sext(setcc()) // if this is the case. 
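The combineShiftAnd1ToBitTest hunk earlier in this file's diff now accepts the 'not' on either side of the shift. As a standalone sanity check (plain C++, independent of the LLVM APIs and not part of the patch), the two folds stated in the patch's own comments can be verified exhaustively over 8-bit values:

// Exhaustive 8-bit check of the folds documented in combineShiftAnd1ToBitTest:
//   and (not (srl X, C)), 1  -->  (and X, 1<<C) == 0
//   and (srl (not X), C), 1  -->  (and X, 1<<C) == 0
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  for (unsigned X = 0; X < 256; ++X) {
    for (unsigned C = 0; C < 8; ++C) {
      uint8_t x = static_cast<uint8_t>(X);
      // and (not (srl X, C)), 1 -- complement applied after the shift.
      unsigned NotSrl = (~(x >> C)) & 1;
      // and (srl (not X), C), 1 -- complement applied before the shift.
      unsigned SrlNot = (static_cast<uint8_t>(~x) >> C) & 1;
      // (and X, 1<<C) == 0 -- the bit-test form the combine produces.
      unsigned BitTest = (x & (1u << C)) == 0 ? 1u : 0u;
      assert(NotSrl == BitTest && SrlNot == BitTest);
    }
  }
  std::puts("both bit-test folds hold for all 8-bit values");
  return 0;
}

Both forms reduce to testing bit C of X, which is why the rewritten combine can look for the 'not' either before it matches the shift-right or after it.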
@@ -12358,6 +12357,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { return V; if (N0.getOpcode() == ISD::SETCC) { + // Propagate fast-math-flags. + SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags()); + // Only do this before legalize for now. if (!LegalOperations && VT.isVector() && N0.getValueType().getVectorElementType() == MVT::i1) { @@ -12549,6 +12551,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { } if (N0.getOpcode() == ISD::SETCC) { + // Propagate fast-math-flags. + SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags()); + // For vectors: // aext(setcc) -> vsetcc // aext(setcc) -> truncate(vsetcc) @@ -13155,6 +13160,19 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { return N0.getOperand(0); } + // Try to narrow a truncate-of-sext_in_reg to the destination type: + // trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM + if (!LegalTypes && N0.getOpcode() == ISD::SIGN_EXTEND_INREG && + N0.hasOneUse()) { + SDValue X = N0.getOperand(0); + SDValue ExtVal = N0.getOperand(1); + EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT(); + if (ExtVT.bitsLT(VT)) { + SDValue TrX = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, TrX, ExtVal); + } + } + // If this is anyext(trunc), don't fold it, allow ourselves to be folded. if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND)) return SDValue(); @@ -19478,7 +19496,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { return Shuf; // Handle <1 x ???> vector insertion special cases. - if (VT.getVectorNumElements() == 1) { + if (NumElts == 1) { // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT && InVal.getOperand(0).getValueType() == VT && @@ -19506,80 +19524,77 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { } } - // Attempt to fold the insertion into a legal BUILD_VECTOR. + // Attempt to convert an insert_vector_elt chain into a legal build_vector. if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) { - auto UpdateBuildVector = [&](SmallVectorImpl<SDValue> &Ops) { - assert(Ops.size() == NumElts && "Unexpected vector size"); - - // Insert the element - if (Elt < Ops.size()) { - // All the operands of BUILD_VECTOR must have the same type; - // we enforce that here. - EVT OpVT = Ops[0].getValueType(); - Ops[Elt] = - OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal; + // vXi1 vector - we don't need to recurse. + if (NumElts == 1) + return DAG.getBuildVector(VT, DL, {InVal}); + + // If we haven't already collected the element, insert into the op list. + EVT MaxEltVT = InVal.getValueType(); + auto AddBuildVectorOp = [&](SmallVectorImpl<SDValue> &Ops, SDValue Elt, + unsigned Idx) { + if (!Ops[Idx]) { + Ops[Idx] = Elt; + if (VT.isInteger()) { + EVT EltVT = Elt.getValueType(); + MaxEltVT = MaxEltVT.bitsGE(EltVT) ? MaxEltVT : EltVT; + } } + }; - // Return the new vector + // Ensure all the operands are the same value type, fill any missing + // operands with UNDEF and create the BUILD_VECTOR. + auto CanonicalizeBuildVector = [&](SmallVectorImpl<SDValue> &Ops) { + assert(Ops.size() == NumElts && "Unexpected vector size"); + for (SDValue &Op : Ops) { + if (Op) + Op = VT.isInteger() ? DAG.getAnyExtOrTrunc(Op, DL, MaxEltVT) : Op; + else + Op = DAG.getUNDEF(MaxEltVT); + } return DAG.getBuildVector(VT, DL, Ops); }; - // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially - // be converted to a BUILD_VECTOR). 
Fill in the Ops vector with the - // vector elements. - SmallVector<SDValue, 8> Ops; + SmallVector<SDValue, 8> Ops(NumElts, SDValue()); + Ops[Elt] = InVal; - // Do not combine these two vectors if the output vector will not replace - // the input vector. - if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) { - Ops.append(InVec->op_begin(), InVec->op_end()); - return UpdateBuildVector(Ops); - } + // Recurse up a INSERT_VECTOR_ELT chain to build a BUILD_VECTOR. + for (SDValue CurVec = InVec; CurVec;) { + // UNDEF - build new BUILD_VECTOR from already inserted operands. + if (CurVec.isUndef()) + return CanonicalizeBuildVector(Ops); - if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && InVec.hasOneUse()) { - Ops.push_back(InVec.getOperand(0)); - Ops.append(NumElts - 1, DAG.getUNDEF(InVec.getOperand(0).getValueType())); - return UpdateBuildVector(Ops); - } + // BUILD_VECTOR - insert unused operands and build new BUILD_VECTOR. + if (CurVec.getOpcode() == ISD::BUILD_VECTOR && CurVec.hasOneUse()) { + for (unsigned I = 0; I != NumElts; ++I) + AddBuildVectorOp(Ops, CurVec.getOperand(I), I); + return CanonicalizeBuildVector(Ops); + } - if (InVec.isUndef()) { - Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType())); - return UpdateBuildVector(Ops); - } + // SCALAR_TO_VECTOR - insert unused scalar and build new BUILD_VECTOR. + if (CurVec.getOpcode() == ISD::SCALAR_TO_VECTOR && CurVec.hasOneUse()) { + AddBuildVectorOp(Ops, CurVec.getOperand(0), 0); + return CanonicalizeBuildVector(Ops); + } - // If we're inserting into the end of a vector as part of an sequence, see - // if we can create a BUILD_VECTOR by following the sequence back up the - // chain. - if (Elt == (NumElts - 1)) { - SmallVector<SDValue> ReverseInsertions; - ReverseInsertions.push_back(InVal); - - EVT MaxEltVT = InVal.getValueType(); - SDValue CurVec = InVec; - for (unsigned I = 1; I != NumElts; ++I) { - if (CurVec.getOpcode() != ISD::INSERT_VECTOR_ELT || !CurVec.hasOneUse()) - break; + // INSERT_VECTOR_ELT - insert operand and continue up the chain. + if (CurVec.getOpcode() == ISD::INSERT_VECTOR_ELT && CurVec.hasOneUse()) + if (auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2))) + if (CurIdx->getAPIntValue().ult(NumElts)) { + unsigned Idx = CurIdx->getZExtValue(); + AddBuildVectorOp(Ops, CurVec.getOperand(1), Idx); - auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2)); - if (!CurIdx || CurIdx->getAPIntValue() != ((NumElts - 1) - I)) - break; - SDValue CurVal = CurVec.getOperand(1); - ReverseInsertions.push_back(CurVal); - if (VT.isInteger()) { - EVT CurValVT = CurVal.getValueType(); - MaxEltVT = MaxEltVT.bitsGE(CurValVT) ? MaxEltVT : CurValVT; - } - CurVec = CurVec.getOperand(0); - } + // Found entire BUILD_VECTOR. + if (all_of(Ops, [](SDValue Op) { return !!Op; })) + return CanonicalizeBuildVector(Ops); - if (ReverseInsertions.size() == NumElts) { - for (unsigned I = 0; I != NumElts; ++I) { - SDValue Val = ReverseInsertions[(NumElts - 1) - I]; - Val = VT.isInteger() ? DAG.getAnyExtOrTrunc(Val, DL, MaxEltVT) : Val; - Ops.push_back(Val); - } - return DAG.getBuildVector(VT, DL, Ops); - } + CurVec = CurVec->getOperand(0); + continue; + } + + // Failed to find a match in the chain - bail. + break; } } @@ -22643,6 +22658,56 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } } + // If we're not performing a select/blend shuffle, see if we can convert the + // shuffle into a AND node, with all the out-of-lane elements are known zero. 
+ if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) { + bool IsInLaneMask = true; + ArrayRef<int> Mask = SVN->getMask(); + SmallVector<int, 16> ClearMask(NumElts, -1); + APInt DemandedLHS = APInt::getNullValue(NumElts); + APInt DemandedRHS = APInt::getNullValue(NumElts); + for (int I = 0; I != (int)NumElts; ++I) { + int M = Mask[I]; + if (M < 0) + continue; + ClearMask[I] = M == I ? I : (I + NumElts); + IsInLaneMask &= (M == I) || (M == (int)(I + NumElts)); + if (M != I) { + APInt &Demanded = M < (int)NumElts ? DemandedLHS : DemandedRHS; + Demanded.setBit(M % NumElts); + } + } + // TODO: Should we try to mask with N1 as well? + if (!IsInLaneMask && + (!DemandedLHS.isNullValue() || !DemandedRHS.isNullValue()) && + (DemandedLHS.isNullValue() || + DAG.MaskedVectorIsZero(N0, DemandedLHS)) && + (DemandedRHS.isNullValue() || + DAG.MaskedVectorIsZero(N1, DemandedRHS))) { + SDLoc DL(N); + EVT IntVT = VT.changeVectorElementTypeToInteger(); + EVT IntSVT = VT.getVectorElementType().changeTypeToInteger(); + SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT); + SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT); + SmallVector<SDValue, 16> AndMask(NumElts, DAG.getUNDEF(IntSVT)); + for (int I = 0; I != (int)NumElts; ++I) + if (0 <= Mask[I]) + AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt; + + // See if a clear mask is legal instead of going via + // XformToShuffleWithZero which loses UNDEF mask elements. + if (TLI.isVectorClearMaskLegal(ClearMask, IntVT)) + return DAG.getBitcast( + VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0), + DAG.getConstant(0, DL, IntVT), ClearMask)); + + if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT)) + return DAG.getBitcast( + VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0), + DAG.getBuildVector(IntVT, DL, AndMask))); + } + } + // Attempt to combine a shuffle of 2 inputs of 'scalar sources' - // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR. if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) @@ -23385,10 +23450,14 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG, int Index0, Index1; SDValue Src0 = DAG.getSplatSourceVector(N0, Index0); SDValue Src1 = DAG.getSplatSourceVector(N1, Index1); + // Extract element from splat_vector should be free. + // TODO: use DAG.isSplatValue instead? 
+ bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR && + N1.getOpcode() == ISD::SPLAT_VECTOR; if (!Src0 || !Src1 || Index0 != Index1 || Src0.getValueType().getVectorElementType() != EltVT || Src1.getValueType().getVectorElementType() != EltVT || - !TLI.isExtractVecEltCheap(VT, Index0) || + !(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) || !TLI.isOperationLegalOrCustom(Opcode, EltVT)) return SDValue(); @@ -23410,6 +23479,8 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG, } // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index + if (VT.isScalableVector()) + return DAG.getSplatVector(VT, DL, ScalarBO); SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO); return DAG.getBuildVector(VT, DL, Ops); } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 8bdc9410d131..56d35dfe8701 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1404,17 +1404,21 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { } SDValue NewLoad; + Align ElementAlignment = + std::min(cast<StoreSDNode>(Ch)->getAlign(), + DAG.getDataLayout().getPrefTypeAlign( + Op.getValueType().getTypeForEVT(*DAG.getContext()))); if (Op.getValueType().isVector()) { StackPtr = TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, Op.getValueType(), Idx); - NewLoad = - DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, MachinePointerInfo()); + NewLoad = DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, + MachinePointerInfo(), ElementAlignment); } else { StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); NewLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, - MachinePointerInfo(), - VecVT.getVectorElementType()); + MachinePointerInfo(), VecVT.getVectorElementType(), + ElementAlignment); } // Replace the chain going out of the store, by the one out of the load. diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 6c136bdfc652..b2df67f45c72 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -2918,6 +2918,9 @@ bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) { case ISD::STACKMAP: Res = SoftPromoteHalfOp_STACKMAP(N, OpNo); break; + case ISD::PATCHPOINT: + Res = SoftPromoteHalfOp_PATCHPOINT(N, OpNo); + break; } if (!Res.getNode()) @@ -3059,3 +3062,18 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_STACKMAP(SDNode *N, unsigned OpNo) { return SDValue(); // Signal that we replaced the node ourselves. } + +SDValue DAGTypeLegalizer::SoftPromoteHalfOp_PATCHPOINT(SDNode *N, + unsigned OpNo) { + assert(OpNo >= 7); + SmallVector<SDValue> NewOps(N->ops().begin(), N->ops().end()); + SDValue Op = N->getOperand(OpNo); + NewOps[OpNo] = GetSoftPromotedHalf(Op); + SDValue NewNode = + DAG.getNode(N->getOpcode(), SDLoc(N), N->getVTList(), NewOps); + + for (unsigned ResNum = 0; ResNum < N->getNumValues(); ResNum++) + ReplaceValueWith(SDValue(N, ResNum), NewNode.getValue(ResNum)); + + return SDValue(); // Signal that we replaced the node ourselves. 
+} diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 343722a97c3c..228d4a43ccde 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1727,6 +1727,13 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::STACKMAP: Res = PromoteIntOp_STACKMAP(N, OpNo); break; + case ISD::PATCHPOINT: + Res = PromoteIntOp_PATCHPOINT(N, OpNo); + break; + case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: + case ISD::EXPERIMENTAL_VP_STRIDED_STORE: + Res = PromoteIntOp_VP_STRIDED(N, OpNo); + break; } // If the result is null, the sub-method took care of registering results etc. @@ -2341,6 +2348,25 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STACKMAP(SDNode *N, unsigned OpNo) { return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); } +SDValue DAGTypeLegalizer::PromoteIntOp_PATCHPOINT(SDNode *N, unsigned OpNo) { + assert(OpNo >= 7); + SmallVector<SDValue> NewOps(N->ops().begin(), N->ops().end()); + SDValue Operand = N->getOperand(OpNo); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Operand.getValueType()); + NewOps[OpNo] = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), NVT, Operand); + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_VP_STRIDED(SDNode *N, unsigned OpNo) { + assert((N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD && OpNo == 3) || + (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE && OpNo == 4)); + + SmallVector<SDValue, 8> NewOps(N->op_begin(), N->op_end()); + NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo)); + + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); +} + //===----------------------------------------------------------------------===// // Integer Result Expansion //===----------------------------------------------------------------------===// @@ -2886,11 +2912,15 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, if (N->getOpcode() == ISD::ADD) { Lo = DAG.getNode(ISD::UADDO, dl, VTList, LoOps); HiOps[2] = Lo.getValue(1); - Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, HiOps); + Hi = DAG.computeKnownBits(HiOps[2]).isZero() + ? DAG.getNode(ISD::UADDO, dl, VTList, makeArrayRef(HiOps, 2)) + : DAG.getNode(ISD::ADDCARRY, dl, VTList, HiOps); } else { Lo = DAG.getNode(ISD::USUBO, dl, VTList, LoOps); HiOps[2] = Lo.getValue(1); - Hi = DAG.getNode(ISD::SUBCARRY, dl, VTList, HiOps); + Hi = DAG.computeKnownBits(HiOps[2]).isZero() + ? DAG.getNode(ISD::USUBO, dl, VTList, makeArrayRef(HiOps, 2)) + : DAG.getNode(ISD::SUBCARRY, dl, VTList, HiOps); } return; } @@ -4693,6 +4723,13 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::STACKMAP: Res = ExpandIntOp_STACKMAP(N, OpNo); break; + case ISD::PATCHPOINT: + Res = ExpandIntOp_PATCHPOINT(N, OpNo); + break; + case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: + case ISD::EXPERIMENTAL_VP_STRIDED_STORE: + Res = ExpandIntOp_VP_STRIDED(N, OpNo); + break; } // If the result is null, the sub-method took care of registering results etc. @@ -5108,6 +5145,17 @@ SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) { return Swap.getValue(1); } +SDValue DAGTypeLegalizer::ExpandIntOp_VP_STRIDED(SDNode *N, unsigned OpNo) { + assert((N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD && OpNo == 3) || + (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE && OpNo == 4)); + + SDValue Hi; // The upper half is dropped out. 
+ SmallVector<SDValue, 8> NewOps(N->op_begin(), N->op_end()); + GetExpandedInteger(NewOps[OpNo], NewOps[OpNo], Hi); + + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); +} + SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SPLICE(SDNode *N) { SDLoc dl(N); @@ -5253,21 +5301,28 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) { assert(NOutVT.isVector() && "This type must be promoted to a vector type"); unsigned NumElems = N->getNumOperands(); EVT NOutVTElem = NOutVT.getVectorElementType(); - + TargetLoweringBase::BooleanContent NOutBoolType = TLI.getBooleanContents(NOutVT); + unsigned NOutExtOpc = TargetLowering::getExtendForContent(NOutBoolType); SDLoc dl(N); SmallVector<SDValue, 8> Ops; Ops.reserve(NumElems); for (unsigned i = 0; i != NumElems; ++i) { - SDValue Op; + SDValue Op = N->getOperand(i); + EVT OpVT = Op.getValueType(); // BUILD_VECTOR integer operand types are allowed to be larger than the // result's element type. This may still be true after the promotion. For // example, we might be promoting (<v?i1> = BV <i32>, <i32>, ...) to // (v?i16 = BV <i32>, <i32>, ...), and we can't any_extend <i32> to <i16>. - if (N->getOperand(i).getValueType().bitsLT(NOutVTElem)) - Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(i)); - else - Op = N->getOperand(i); + if (OpVT.bitsLT(NOutVTElem)) { + unsigned ExtOpc = ISD::ANY_EXTEND; + // Attempt to extend constant bool vectors to match target's BooleanContent. + // While not necessary, this improves chances of the constant correctly + // folding with compare results (e.g. for NOT patterns). + if (OpVT == MVT::i1 && Op.getOpcode() == ISD::Constant) + ExtOpc = NOutExtOpc; + Op = DAG.getNode(ExtOpc, dl, NOutVTElem, Op); + } Ops.push_back(Op); } @@ -5524,30 +5579,67 @@ SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) { SDValue DAGTypeLegalizer::ExpandIntOp_STACKMAP(SDNode *N, unsigned OpNo) { assert(OpNo > 1); - SDValue Op = N->getOperand(OpNo); - SDLoc DL = SDLoc(N); + + // FIXME: Non-constant operands are not yet handled: + // - https://github.com/llvm/llvm-project/issues/26431 + // - https://github.com/llvm/llvm-project/issues/55957 + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op); + if (!CN) + return SDValue(); + + // Copy operands before the one being expanded. SmallVector<SDValue> NewOps; + for (unsigned I = 0; I < OpNo; I++) + NewOps.push_back(N->getOperand(I)); + + EVT Ty = Op.getValueType(); + SDLoc DL = SDLoc(N); + if (CN->getConstantIntValue()->getValue().getActiveBits() < 64) { + NewOps.push_back( + DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64)); + NewOps.push_back(DAG.getTargetConstant(CN->getZExtValue(), DL, Ty)); + } else { + // FIXME: https://github.com/llvm/llvm-project/issues/55609 + return SDValue(); + } + + // Copy remaining operands. + for (unsigned I = OpNo + 1; I < N->getNumOperands(); I++) + NewOps.push_back(N->getOperand(I)); + + SDValue NewNode = DAG.getNode(N->getOpcode(), DL, N->getVTList(), NewOps); + + for (unsigned ResNum = 0; ResNum < N->getNumValues(); ResNum++) + ReplaceValueWith(SDValue(N, ResNum), NewNode.getValue(ResNum)); + + return SDValue(); // Signal that we have replaced the node already. 
+} + +SDValue DAGTypeLegalizer::ExpandIntOp_PATCHPOINT(SDNode *N, unsigned OpNo) { + assert(OpNo >= 7); + SDValue Op = N->getOperand(OpNo); + + // FIXME: Non-constant operands are not yet handled: + // - https://github.com/llvm/llvm-project/issues/26431 + // - https://github.com/llvm/llvm-project/issues/55957 + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op); + if (!CN) + return SDValue(); // Copy operands before the one being expanded. + SmallVector<SDValue> NewOps; for (unsigned I = 0; I < OpNo; I++) NewOps.push_back(N->getOperand(I)); - if (Op->getOpcode() == ISD::Constant) { - ConstantSDNode *CN = cast<ConstantSDNode>(Op); - EVT Ty = Op.getValueType(); - if (CN->getConstantIntValue()->getValue().getActiveBits() < 64) { - NewOps.push_back( - DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64)); - NewOps.push_back(DAG.getTargetConstant(CN->getZExtValue(), DL, Ty)); - } else { - // FIXME: https://github.com/llvm/llvm-project/issues/55609 - return SDValue(); - } + EVT Ty = Op.getValueType(); + SDLoc DL = SDLoc(N); + if (CN->getConstantIntValue()->getValue().getActiveBits() < 64) { + NewOps.push_back( + DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64)); + NewOps.push_back(DAG.getTargetConstant(CN->getZExtValue(), DL, Ty)); } else { - // FIXME: Non-constant operands are not yet handled: - // - https://github.com/llvm/llvm-project/issues/26431 - // - https://github.com/llvm/llvm-project/issues/55957 + // FIXME: https://github.com/llvm/llvm-project/issues/55609 return SDValue(); } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 2807b7f5ae68..6696b79cf885 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -403,6 +403,8 @@ private: SDValue PromoteIntOp_VP_REDUCE(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_SET_ROUNDING(SDNode *N); SDValue PromoteIntOp_STACKMAP(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_PATCHPOINT(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_VP_STRIDED(SDNode *N, unsigned OpNo); void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code); @@ -495,6 +497,8 @@ private: SDValue ExpandIntOp_ATOMIC_STORE(SDNode *N); SDValue ExpandIntOp_SPLAT_VECTOR(SDNode *N); SDValue ExpandIntOp_STACKMAP(SDNode *N, unsigned OpNo); + SDValue ExpandIntOp_PATCHPOINT(SDNode *N, unsigned OpNo); + SDValue ExpandIntOp_VP_STRIDED(SDNode *N, unsigned OpNo); void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &dl); @@ -744,6 +748,7 @@ private: SDValue SoftPromoteHalfOp_SELECT_CC(SDNode *N, unsigned OpNo); SDValue SoftPromoteHalfOp_STORE(SDNode *N, unsigned OpNo); SDValue SoftPromoteHalfOp_STACKMAP(SDNode *N, unsigned OpNo); + SDValue SoftPromoteHalfOp_PATCHPOINT(SDNode *N, unsigned OpNo); //===--------------------------------------------------------------------===// // Scalarization Support: LegalizeVectorTypes.cpp diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 842ffa2aa23e..f5a1eae1e7fe 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -737,6 +737,20 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { case ISD::SELECT: Results.push_back(ExpandSELECT(Node)); return; + case ISD::SELECT_CC: { + if (Node->getValueType(0).isScalableVector()) { + EVT CondVT = TLI.getSetCCResultType( + DAG.getDataLayout(), 
*DAG.getContext(), Node->getValueType(0)); + SDValue SetCC = + DAG.getNode(ISD::SETCC, SDLoc(Node), CondVT, Node->getOperand(0), + Node->getOperand(1), Node->getOperand(4)); + Results.push_back(DAG.getSelect(SDLoc(Node), Node->getValueType(0), SetCC, + Node->getOperand(2), + Node->getOperand(3))); + return; + } + break; + } case ISD::FP_TO_UINT: ExpandFP_TO_UINT(Node, Results); return; @@ -833,6 +847,16 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { return; } break; + case ISD::FP_TO_SINT_SAT: + case ISD::FP_TO_UINT_SAT: + // Expand the fpsosisat if it is scalable to prevent it from unrolling below. + if (Node->getValueType(0).isScalableVector()) { + if (SDValue Expanded = TLI.expandFP_TO_INT_SAT(Node, DAG)) { + Results.push_back(Expanded); + return; + } + } + break; case ISD::SMULFIX: case ISD::UMULFIX: if (SDValue Expanded = TLI.expandFixedPointMul(Node, DAG)) { diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 78fc407e9573..3ac2a7bddc5a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -793,7 +793,7 @@ ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) { // Emit any debug values associated with the node. if (N->getHasDebugValue()) { MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos(); - for (auto DV : DAG->GetDbgValues(N)) { + for (auto *DV : DAG->GetDbgValues(N)) { if (!DV->isEmitted()) if (auto *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap)) BB->insert(InsertPos, DbgMI); diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 2a10157b404e..5166db033c62 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -749,7 +749,7 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, // source order number as N. MachineBasicBlock *BB = Emitter.getBlock(); MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos(); - for (auto DV : DAG->GetDbgValues(N)) { + for (auto *DV : DAG->GetDbgValues(N)) { if (DV->isEmitted()) continue; unsigned DVOrder = DV->getOrder(); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index c8d0f5faf647..441437351852 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -24,6 +24,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/Analysis.h" @@ -602,7 +603,7 @@ static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) { /// AddNodeIDOperands - Various routines for adding operands to the NodeID data. static void AddNodeIDOperands(FoldingSetNodeID &ID, ArrayRef<SDValue> Ops) { - for (auto& Op : Ops) { + for (const auto &Op : Ops) { ID.AddPointer(Op.getNode()); ID.AddInteger(Op.getResNo()); } @@ -611,7 +612,7 @@ static void AddNodeIDOperands(FoldingSetNodeID &ID, /// AddNodeIDOperands - Various routines for adding operands to the NodeID data. 
static void AddNodeIDOperands(FoldingSetNodeID &ID, ArrayRef<SDUse> Ops) { - for (auto& Op : Ops) { + for (const auto &Op : Ops) { ID.AddPointer(Op.getNode()); ID.AddInteger(Op.getResNo()); } @@ -2711,16 +2712,9 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, SubDemandedElts &= ScaledDemandedElts; if (!isSplatValue(Src, SubDemandedElts, SubUndefElts, Depth + 1)) return false; - - // Here we can't do "MatchAnyBits" operation merge for undef bits. - // Because some operation only use part value of the source. - // Take llvm.fshl.* for example: - // t1: v4i32 = Constant:i32<12>, undef:i32, Constant:i32<12>, undef:i32 - // t2: v2i64 = bitcast t1 - // t5: v2i64 = fshl t3, t4, t2 - // We can not convert t2 to {i64 undef, i64 undef} - UndefElts |= APIntOps::ScaleBitMask(SubUndefElts, NumElts, - /*MatchAllBits=*/true); + // TODO: Add support for merging sub undef elements. + if (!SubUndefElts.isZero()) + return false; } return true; } @@ -2947,6 +2941,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, unsigned Opcode = Op.getOpcode(); switch (Opcode) { + case ISD::MERGE_VALUES: + return computeKnownBits(Op.getOperand(Op.getResNo()), DemandedElts, + Depth + 1); case ISD::BUILD_VECTOR: // Collect the known bits that are shared by every demanded vector element. Known.Zero.setAllBits(); Known.One.setAllBits(); @@ -3219,12 +3216,6 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known = KnownBits::mulhs(Known, Known2); break; } - case ISD::UDIV: { - Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); - Known = KnownBits::udiv(Known, Known2); - break; - } case ISD::AVGCEILU: { Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); @@ -3339,6 +3330,38 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known.Zero |= Known2.Zero; } break; + case ISD::SHL_PARTS: + case ISD::SRA_PARTS: + case ISD::SRL_PARTS: { + assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result"); + + // Collect lo/hi source values and concatenate. + // TODO: Would a KnownBits::concatBits helper be useful? + unsigned LoBits = Op.getOperand(0).getScalarValueSizeInBits(); + unsigned HiBits = Op.getOperand(1).getScalarValueSizeInBits(); + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known = Known.anyext(LoBits + HiBits); + Known.insertBits(Known2, LoBits); + + // Collect shift amount. + Known2 = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1); + + if (Opcode == ISD::SHL_PARTS) + Known = KnownBits::shl(Known, Known2); + else if (Opcode == ISD::SRA_PARTS) + Known = KnownBits::ashr(Known, Known2); + else // if (Opcode == ISD::SRL_PARTS) + Known = KnownBits::lshr(Known, Known2); + + // TODO: Minimum shift low/high bits are known zero. 
+ + if (Op.getResNo() == 0) + Known = Known.extractBits(LoBits, 0); + else + Known = Known.extractBits(HiBits, LoBits); + break; + } case ISD::SIGN_EXTEND_INREG: { Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); @@ -3570,6 +3593,12 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known = KnownBits::computeForAddCarry(Known, Known2, Carry); break; } + case ISD::UDIV: { + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known = KnownBits::udiv(Known, Known2); + break; + } case ISD::SREM: { Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); @@ -3925,7 +3954,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, case ISD::AssertZext: Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits(); return VTBits-Tmp; - + case ISD::MERGE_VALUES: + return ComputeNumSignBits(Op.getOperand(Op.getResNo()), DemandedElts, + Depth + 1); case ISD::BUILD_VECTOR: Tmp = VTBits; for (unsigned i = 0, e = Op.getNumOperands(); (i < e) && (Tmp > 1); ++i) { @@ -6105,8 +6136,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, assert(N1.getValueType().isVector() == VT.isVector() && "FP_TO_*INT_SAT type should be vector iff the operand type is " "vector!"); - assert((!VT.isVector() || VT.getVectorNumElements() == - N1.getValueType().getVectorNumElements()) && + assert((!VT.isVector() || VT.getVectorElementCount() == + N1.getValueType().getVectorElementCount()) && "Vector element counts must match in FP_TO_*INT_SAT"); assert(!cast<VTSDNode>(N2)->getVT().isVector() && "Type to saturate to must be a scalar."); @@ -6719,7 +6750,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, bool isVol, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, - const AAMDNodes &AAInfo) { + const AAMDNodes &AAInfo, AAResults *AA) { // Turn a memcpy of undef to nop. // FIXME: We need to honor volatile even is Src is undef. if (Src.isUndef()) @@ -6782,6 +6813,11 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, AAMDNodes NewAAInfo = AAInfo; NewAAInfo.TBAA = NewAAInfo.TBAAStruct = nullptr; + const Value *SrcVal = SrcPtrInfo.V.dyn_cast<const Value *>(); + bool isConstant = + AA && SrcVal && + AA->pointsToConstantMemory(MemoryLocation(SrcVal, Size, AAInfo)); + MachineMemOperand::Flags MMOFlags = isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone; SmallVector<SDValue, 16> OutLoadChains; @@ -6843,6 +6879,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, MachineMemOperand::Flags SrcMMOFlags = MMOFlags; if (isDereferenceable) SrcMMOFlags |= MachineMemOperand::MODereferenceable; + if (isConstant) + SrcMMOFlags |= MachineMemOperand::MOInvariant; Value = DAG.getExtLoad( ISD::EXTLOAD, dl, NVT, Chain, @@ -7131,7 +7169,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, - const AAMDNodes &AAInfo) { + const AAMDNodes &AAInfo, AAResults *AA) { // Check to see if we should lower the memcpy to loads and stores first. // For cases within the target-specified limits, this is the best choice. 
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); @@ -7142,7 +7180,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Result = getMemcpyLoadsAndStores( *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment, - isVol, false, DstPtrInfo, SrcPtrInfo, AAInfo); + isVol, false, DstPtrInfo, SrcPtrInfo, AAInfo, AA); if (Result.getNode()) return Result; } @@ -7161,9 +7199,9 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, // use a (potentially long) sequence of loads and stores. if (AlwaysInline) { assert(ConstantSize && "AlwaysInline requires a constant size!"); - return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, - ConstantSize->getZExtValue(), Alignment, - isVol, true, DstPtrInfo, SrcPtrInfo, AAInfo); + return getMemcpyLoadsAndStores( + *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment, + isVol, true, DstPtrInfo, SrcPtrInfo, AAInfo, AA); } checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); @@ -7245,7 +7283,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, - const AAMDNodes &AAInfo) { + const AAMDNodes &AAInfo, AAResults *AA) { // Check to see if we should lower the memmove to loads and stores first. // For cases within the target-specified limits, this is the best choice. ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); @@ -8904,7 +8942,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } #ifndef NDEBUG - for (auto &Op : Ops) + for (const auto &Op : Ops) assert(Op.getOpcode() != ISD::DELETED_NODE && "Operand is DELETED_NODE!"); #endif @@ -8928,6 +8966,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, "True and False arms of SelectCC must have same type!"); assert(Ops[2].getValueType() == VT && "select_cc node must be of same type as true and false value!"); + assert((!Ops[0].getValueType().isVector() || + Ops[0].getValueType().getVectorElementCount() == + VT.getVectorElementCount()) && + "Expected select_cc with vector result to have the same sized " + "comparison type!"); break; case ISD::BR_CC: assert(NumOps == 5 && "BR_CC takes 5 operands!"); @@ -9018,12 +9061,34 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, return getNode(Opcode, DL, VTList.VTs[0], Ops, Flags); #ifndef NDEBUG - for (auto &Op : Ops) + for (const auto &Op : Ops) assert(Op.getOpcode() != ISD::DELETED_NODE && "Operand is DELETED_NODE!"); #endif switch (Opcode) { + case ISD::SADDO: + case ISD::UADDO: + case ISD::SSUBO: + case ISD::USUBO: { + assert(VTList.NumVTs == 2 && Ops.size() == 2 && + "Invalid add/sub overflow op!"); + assert(VTList.VTs[0].isInteger() && VTList.VTs[1].isInteger() && + Ops[0].getValueType() == Ops[1].getValueType() && + Ops[0].getValueType() == VTList.VTs[0] && + "Binary operator types must match!"); + SDValue N1 = Ops[0], N2 = Ops[1]; + canonicalizeCommutativeBinop(Opcode, N1, N2); + + // (X +- 0) -> X with zero-overflow. 
+ ConstantSDNode *N2CV = isConstOrConstSplat(N2, /*AllowUndefs*/ false, + /*AllowTruncation*/ true); + if (N2CV && N2CV->isZero()) { + SDValue ZeroOverFlow = getConstant(0, DL, VTList.VTs[1]); + return getNode(ISD::MERGE_VALUES, DL, VTList, {N1, ZeroOverFlow}, Flags); + } + break; + } case ISD::STRICT_FP_EXTEND: assert(VTList.NumVTs == 2 && Ops.size() == 2 && "Invalid STRICT_FP_EXTEND!"); @@ -9914,7 +9979,7 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) { return; SmallVector<SDDbgValue *, 2> ClonedDVs; - for (auto DV : GetDbgValues(&N)) { + for (auto *DV : GetDbgValues(&N)) { if (DV->isInvalidated()) continue; switch (N.getOpcode()) { @@ -10268,7 +10333,7 @@ bool SelectionDAG::calculateDivergence(SDNode *N) { } if (TLI->isSDNodeSourceOfDivergence(N, FLI, DA)) return true; - for (auto &Op : N->ops()) { + for (const auto &Op : N->ops()) { if (Op.Val.getValueType() != MVT::Other && Op.getNode()->isDivergent()) return true; } @@ -10298,7 +10363,7 @@ void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode *> &Order) { } for (size_t I = 0; I != Order.size(); ++I) { SDNode *N = Order[I]; - for (auto U : N->uses()) { + for (auto *U : N->uses()) { unsigned &UnsortedOps = Degree[U]; if (0 == --UnsortedOps) Order.push_back(U); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index fe3c38ec590d..35650b9bd00e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1789,7 +1789,7 @@ static void findWasmUnwindDestinations( UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob); UnwindDests.back().first->setIsEHScopeEntry(); break; - } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) { + } else if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) { // Add the catchpad handlers to the possible destinations. We don't // continue to the unwind destination of the catchswitch for wasm. for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) { @@ -1844,7 +1844,7 @@ static void findUnwindDestinations( UnwindDests.back().first->setIsEHScopeEntry(); UnwindDests.back().first->setIsEHFuncletEntry(); break; - } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) { + } else if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) { // Add the catchpad handlers to the possible destinations. for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) { UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob); @@ -2990,14 +2990,20 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) { CopyToExportRegsIfNeeded(&I); // Retrieve successors. + SmallPtrSet<BasicBlock *, 8> Dests; + Dests.insert(I.getDefaultDest()); MachineBasicBlock *Return = FuncInfo.MBBMap[I.getDefaultDest()]; // Update successor info. addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne()); for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i) { - MachineBasicBlock *Target = FuncInfo.MBBMap[I.getIndirectDest(i)]; - addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero()); + BasicBlock *Dest = I.getIndirectDest(i); + MachineBasicBlock *Target = FuncInfo.MBBMap[Dest]; Target->setIsInlineAsmBrIndirectTarget(); + Target->setHasAddressTaken(); + // Don't add duplicate machine successors. 
+ if (Dests.insert(Dest).second) + addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero()); } CallBrMBB->normalizeSuccProbs(); @@ -4075,6 +4081,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { return; bool isVolatile = I.isVolatile(); + MachineMemOperand::Flags MMOFlags = + TLI.getLoadMemOperandFlags(I, DAG.getDataLayout()); SDValue Root; bool ConstantMemory = false; @@ -4091,6 +4099,12 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { // Do not serialize (non-volatile) loads of constant memory with anything. Root = DAG.getEntryNode(); ConstantMemory = true; + MMOFlags |= MachineMemOperand::MOInvariant; + + // FIXME: pointsToConstantMemory probably does not imply dereferenceable, + // but the previous usage implied it did. Probably should check + // isDereferenceableAndAlignedPointer. + MMOFlags |= MachineMemOperand::MODereferenceable; } else { // Do not serialize non-volatile loads against each other. Root = DAG.getRoot(); @@ -4110,9 +4124,6 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues)); EVT PtrVT = Ptr.getValueType(); - MachineMemOperand::Flags MMOFlags - = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout()); - unsigned ChainI = 0; for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { // Serializing loads here may result in excessive register pressure, and @@ -5766,7 +5777,7 @@ static const CallBase *FindPreallocatedCall(const Value *PreallocatedSetup) { ->getCalledFunction() ->getIntrinsicID() == Intrinsic::call_preallocated_setup && "expected call_preallocated_setup Value"); - for (auto *U : PreallocatedSetup->users()) { + for (const auto *U : PreallocatedSetup->users()) { auto *UseCall = cast<CallBase>(U); const Function *Fn = UseCall->getCalledFunction(); if (!Fn || Fn->getIntrinsicID() != Intrinsic::call_preallocated_arg) { @@ -5859,11 +5870,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // FIXME: Support passing different dest/src alignments to the memcpy DAG // node. SDValue Root = isVol ? getRoot() : getMemoryRoot(); - SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Alignment, isVol, - /* AlwaysInline */ false, isTC, - MachinePointerInfo(I.getArgOperand(0)), - MachinePointerInfo(I.getArgOperand(1)), - I.getAAMetadata()); + SDValue MC = DAG.getMemcpy( + Root, sdl, Op1, Op2, Op3, Alignment, isVol, + /* AlwaysInline */ false, isTC, MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1)), I.getAAMetadata(), AA); updateDAGForMaybeTailCall(MC); return; } @@ -5881,11 +5891,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); // FIXME: Support passing different dest/src alignments to the memcpy DAG // node. 
- SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Alignment, isVol, - /* AlwaysInline */ true, isTC, - MachinePointerInfo(I.getArgOperand(0)), - MachinePointerInfo(I.getArgOperand(1)), - I.getAAMetadata()); + SDValue MC = DAG.getMemcpy( + getRoot(), sdl, Dst, Src, Size, Alignment, isVol, + /* AlwaysInline */ true, isTC, MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1)), I.getAAMetadata(), AA); updateDAGForMaybeTailCall(MC); return; } @@ -5940,7 +5949,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Alignment, isVol, isTC, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1)), - I.getAAMetadata()); + I.getAAMetadata(), AA); updateDAGForMaybeTailCall(MM); return; } @@ -8855,7 +8864,8 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, } break; - case InlineAsm::isInput: { + case InlineAsm::isInput: + case InlineAsm::isLabel: { SDValue InOperandVal = OpInfo.CallOperand; if (OpInfo.isMatchingInputConstraint()) { @@ -9295,19 +9305,18 @@ void SelectionDAGBuilder::populateCallLoweringInfo( static void addStackMapLiveVars(const CallBase &Call, unsigned StartIdx, const SDLoc &DL, SmallVectorImpl<SDValue> &Ops, SelectionDAGBuilder &Builder) { - for (unsigned i = StartIdx, e = Call.arg_size(); i != e; ++i) { - SDValue OpVal = Builder.getValue(Call.getArgOperand(i)); - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) { - Ops.push_back( - Builder.DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64)); - Ops.push_back( - Builder.DAG.getTargetConstant(C->getSExtValue(), DL, MVT::i64)); - } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) { - const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo(); - Ops.push_back(Builder.DAG.getTargetFrameIndex( - FI->getIndex(), TLI.getFrameIndexTy(Builder.DAG.getDataLayout()))); - } else - Ops.push_back(OpVal); + SelectionDAG &DAG = Builder.DAG; + for (unsigned I = StartIdx; I < Call.arg_size(); I++) { + SDValue Op = Builder.getValue(Call.getArgOperand(I)); + + // Things on the stack are pointer-typed, meaning that they are already + // legal and can be emitted directly to target nodes. + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) { + Ops.push_back(DAG.getTargetFrameIndex(FI->getIndex(), Op.getValueType())); + } else { + // Otherwise emit a target independent node to be legalised. + Ops.push_back(Builder.getValue(Call.getArgOperand(I))); + } } } @@ -9359,20 +9368,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { Ops.push_back(ShadConst); // Add the live variables. - for (unsigned I = 2; I < CI.arg_size(); I++) { - SDValue Op = getValue(CI.getArgOperand(I)); - - // Things on the stack are pointer-typed, meaning that they are already - // legal and can be emitted directly to target nodes. - if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) { - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - Ops.push_back(DAG.getTargetFrameIndex( - FI->getIndex(), TLI.getFrameIndexTy(DAG.getDataLayout()))); - } else { - // Otherwise emit a target independent node to be legalised. - Ops.push_back(getValue(CI.getArgOperand(I))); - } - } + addStackMapLiveVars(CI, 2, DL, Ops, *this); // Create the STACKMAP node. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); @@ -9449,6 +9445,19 @@ void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB, // Replace the target specific call node with the patchable intrinsic. 
SmallVector<SDValue, 8> Ops; + // Push the chain. + Ops.push_back(*(Call->op_begin())); + + // Optionally, push the glue (if any). + if (HasGlue) + Ops.push_back(*(Call->op_end() - 1)); + + // Push the register mask info. + if (HasGlue) + Ops.push_back(*(Call->op_end() - 2)); + else + Ops.push_back(*(Call->op_end() - 1)); + // Add the <id> and <numBytes> constants. SDValue IDVal = getValue(CB.getArgOperand(PatchPointOpers::IDPos)); Ops.push_back(DAG.getTargetConstant( @@ -9477,27 +9486,13 @@ void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB, for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) Ops.push_back(getValue(CB.getArgOperand(i))); - // Push the arguments from the call instruction up to the register mask. + // Push the arguments from the call instruction. SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1; Ops.append(Call->op_begin() + 2, e); // Push live variables for the stack map. addStackMapLiveVars(CB, NumMetaOpers + NumArgs, dl, Ops, *this); - // Push the register mask info. - if (HasGlue) - Ops.push_back(*(Call->op_end()-2)); - else - Ops.push_back(*(Call->op_end()-1)); - - // Push the chain (this is originally the first operand of the call, but - // becomes now the last or second to last operand). - Ops.push_back(*(Call->op_begin())); - - // Push the glue flag (last operand). - if (HasGlue) - Ops.push_back(*(Call->op_end()-1)); - SDVTList NodeTys; if (IsAnyRegCC && HasDef) { // Create the return types based on the intrinsic definition @@ -9514,13 +9509,12 @@ void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB, NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); // Replace the target specific call node with a PATCHPOINT node. - MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT, - dl, NodeTys, Ops); + SDValue PPV = DAG.getNode(ISD::PATCHPOINT, dl, NodeTys, Ops); // Update the NodeMap. if (HasDef) { if (IsAnyRegCC) - setValue(&CB, SDValue(MN, 0)); + setValue(&CB, SDValue(PPV.getNode(), 0)); else setValue(&CB, Result.first); } @@ -9531,10 +9525,10 @@ void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB, // value. if (IsAnyRegCC && HasDef) { SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)}; - SDValue To[] = {SDValue(MN, 1), SDValue(MN, 2)}; + SDValue To[] = {PPV.getValue(1), PPV.getValue(2)}; DAG.ReplaceAllUsesOfValuesWith(From, To, 2); } else - DAG.ReplaceAllUsesWith(Call, MN); + DAG.ReplaceAllUsesWith(Call, PPV.getNode()); DAG.DeleteNode(Call); // Inform the Frame Information that we have a patchpoint in this function. diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 9df0b64c26c3..6ba01664e756 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -488,6 +488,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::VECREDUCE_FMIN: return "vecreduce_fmin"; case ISD::STACKMAP: return "stackmap"; + case ISD::PATCHPOINT: + return "patchpoint"; // Vector Predication #define BEGIN_REGISTER_VP_SDNODE(SDID, LEGALARG, NAME, ...) 
\ diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 7f453f081982..d46a0a23cca3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -2193,8 +2193,27 @@ void SelectionDAGISel::Select_ARITH_FENCE(SDNode *N) { N->getOperand(0)); } +void SelectionDAGISel::pushStackMapLiveVariable(SmallVectorImpl<SDValue> &Ops, + SDValue OpVal, SDLoc DL) { + SDNode *OpNode = OpVal.getNode(); + + // FrameIndex nodes should have been directly emitted to TargetFrameIndex + // nodes at DAG-construction time. + assert(OpNode->getOpcode() != ISD::FrameIndex); + + if (OpNode->getOpcode() == ISD::Constant) { + Ops.push_back( + CurDAG->getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64)); + Ops.push_back( + CurDAG->getTargetConstant(cast<ConstantSDNode>(OpNode)->getZExtValue(), + DL, OpVal.getValueType())); + } else { + Ops.push_back(OpVal); + } +} + void SelectionDAGISel::Select_STACKMAP(SDNode *N) { - std::vector<SDValue> Ops; + SmallVector<SDValue, 32> Ops; auto *It = N->op_begin(); SDLoc DL(N); @@ -2213,24 +2232,8 @@ void SelectionDAGISel::Select_STACKMAP(SDNode *N) { Ops.push_back(Shad); // Live variable operands. - for (; It != N->op_end(); It++) { - SDNode *OpNode = It->getNode(); - SDValue O; - - // FrameIndex nodes should have been directly emitted to TargetFrameIndex - // nodes at DAG-construction time. - assert(OpNode->getOpcode() != ISD::FrameIndex); - - if (OpNode->getOpcode() == ISD::Constant) { - Ops.push_back( - CurDAG->getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64)); - O = CurDAG->getTargetConstant( - cast<ConstantSDNode>(OpNode)->getZExtValue(), DL, It->getValueType()); - } else { - O = *It; - } - Ops.push_back(O); - } + for (; It != N->op_end(); It++) + pushStackMapLiveVariable(Ops, *It, DL); Ops.push_back(Chain); Ops.push_back(InFlag); @@ -2239,6 +2242,57 @@ void SelectionDAGISel::Select_STACKMAP(SDNode *N) { CurDAG->SelectNodeTo(N, TargetOpcode::STACKMAP, NodeTys, Ops); } +void SelectionDAGISel::Select_PATCHPOINT(SDNode *N) { + SmallVector<SDValue, 32> Ops; + auto *It = N->op_begin(); + SDLoc DL(N); + + // Cache arguments that will be moved to the end in the target node. + SDValue Chain = *It++; + Optional<SDValue> Glue; + if (It->getValueType() == MVT::Glue) + Glue = *It++; + SDValue RegMask = *It++; + + // <id> operand. + SDValue ID = *It++; + assert(ID.getValueType() == MVT::i64); + Ops.push_back(ID); + + // <numShadowBytes> operand. + SDValue Shad = *It++; + assert(Shad.getValueType() == MVT::i32); + Ops.push_back(Shad); + + // Add the callee. + Ops.push_back(*It++); + + // Add <numArgs>. + SDValue NumArgs = *It++; + assert(NumArgs.getValueType() == MVT::i32); + Ops.push_back(NumArgs); + + // Calling convention. + Ops.push_back(*It++); + + // Push the args for the call. + for (uint64_t I = cast<ConstantSDNode>(NumArgs)->getZExtValue(); I != 0; I--) + Ops.push_back(*It++); + + // Now push the live variables. + for (; It != N->op_end(); It++) + pushStackMapLiveVariable(Ops, *It, DL); + + // Finally, the regmask, chain and (if present) glue are moved to the end. + Ops.push_back(RegMask); + Ops.push_back(Chain); + if (Glue.has_value()) + Ops.push_back(Glue.value()); + + SDVTList NodeTys = N->getVTList(); + CurDAG->SelectNodeTo(N, TargetOpcode::PATCHPOINT, NodeTys, Ops); +} + /// GetVBR - decode a vbr encoding whose top bit is set. 
LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) { @@ -2796,6 +2850,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, case ISD::STACKMAP: Select_STACKMAP(NodeToMatch); return; + case ISD::PATCHPOINT: + Select_PATCHPOINT(NodeToMatch); + return; } assert(!NodeToMatch->isMachineOpcode() && "Node already selected!"); diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 3061158eea30..c5c093ae228f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -169,8 +169,14 @@ static Optional<int> findPreviousSpillSlot(const Value *Val, // Spill location is known for gc relocates if (const auto *Relocate = dyn_cast<GCRelocateInst>(Val)) { - const auto &RelocationMap = - Builder.FuncInfo.StatepointRelocationMaps[Relocate->getStatepoint()]; + const Value *Statepoint = Relocate->getStatepoint(); + assert((isa<GCStatepointInst>(Statepoint) || isa<UndefValue>(Statepoint)) && + "GetStatepoint must return one of two types"); + if (isa<UndefValue>(Statepoint)) + return None; + + const auto &RelocationMap = Builder.FuncInfo.StatepointRelocationMaps + [cast<GCStatepointInst>(Statepoint)]; auto It = RelocationMap.find(Relocate); if (It == RelocationMap.end()) @@ -193,7 +199,7 @@ static Optional<int> findPreviousSpillSlot(const Value *Val, if (const PHINode *Phi = dyn_cast<PHINode>(Val)) { Optional<int> MergedResult = None; - for (auto &IncomingValue : Phi->incoming_values()) { + for (const auto &IncomingValue : Phi->incoming_values()) { Optional<int> SpillSlot = findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth - 1); if (!SpillSlot) @@ -569,9 +575,10 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // We cannot assing them to VRegs. SmallSet<SDValue, 8> LPadPointers; if (!UseRegistersForGCPointersInLandingPad) - if (auto *StInvoke = dyn_cast_or_null<InvokeInst>(SI.StatepointInstr)) { + if (const auto *StInvoke = + dyn_cast_or_null<InvokeInst>(SI.StatepointInstr)) { LandingPadInst *LPI = StInvoke->getLandingPadInst(); - for (auto *Relocate : SI.GCRelocates) + for (const auto *Relocate : SI.GCRelocates) if (Relocate->getOperand(0) == LPI) { LPadPointers.insert(Builder.getValue(Relocate->getBasePtr())); LPadPointers.insert(Builder.getValue(Relocate->getDerivedPtr())); @@ -739,7 +746,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( LLVM_DEBUG(dbgs() << "Lowering statepoint " << *SI.StatepointInstr << "\n"); #ifndef NDEBUG - for (auto *Reloc : SI.GCRelocates) + for (const auto *Reloc : SI.GCRelocates) if (Reloc->getParent() == SI.StatepointInstr->getParent()) StatepointLowering.scheduleRelocCall(*Reloc); #endif @@ -1017,7 +1024,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( static std::pair<const GCResultInst*, const GCResultInst*> getGCResultLocality(const GCStatepointInst &S) { std::pair<const GCResultInst *, const GCResultInst*> Res(nullptr, nullptr); - for (auto *U : S.users()) { + for (const auto *U : S.users()) { auto *GRI = dyn_cast<GCResultInst>(U); if (!GRI) continue; @@ -1195,9 +1202,13 @@ void SelectionDAGBuilder::LowerCallSiteWithDeoptBundle( void SelectionDAGBuilder::visitGCResult(const GCResultInst &CI) { // The result value of the gc_result is simply the result of the actual // call. We've already emitted this, so just grab the value. 
- const GCStatepointInst *SI = CI.getStatepoint(); + const Value *SI = CI.getStatepoint(); + assert((isa<GCStatepointInst>(SI) || isa<UndefValue>(SI)) && + "GetStatepoint must return one of two types"); + if (isa<UndefValue>(SI)) + return; - if (SI->getParent() == CI.getParent()) { + if (cast<GCStatepointInst>(SI)->getParent() == CI.getParent()) { setValue(&CI, getValue(SI)); return; } @@ -1215,12 +1226,18 @@ void SelectionDAGBuilder::visitGCResult(const GCResultInst &CI) { } void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { + const Value *Statepoint = Relocate.getStatepoint(); #ifndef NDEBUG // Consistency check // We skip this check for relocates not in the same basic block as their // statepoint. It would be too expensive to preserve validation info through // different basic blocks. - if (Relocate.getStatepoint()->getParent() == Relocate.getParent()) + assert((isa<GCStatepointInst>(Statepoint) || isa<UndefValue>(Statepoint)) && + "GetStatepoint must return one of two types"); + if (isa<UndefValue>(Statepoint)) + return; + + if (cast<GCStatepointInst>(Statepoint)->getParent() == Relocate.getParent()) StatepointLowering.relocCallVisited(Relocate); auto *Ty = Relocate.getType()->getScalarType(); @@ -1230,14 +1247,15 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { const Value *DerivedPtr = Relocate.getDerivedPtr(); auto &RelocationMap = - FuncInfo.StatepointRelocationMaps[Relocate.getStatepoint()]; + FuncInfo.StatepointRelocationMaps[cast<GCStatepointInst>(Statepoint)]; auto SlotIt = RelocationMap.find(&Relocate); assert(SlotIt != RelocationMap.end() && "Relocating not lowered gc value"); const RecordType &Record = SlotIt->second; // If relocation was done via virtual register.. if (Record.type == RecordType::SDValueNode) { - assert(Relocate.getStatepoint()->getParent() == Relocate.getParent() && + assert(cast<GCStatepointInst>(Statepoint)->getParent() == + Relocate.getParent() && "Nonlocal gc.relocate mapped via SDValue"); SDValue SDV = StatepointLowering.getLocation(getValue(DerivedPtr)); assert(SDV.getNode() && "empty SDValue"); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 66389a57f780..cd4f0ae42bcd 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1056,13 +1056,13 @@ bool TargetLowering::SimplifyDemandedBits( // TODO: We can probably do more work on calculating the known bits and // simplifying the operations for scalable vectors, but for now we just // bail out. - if (Op.getValueType().isScalableVector()) + EVT VT = Op.getValueType(); + if (VT.isScalableVector()) return false; bool IsLE = TLO.DAG.getDataLayout().isLittleEndian(); unsigned NumElts = OriginalDemandedElts.getBitWidth(); - assert((!Op.getValueType().isVector() || - NumElts == Op.getValueType().getVectorNumElements()) && + assert((!VT.isVector() || NumElts == VT.getVectorNumElements()) && "Unexpected vector size"); APInt DemandedBits = OriginalDemandedBits; @@ -1088,7 +1088,6 @@ bool TargetLowering::SimplifyDemandedBits( } // Other users may use these bits. - EVT VT = Op.getValueType(); if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) { if (Depth != 0) { // If not at the root, Just compute the Known bits to @@ -1468,6 +1467,33 @@ bool TargetLowering::SimplifyDemandedBits( } } + // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2)) + // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks. 
+ if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND && + Op0->hasOneUse() && Op1->hasOneUse()) { + // Attempt to match all commutations - m_c_Or would've been useful! + for (int I = 0; I != 2; ++I) { + SDValue X = Op.getOperand(I).getOperand(0); + SDValue C1 = Op.getOperand(I).getOperand(1); + SDValue Alt = Op.getOperand(1 - I).getOperand(0); + SDValue C2 = Op.getOperand(1 - I).getOperand(1); + if (Alt.getOpcode() == ISD::OR) { + for (int J = 0; J != 2; ++J) { + if (X == Alt.getOperand(J)) { + SDValue Y = Alt.getOperand(1 - J); + if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT, + {C1, C2})) { + SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12); + SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2); + return TLO.CombineTo( + Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY)); + } + } + } + } + } + } + Known |= Known2; break; } @@ -1500,7 +1526,7 @@ bool TargetLowering::SimplifyDemandedBits( if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero)) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1)); - ConstantSDNode* C = isConstOrConstSplat(Op1, DemandedElts); + ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts); if (C) { // If one side is a constant, and all of the set bits in the constant are // also known set on the other side, turn this into an AND, as we know @@ -1521,6 +1547,32 @@ bool TargetLowering::SimplifyDemandedBits( SDValue New = TLO.DAG.getNOT(dl, Op0, VT); return TLO.CombineTo(Op, New); } + + unsigned Op0Opcode = Op0.getOpcode(); + if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) { + if (ConstantSDNode *ShiftC = + isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) { + // Don't crash on an oversized shift. We can not guarantee that a + // bogus shift has been simplified to undef. + if (ShiftC->getAPIntValue().ult(BitWidth)) { + uint64_t ShiftAmt = ShiftC->getZExtValue(); + APInt Ones = APInt::getAllOnes(BitWidth); + Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) + : Ones.lshr(ShiftAmt); + const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo(); + if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) && + TLI.isDesirableToCommuteXorWithShift(Op.getNode())) { + // If the xor constant is a demanded mask, do a 'not' before the + // shift: + // xor (X << ShiftC), XorC --> (not X) << ShiftC + // xor (X >> ShiftC), XorC --> (not X) >> ShiftC + SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT); + return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not, + Op0.getOperand(1))); + } + } + } + } } // If we can't turn this into a 'not', try to shrink the constant. @@ -1723,6 +1775,26 @@ bool TargetLowering::SimplifyDemandedBits( if ((ShAmt < DemandedBits.getActiveBits()) && ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) return true; + } else { + // This is a variable shift, so we can't shift the demand mask by a known + // amount. But if we are not demanding high bits, then we are not + // demanding those bits from the pre-shifted operand either. + if (unsigned CTLZ = DemandedBits.countLeadingZeros()) { + APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ)); + if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO, + Depth + 1)) { + SDNodeFlags Flags = Op.getNode()->getFlags(); + if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) { + // Disable the nsw and nuw flags. We can no longer guarantee that we + // won't wrap after simplification. 
+ Flags.setNoSignedWrap(false); + Flags.setNoUnsignedWrap(false); + Op->setFlags(Flags); + } + return true; + } + Known.resetAll(); + } } // If we are only demanding sign bits then we can use the shift source @@ -3292,6 +3364,12 @@ bool TargetLowering::SimplifyDemandedVectorElts( TLO, Depth + 1)) return true; + // If every element pair has a zero/undef then just fold to zero. + // fold (and x, undef) -> 0 / (and x, 0) -> 0 + // fold (mul x, undef) -> 0 / (mul x, 0) -> 0 + if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef)) + return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT)); + // If either side has a zero element, then the result element is zero, even // if the other is an UNDEF. // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros @@ -3301,7 +3379,6 @@ bool TargetLowering::SimplifyDemandedVectorElts( KnownUndef &= ~KnownZero; // Attempt to avoid multi-use ops if we don't need anything from them. - // TODO - use KnownUndef to relax the demandedelts? if (!DemandedElts.isAllOnes()) if (SimplifyDemandedVectorEltsBinOp(Op0, Op1)) return true; @@ -5204,6 +5281,7 @@ TargetLowering::ParseConstraints(const DataLayout &DL, // ConstraintOperands list. unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. unsigned ResNo = 0; // ResNo - The result number of the next output. + unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number. for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) { ConstraintOperands.emplace_back(std::move(CI)); @@ -5240,6 +5318,14 @@ TargetLowering::ParseConstraints(const DataLayout &DL, case InlineAsm::isInput: OpInfo.CallOperandVal = Call.getArgOperand(ArgNo); break; + case InlineAsm::isLabel: + OpInfo.CallOperandVal = + cast<CallBrInst>(&Call)->getBlockAddressForIndirectDest(LabelNo); + OpInfo.ConstraintVT = + getAsmOperandValueType(DL, OpInfo.CallOperandVal->getType()) + .getSimpleVT(); + ++LabelNo; + continue; case InlineAsm::isClobber: // Nothing to do. break; @@ -5852,22 +5938,22 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, // FIXME: We should use a narrower constant when the upper // bits are known to be zero. const APInt& Divisor = C->getAPIntValue(); - UnsignedDivisonByConstantInfo magics = UnsignedDivisonByConstantInfo::get(Divisor); + UnsignedDivisionByConstantInfo magics = + UnsignedDivisionByConstantInfo::get(Divisor); unsigned PreShift = 0, PostShift = 0; // If the divisor is even, we can avoid using the expensive fixup by // shifting the divided value upfront. - if (magics.IsAdd != 0 && !Divisor[0]) { + if (magics.IsAdd && !Divisor[0]) { PreShift = Divisor.countTrailingZeros(); // Get magic number for the shifted divisor. 
- magics = UnsignedDivisonByConstantInfo::get(Divisor.lshr(PreShift), PreShift); - assert(magics.IsAdd == 0 && "Should use cheap fixup now"); + magics = + UnsignedDivisionByConstantInfo::get(Divisor.lshr(PreShift), PreShift); + assert(!magics.IsAdd && "Should use cheap fixup now"); } - APInt Magic = magics.Magic; - unsigned SelNPQ; - if (magics.IsAdd == 0 || Divisor.isOne()) { + if (!magics.IsAdd || Divisor.isOne()) { assert(magics.ShiftAmount < Divisor.getBitWidth() && "We shouldn't generate an undefined shift!"); PostShift = magics.ShiftAmount; @@ -5878,7 +5964,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, } PreShifts.push_back(DAG.getConstant(PreShift, dl, ShSVT)); - MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT)); + MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT)); NPQFactors.push_back( DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1) : APInt::getZero(EltBits), diff --git a/llvm/lib/CodeGen/SlotIndexes.cpp b/llvm/lib/CodeGen/SlotIndexes.cpp index ffac68a223bf..ee3a0164564e 100644 --- a/llvm/lib/CodeGen/SlotIndexes.cpp +++ b/llvm/lib/CodeGen/SlotIndexes.cpp @@ -179,21 +179,12 @@ void SlotIndexes::renumberIndexes(IndexList::iterator curItr) { void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB, MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End) { - // FIXME: Is this really necessary? The only caller repairIntervalsForRange() - // does the same thing. - // Find anchor points, which are at the beginning/end of blocks or at - // instructions that already have indexes. - while (Begin != MBB->begin() && !hasIndex(*Begin)) - --Begin; - while (End != MBB->end() && !hasIndex(*End)) - ++End; - bool includeStart = (Begin == MBB->begin()); SlotIndex startIdx; if (includeStart) startIdx = getMBBStartIdx(MBB); else - startIdx = getInstructionIndex(*Begin); + startIdx = getInstructionIndex(*--Begin); SlotIndex endIdx; if (End == MBB->end()) diff --git a/llvm/lib/CodeGen/SplitKit.cpp b/llvm/lib/CodeGen/SplitKit.cpp index 140a91ae342b..94149f56e703 100644 --- a/llvm/lib/CodeGen/SplitKit.cpp +++ b/llvm/lib/CodeGen/SplitKit.cpp @@ -347,13 +347,11 @@ void SplitAnalysis::analyze(const LiveInterval *li) { //===----------------------------------------------------------------------===// /// Create a new SplitEditor for editing the LiveInterval analyzed by SA. -SplitEditor::SplitEditor(SplitAnalysis &SA, AliasAnalysis &AA, - LiveIntervals &LIS, VirtRegMap &VRM, +SplitEditor::SplitEditor(SplitAnalysis &SA, LiveIntervals &LIS, VirtRegMap &VRM, MachineDominatorTree &MDT, MachineBlockFrequencyInfo &MBFI, VirtRegAuxInfo &VRAI) - : SA(SA), AA(AA), LIS(LIS), VRM(VRM), - MRI(VRM.getMachineFunction().getRegInfo()), MDT(MDT), - TII(*VRM.getMachineFunction().getSubtarget().getInstrInfo()), + : SA(SA), LIS(LIS), VRM(VRM), MRI(VRM.getMachineFunction().getRegInfo()), + MDT(MDT), TII(*VRM.getMachineFunction().getSubtarget().getInstrInfo()), TRI(*VRM.getMachineFunction().getSubtarget().getRegisterInfo()), MBFI(MBFI), VRAI(VRAI), RegAssign(Allocator) {} @@ -371,9 +369,7 @@ void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) { LICalc[1].reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT, &LIS.getVNInfoAllocator()); - // We don't need an AliasAnalysis since we will only be performing - // cheap-as-a-copy remats anyway. 
- Edit->anyRematerializable(nullptr); + Edit->anyRematerializable(); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -1454,7 +1450,7 @@ void SplitEditor::deleteRematVictims() { if (Dead.empty()) return; - Edit->eliminateDeadDefs(Dead, None, &AA); + Edit->eliminateDeadDefs(Dead, None); } void SplitEditor::forceRecomputeVNI(const VNInfo &ParentVNI) { diff --git a/llvm/lib/CodeGen/SplitKit.h b/llvm/lib/CodeGen/SplitKit.h index 4400a797d38e..556b022b93fb 100644 --- a/llvm/lib/CodeGen/SplitKit.h +++ b/llvm/lib/CodeGen/SplitKit.h @@ -257,7 +257,6 @@ public: /// class LLVM_LIBRARY_VISIBILITY SplitEditor { SplitAnalysis &SA; - AAResults &AA; LiveIntervals &LIS; VirtRegMap &VRM; MachineRegisterInfo &MRI; @@ -436,9 +435,9 @@ private: public: /// Create a new SplitEditor for editing the LiveInterval analyzed by SA. /// Newly created intervals will be appended to newIntervals. - SplitEditor(SplitAnalysis &SA, AAResults &AA, LiveIntervals &LIS, - VirtRegMap &VRM, MachineDominatorTree &MDT, - MachineBlockFrequencyInfo &MBFI, VirtRegAuxInfo &VRAI); + SplitEditor(SplitAnalysis &SA, LiveIntervals &LIS, VirtRegMap &VRM, + MachineDominatorTree &MDT, MachineBlockFrequencyInfo &MBFI, + VirtRegAuxInfo &VRAI); /// reset - Prepare for a new split. void reset(LiveRangeEdit&, ComplementSpillMode = SM_Partition); diff --git a/llvm/lib/CodeGen/StackMaps.cpp b/llvm/lib/CodeGen/StackMaps.cpp index 6757d6ca4f88..ccaff862fa3f 100644 --- a/llvm/lib/CodeGen/StackMaps.cpp +++ b/llvm/lib/CodeGen/StackMaps.cpp @@ -365,7 +365,7 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const { }); for (auto I = LiveOuts.begin(), E = LiveOuts.end(); I != E; ++I) { - for (auto II = std::next(I); II != E; ++II) { + for (auto *II = std::next(I); II != E; ++II) { if (I->DwarfRegNum != II->DwarfRegNum) { // Skip all the now invalid entries. I = --II; diff --git a/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp b/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp index 4408011c95c0..2282d53e8ffd 100644 --- a/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp +++ b/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp @@ -267,7 +267,7 @@ void SwiftErrorValueTracking::preassignVRegs( if (auto *CB = dyn_cast<CallBase>(&*It)) { // A call-site with a swifterror argument is both use and def. const Value *SwiftErrorAddr = nullptr; - for (auto &Arg : CB->args()) { + for (const auto &Arg : CB->args()) { if (!Arg->isSwiftError()) continue; // Use of swifterror. diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp index ba533a491b9c..18507b8fa84f 100644 --- a/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/llvm/lib/CodeGen/TailDuplicator.cpp @@ -653,7 +653,7 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, // demonstrated by test/CodeGen/Hexagon/tail-dup-subreg-abort.ll. // Disable tail duplication for this case for now, until the problem is // fixed. 
- for (auto SB : TailBB.successors()) { + for (auto *SB : TailBB.successors()) { for (auto &I : *SB) { if (!I.isPHI()) break; diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp index 2a987ee3eedf..4116231c005f 100644 --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -916,7 +916,7 @@ void TargetInstrInfo::genAlternativeCodeSequence( } bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric( - const MachineInstr &MI, AAResults *AA) const { + const MachineInstr &MI) const { const MachineFunction &MF = *MI.getMF(); const MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -952,7 +952,7 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric( return false; // Avoid instructions which load from potentially varying memory. - if (MI.mayLoad() && !MI.isDereferenceableInvariantLoad(AA)) + if (MI.mayLoad() && !MI.isDereferenceableInvariantLoad()) return false; // If any of the registers accessed are non-constant, conservatively assume diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 6a595a4c748b..a342a4dd1e25 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -1145,7 +1145,7 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, /// specified register class are all legal. bool TargetLoweringBase::isLegalRC(const TargetRegisterInfo &TRI, const TargetRegisterClass &RC) const { - for (auto I = TRI.legalclasstypes_begin(RC); *I != MVT::Other; ++I) + for (const auto *I = TRI.legalclasstypes_begin(RC); *I != MVT::Other; ++I) if (isTypeLegal(*I)) return true; return false; diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index c44fd9f97383..17fe819fa900 100644 --- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1450,9 +1450,9 @@ void TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, TiedPairList &TiedPairs, unsigned &Dist) { - bool IsEarlyClobber = llvm::find_if(TiedPairs, [MI](auto const &TP) { - return MI->getOperand(TP.second).isEarlyClobber(); - }) != TiedPairs.end(); + bool IsEarlyClobber = llvm::any_of(TiedPairs, [MI](auto const &TP) { + return MI->getOperand(TP.second).isEarlyClobber(); + }); bool RemovedKillFlag = false; bool AllUsesCopied = true; diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp index 166a3c413f6a..8dc8d381ad16 100644 --- a/llvm/lib/CodeGen/TypePromotion.cpp +++ b/llvm/lib/CodeGen/TypePromotion.cpp @@ -446,7 +446,7 @@ void IRPromoter::ExtendSources() { // Now, insert extending instructions between the sources and their users. LLVM_DEBUG(dbgs() << "IR Promotion: Promoting sources:\n"); - for (auto V : Sources) { + for (auto *V : Sources) { LLVM_DEBUG(dbgs() << " - " << *V << "\n"); if (auto *I = dyn_cast<Instruction>(V)) InsertZExt(I, I); @@ -524,7 +524,7 @@ void IRPromoter::TruncateSinks() { // Fix up any stores or returns that use the results of the promoted // chain. - for (auto I : Sinks) { + for (auto *I : Sinks) { LLVM_DEBUG(dbgs() << "IR Promotion: For Sink: " << *I << "\n"); // Handle calls separately as we need to iterate over arg operands. 
@@ -570,7 +570,7 @@ void IRPromoter::Cleanup() { LLVM_DEBUG(dbgs() << "IR Promotion: Cleanup..\n"); // Some zexts will now have become redundant, along with their trunc // operands, so remove them - for (auto V : Visited) { + for (auto *V : Visited) { if (!isa<ZExtInst>(V)) continue; diff --git a/llvm/lib/CodeGen/VLIWMachineScheduler.cpp b/llvm/lib/CodeGen/VLIWMachineScheduler.cpp index 8b5b585090f5..8225d4ea6996 100644 --- a/llvm/lib/CodeGen/VLIWMachineScheduler.cpp +++ b/llvm/lib/CodeGen/VLIWMachineScheduler.cpp @@ -579,7 +579,7 @@ static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) { /// pressure, then return 0. int ConvergingVLIWScheduler::pressureChange(const SUnit *SU, bool isBotUp) { PressureDiff &PD = DAG->getPressureDiff(SU); - for (auto &P : PD) { + for (const auto &P : PD) { if (!P.isValid()) continue; // The pressure differences are computed bottom-up, so the comparision for diff --git a/llvm/lib/DWARFLinker/DWARFLinker.cpp b/llvm/lib/DWARFLinker/DWARFLinker.cpp index 298359dea9af..62b7f629f403 100644 --- a/llvm/lib/DWARFLinker/DWARFLinker.cpp +++ b/llvm/lib/DWARFLinker/DWARFLinker.cpp @@ -504,9 +504,14 @@ unsigned DWARFLinker::shouldKeepSubprogramDIE( &DIE); return Flags; } + if (*LowPc > *HighPc) { + reportWarning("low_pc greater than high_pc. Range will be discarded.\n", + File, &DIE); + return Flags; + } // Replace the debug map range with a more accurate one. - Ranges[*LowPc] = ObjFileAddressRange(*HighPc, MyInfo.AddrAdjust); + Ranges.insert({*LowPc, *HighPc}, MyInfo.AddrAdjust); Unit.addFunctionRange(*LowPc, *HighPc, MyInfo.AddrAdjust); return Flags; } @@ -1575,7 +1580,7 @@ void DWARFLinker::patchRangesForUnit(const CompileUnit &Unit, DWARFDataExtractor RangeExtractor(OrigDwarf.getDWARFObj(), OrigDwarf.getDWARFObj().getRangesSection(), OrigDwarf.isLittleEndian(), AddressSize); - auto InvalidRange = FunctionRanges.end(), CurrRange = InvalidRange; + Optional<std::pair<AddressRange, int64_t>> CurrRange; DWARFUnit &OrigUnit = Unit.getOrigUnit(); auto OrigUnitDie = OrigUnit.getUnitDIE(false); uint64_t OrigLowPc = @@ -1598,12 +1603,11 @@ void DWARFLinker::patchRangesForUnit(const CompileUnit &Unit, if (!Entries.empty()) { const DWARFDebugRangeList::RangeListEntry &First = Entries.front(); - if (CurrRange == InvalidRange || - First.StartAddress + OrigLowPc < CurrRange.start() || - First.StartAddress + OrigLowPc >= CurrRange.stop()) { - CurrRange = FunctionRanges.find(First.StartAddress + OrigLowPc); - if (CurrRange == InvalidRange || - CurrRange.start() > First.StartAddress + OrigLowPc) { + if (!CurrRange || + !CurrRange->first.contains(First.StartAddress + OrigLowPc)) { + CurrRange = FunctionRanges.getRangeValueThatContains( + First.StartAddress + OrigLowPc); + if (!CurrRange) { reportWarning("no mapping for range.", File); continue; } @@ -1710,7 +1714,7 @@ void DWARFLinker::patchLineTableForUnit(CompileUnit &Unit, // in NewRows. std::vector<DWARFDebugLine::Row> Seq; const auto &FunctionRanges = Unit.getFunctionRanges(); - auto InvalidRange = FunctionRanges.end(), CurrRange = InvalidRange; + Optional<std::pair<AddressRange, int64_t>> CurrRange; // FIXME: This logic is meant to generate exactly the same output as // Darwin's classic dsymutil. There is a nicer way to implement this @@ -1729,19 +1733,14 @@ void DWARFLinker::patchLineTableForUnit(CompileUnit &Unit, // it is marked as end_sequence in the input (because in that // case, the relocation offset is accurate and that entry won't // serve as the start of another function). 
- if (CurrRange == InvalidRange || Row.Address.Address < CurrRange.start() || - Row.Address.Address > CurrRange.stop() || - (Row.Address.Address == CurrRange.stop() && !Row.EndSequence)) { + if (!CurrRange || !CurrRange->first.contains(Row.Address.Address) || + (Row.Address.Address == CurrRange->first.end() && !Row.EndSequence)) { // We just stepped out of a known range. Insert a end_sequence // corresponding to the end of the range. - uint64_t StopAddress = CurrRange != InvalidRange - ? CurrRange.stop() + CurrRange.value() - : -1ULL; - CurrRange = FunctionRanges.find(Row.Address.Address); - bool CurrRangeValid = - CurrRange != InvalidRange && CurrRange.start() <= Row.Address.Address; - if (!CurrRangeValid) { - CurrRange = InvalidRange; + uint64_t StopAddress = + CurrRange ? CurrRange->first.end() + CurrRange->second : -1ULL; + CurrRange = FunctionRanges.getRangeValueThatContains(Row.Address.Address); + if (!CurrRange) { if (StopAddress != -1ULL) { // Try harder by looking in the Address ranges map. // There are corner cases where this finds a @@ -1749,14 +1748,9 @@ void DWARFLinker::patchLineTableForUnit(CompileUnit &Unit, // for now do as dsymutil. // FIXME: Understand exactly what cases this addresses and // potentially remove it along with the Ranges map. - auto Range = Ranges.lower_bound(Row.Address.Address); - if (Range != Ranges.begin() && Range != Ranges.end()) - --Range; - - if (Range != Ranges.end() && Range->first <= Row.Address.Address && - Range->second.HighPC >= Row.Address.Address) { - StopAddress = Row.Address.Address + Range->second.Offset; - } + if (Optional<std::pair<AddressRange, int64_t>> Range = + Ranges.getRangeValueThatContains(Row.Address.Address)) + StopAddress = Row.Address.Address + (*Range).second; } } if (StopAddress != -1ULL && !Seq.empty()) { @@ -1772,7 +1766,7 @@ void DWARFLinker::patchLineTableForUnit(CompileUnit &Unit, insertLineSequence(Seq, NewRows); } - if (!CurrRangeValid) + if (!CurrRange) continue; } @@ -1781,7 +1775,7 @@ void DWARFLinker::patchLineTableForUnit(CompileUnit &Unit, continue; // Relocate row address and add it to the current sequence. - Row.Address.Address += CurrRange.value(); + Row.Address.Address += CurrRange->second; Seq.emplace_back(Row); if (Row.EndSequence) @@ -1921,11 +1915,9 @@ void DWARFLinker::patchFrameInfoForObject(const DWARFFile &File, // the function entry point, thus we can't just lookup the address // in the debug map. Use the AddressInfo's range map to see if the FDE // describes something that we can relocate. - auto Range = Ranges.upper_bound(Loc); - if (Range != Ranges.begin()) - --Range; - if (Range == Ranges.end() || Range->first > Loc || - Range->second.HighPC <= Loc) { + Optional<std::pair<AddressRange, int64_t>> Range = + Ranges.getRangeValueThatContains(Loc); + if (!Range) { // The +4 is to account for the size of the InitialLength field itself. InputOffset = EntryOffset + InitialLength + 4; continue; @@ -1953,7 +1945,7 @@ void DWARFLinker::patchFrameInfoForObject(const DWARFFile &File, // fields that will get reconstructed by emitFDE(). 
unsigned FDERemainingBytes = InitialLength - (4 + AddrSize); TheDwarfEmitter->emitFDE(IteratorInserted.first->getValue(), AddrSize, - Loc + Range->second.Offset, + Loc + Range->second, FrameData.substr(InputOffset, FDERemainingBytes)); InputOffset += FDERemainingBytes; } diff --git a/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp b/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp index e9e8be7fd008..1cb20c0bb948 100644 --- a/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp +++ b/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp @@ -105,11 +105,7 @@ void CompileUnit::addLabelLowPc(uint64_t LabelLowPc, int64_t PcOffset) { void CompileUnit::addFunctionRange(uint64_t FuncLowPc, uint64_t FuncHighPc, int64_t PcOffset) { - // Don't add empty ranges to the interval map. They are a problem because - // the interval map expects half open intervals. This is safe because they - // are empty anyway. - if (FuncHighPc != FuncLowPc) - Ranges.insert(FuncLowPc, FuncHighPc, PcOffset); + Ranges.insert({FuncLowPc, FuncHighPc}, PcOffset); this->LowPc = std::min(LowPc, FuncLowPc + PcOffset); this->HighPc = std::max(HighPc, FuncHighPc + PcOffset); } diff --git a/llvm/lib/DWARFLinker/DWARFStreamer.cpp b/llvm/lib/DWARFLinker/DWARFStreamer.cpp index 55ff6b14f945..a00e51fcf135 100644 --- a/llvm/lib/DWARFLinker/DWARFStreamer.cpp +++ b/llvm/lib/DWARFLinker/DWARFStreamer.cpp @@ -321,13 +321,14 @@ void DwarfStreamer::emitSwiftReflectionSection( /// sized addresses describing the ranges. void DwarfStreamer::emitRangesEntries( int64_t UnitPcOffset, uint64_t OrigLowPc, - const FunctionIntervals::const_iterator &FuncRange, + Optional<std::pair<AddressRange, int64_t>> FuncRange, const std::vector<DWARFDebugRangeList::RangeListEntry> &Entries, unsigned AddressSize) { MS->switchSection(MC->getObjectFileInfo()->getDwarfRangesSection()); // Offset each range by the right amount. - int64_t PcOffset = Entries.empty() ? 0 : FuncRange.value() + UnitPcOffset; + int64_t PcOffset = + (Entries.empty() || !FuncRange) ? 0 : FuncRange->second + UnitPcOffset; for (const auto &Range : Entries) { if (Range.isBaseAddressSelectionEntry(AddressSize)) { warn("unsupported base address selection operation", @@ -339,8 +340,7 @@ void DwarfStreamer::emitRangesEntries( continue; // All range entries should lie in the function range. - if (!(Range.StartAddress + OrigLowPc >= FuncRange.start() && - Range.EndAddress + OrigLowPc <= FuncRange.stop())) + if (!FuncRange->first.contains(Range.StartAddress + OrigLowPc)) warn("inconsistent range data.", "emitting debug_ranges"); MS->emitIntValue(Range.StartAddress + PcOffset, AddressSize); MS->emitIntValue(Range.EndAddress + PcOffset, AddressSize); @@ -365,11 +365,13 @@ void DwarfStreamer::emitUnitRangesEntries(CompileUnit &Unit, // IntervalMap will have coalesced the non-linked ranges, but here // we want to coalesce the linked addresses. 
std::vector<std::pair<uint64_t, uint64_t>> Ranges; - const auto &FunctionRanges = Unit.getFunctionRanges(); - for (auto Range = FunctionRanges.begin(), End = FunctionRanges.end(); - Range != End; ++Range) - Ranges.push_back(std::make_pair(Range.start() + Range.value(), - Range.stop() + Range.value())); + const RangesTy &FunctionRanges = Unit.getFunctionRanges(); + for (size_t Idx = 0; Idx < FunctionRanges.size(); Idx++) { + std::pair<AddressRange, int64_t> CurRange = FunctionRanges[Idx]; + + Ranges.push_back(std::make_pair(CurRange.first.start() + CurRange.second, + CurRange.first.end() + CurRange.second)); + } // The object addresses where sorted, but again, the linked // addresses might end up in a different order. diff --git a/llvm/lib/DWP/DWP.cpp b/llvm/lib/DWP/DWP.cpp index 34615a73e328..44e39c019e0c 100644 --- a/llvm/lib/DWP/DWP.cpp +++ b/llvm/lib/DWP/DWP.cpp @@ -16,6 +16,7 @@ #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCTargetOptionsCommandFlags.h" #include "llvm/Object/Decompressor.h" +#include "llvm/Object/ELFObjectFile.h" #include "llvm/Support/MemoryBuffer.h" using namespace llvm; @@ -273,12 +274,16 @@ static Error createError(StringRef Name, Error E) { static Error handleCompressedSection(std::deque<SmallString<32>> &UncompressedSections, - StringRef &Name, StringRef &Contents) { - if (!Decompressor::isGnuStyle(Name)) + SectionRef Sec, StringRef Name, StringRef &Contents) { + auto *Obj = dyn_cast<ELFObjectFileBase>(Sec.getObject()); + if (!Obj || + !(static_cast<ELFSectionRef>(Sec).getFlags() & ELF::SHF_COMPRESSED)) return Error::success(); - - Expected<Decompressor> Dec = - Decompressor::create(Name, Contents, false /*IsLE*/, false /*Is64Bit*/); + bool IsLE = isa<object::ELF32LEObjectFile>(Obj) || + isa<object::ELF64LEObjectFile>(Obj); + bool Is64 = isa<object::ELF64LEObjectFile>(Obj) || + isa<object::ELF64BEObjectFile>(Obj); + Expected<Decompressor> Dec = Decompressor::create(Name, Contents, IsLE, Is64); if (!Dec) return createError(Name, Dec.takeError()); @@ -286,7 +291,6 @@ handleCompressedSection(std::deque<SmallString<32>> &UncompressedSections, if (Error E = Dec->resizeAndDecompress(UncompressedSections.back())) return createError(Name, std::move(E)); - Name = Name.substr(2); // Drop ".z" Contents = UncompressedSections.back(); return Error::success(); } @@ -494,7 +498,8 @@ Error handleSection( return ContentsOrErr.takeError(); StringRef Contents = *ContentsOrErr; - if (auto Err = handleCompressedSection(UncompressedSections, Name, Contents)) + if (auto Err = handleCompressedSection(UncompressedSections, Section, Name, + Contents)) return Err; Name = Name.substr(Name.find_first_not_of("._")); diff --git a/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp b/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp index 27f63b9edcd0..7f4511258c64 100644 --- a/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp +++ b/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp @@ -36,8 +36,10 @@ using namespace llvm::codeview; namespace { #define error(X) \ - if (auto EC = X) \ - return EC; + do { \ + if (auto EC = X) \ + return EC; \ + } while (false) static const EnumEntry<TypeLeafKind> LeafTypeNames[] = { #define CV_TYPE(enum, val) {#enum, enum}, diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp index 2e567d8bc7ee..19d7d659a86a 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -1645,7 +1645,7 @@ class DWARFObjInMemory final : public DWARFObject { /// provided by Data. 
Otherwise leaves it unchanged. Error maybeDecompress(const object::SectionRef &Sec, StringRef Name, StringRef &Data) { - if (!Decompressor::isCompressed(Sec)) + if (!Sec.isCompressed()) return Error::success(); Expected<Decompressor> Decompressor = diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp index 33856c12b3c9..d2ed4fe018b5 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -29,10 +29,6 @@ using namespace llvm; using namespace dwarf; -namespace llvm { -class DwarfContext; -} - using FileLineInfoKind = DILineInfoSpecifier::FileLineInfoKind; namespace { diff --git a/llvm/lib/DebugInfo/Symbolize/Markup.cpp b/llvm/lib/DebugInfo/Symbolize/Markup.cpp index 9bc65e763287..aa8a89812227 100644 --- a/llvm/lib/DebugInfo/Symbolize/Markup.cpp +++ b/llvm/lib/DebugInfo/Symbolize/Markup.cpp @@ -100,6 +100,9 @@ Optional<MarkupNode> MarkupParser::nextNode() { } void MarkupParser::flush() { + Buffer.clear(); + NextIdx = 0; + Line = {}; if (InProgressMultiline.empty()) return; FinishedMultiline.swap(InProgressMultiline); diff --git a/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp b/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp index 3363fe5e531f..91a51485026e 100644 --- a/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp +++ b/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp @@ -10,14 +10,22 @@ /// This file defines the implementation of a filter that replaces symbolizer /// markup with human-readable expressions. /// +/// See https://llvm.org/docs/SymbolizerMarkupFormat.html +/// //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/Symbolize/MarkupFilter.h" #include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/DebugInfo/Symbolize/Markup.h" +#include "llvm/Debuginfod/Debuginfod.h" #include "llvm/Demangle/Demangle.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" @@ -28,30 +36,195 @@ MarkupFilter::MarkupFilter(raw_ostream &OS, Optional<bool> ColorsEnabled) : OS(OS), ColorsEnabled(ColorsEnabled.value_or( WithColor::defaultAutoDetectFunction()(OS))) {} -void MarkupFilter::beginLine(StringRef Line) { +void MarkupFilter::filter(StringRef Line) { this->Line = Line; resetColor(); + + Parser.parseLine(Line); + SmallVector<MarkupNode> DeferredNodes; + // See if the line is a contextual (i.e. contains a contextual element). + // In this case, anything after the contextual element is elided, or the whole + // line may be elided. + while (Optional<MarkupNode> Node = Parser.nextNode()) { + // If this was a contextual line, then summarily stop processing. + if (tryContextualElement(*Node, DeferredNodes)) + return; + // This node may yet be part of an elided contextual line. + DeferredNodes.push_back(*Node); + } + + // This was not a contextual line, so nothing in it should be elided. 
+ endAnyModuleInfoLine(); + for (const MarkupNode &Node : DeferredNodes) + filterNode(Node); } -void MarkupFilter::filter(const MarkupNode &Node) { - if (!checkTag(Node)) - return; +void MarkupFilter::finish() { + Parser.flush(); + while (Optional<MarkupNode> Node = Parser.nextNode()) + filterNode(*Node); + endAnyModuleInfoLine(); + resetColor(); + Modules.clear(); + MMaps.clear(); +} - if (trySGR(Node)) - return; +// See if the given node is a contextual element and handle it if so. This may +// either output or defer the element; in the former case, it will first emit +// any DeferredNodes. +// +// Returns true if the given element was a contextual element. In this case, +// DeferredNodes should be considered handled and should not be emitted. The +// rest of the containing line must also be ignored in case the element was +// deferred to a following line. +bool MarkupFilter::tryContextualElement( + const MarkupNode &Node, const SmallVector<MarkupNode> &DeferredNodes) { + if (tryMMap(Node, DeferredNodes)) + return true; + if (tryReset(Node, DeferredNodes)) + return true; + return tryModule(Node, DeferredNodes); +} - if (Node.Tag == "symbol") { - if (!checkNumFields(Node, 1)) - return; +bool MarkupFilter::tryMMap(const MarkupNode &Node, + const SmallVector<MarkupNode> &DeferredNodes) { + if (Node.Tag != "mmap") + return false; + Optional<MMap> ParsedMMap = parseMMap(Node); + if (!ParsedMMap) + return true; + + if (const MMap *M = overlappingMMap(*ParsedMMap)) { + WithColor::error(errs()) + << formatv("overlapping mmap: #{0:x} [{1:x},{2:x})\n", M->Mod->ID, + M->Addr, M->Addr + M->Size); + reportLocation(Node.Fields[0].begin()); + return true; + } + + auto Res = MMaps.emplace(ParsedMMap->Addr, std::move(*ParsedMMap)); + assert(Res.second && "Overlap check should ensure emplace succeeds."); + MMap &MMap = Res.first->second; + + if (!MIL || MIL->Mod != MMap.Mod) { + endAnyModuleInfoLine(); + for (const MarkupNode &Node : DeferredNodes) + filterNode(Node); + beginModuleInfoLine(MMap.Mod); + OS << "; adds"; + } + MIL->MMaps.push_back(&MMap); + return true; +} + +bool MarkupFilter::tryReset(const MarkupNode &Node, + const SmallVector<MarkupNode> &DeferredNodes) { + if (Node.Tag != "reset") + return false; + if (!checkNumFields(Node, 0)) + return true; + + if (!Modules.empty() || !MMaps.empty()) { + endAnyModuleInfoLine(); + for (const MarkupNode &Node : DeferredNodes) + filterNode(Node); highlight(); - OS << llvm::demangle(Node.Fields.front().str()); + OS << "[[[reset]]]" << lineEnding(); restoreColor(); + + Modules.clear(); + MMaps.clear(); + } + return true; +} + +bool MarkupFilter::tryModule(const MarkupNode &Node, + const SmallVector<MarkupNode> &DeferredNodes) { + if (Node.Tag != "module") + return false; + Optional<Module> ParsedModule = parseModule(Node); + if (!ParsedModule) + return true; + + auto Res = Modules.try_emplace( + ParsedModule->ID, std::make_unique<Module>(std::move(*ParsedModule))); + if (!Res.second) { + WithColor::error(errs()) << "duplicate module ID\n"; + reportLocation(Node.Fields[0].begin()); + return true; + } + Module &Module = *Res.first->second; + + endAnyModuleInfoLine(); + for (const MarkupNode &Node : DeferredNodes) + filterNode(Node); + beginModuleInfoLine(&Module); + OS << "; BuildID="; + highlightValue(); + OS << toHex(Module.BuildID, /*LowerCase=*/true); + highlight(); + return true; +} + +void MarkupFilter::beginModuleInfoLine(const Module *M) { + highlight(); + OS << "[[[ELF module"; + highlightValue(); + OS << formatv(" #{0:x} \"{1}\"", M->ID, M->Name); 
+ highlight(); + MIL = ModuleInfoLine{M}; +} + +void MarkupFilter::endAnyModuleInfoLine() { + if (!MIL) return; + llvm::stable_sort(MIL->MMaps, [](const MMap *A, const MMap *B) { + return A->Addr < B->Addr; + }); + for (const MMap *M : MIL->MMaps) { + OS << (M == MIL->MMaps.front() ? ' ' : '-'); + highlightValue(); + OS << formatv("{0:x}", M->Addr); + highlight(); + OS << '('; + highlightValue(); + OS << M->Mode; + highlight(); + OS << ')'; } + OS << "]]]" << lineEnding(); + restoreColor(); + MIL.reset(); +} + +// Handle a node that is known not to be a contextual element. +void MarkupFilter::filterNode(const MarkupNode &Node) { + if (!checkTag(Node)) + return; + if (tryPresentation(Node)) + return; + if (trySGR(Node)) + return; OS << Node.Text; } +bool MarkupFilter::tryPresentation(const MarkupNode &Node) { + return trySymbol(Node); +} + +bool MarkupFilter::trySymbol(const MarkupNode &Node) { + if (Node.Tag != "symbol") + return false; + if (!checkNumFields(Node, 1)) + return true; + + highlight(); + OS << llvm::demangle(Node.Fields.front().str()); + restoreColor(); + return true; +} + bool MarkupFilter::trySGR(const MarkupNode &Node) { if (Node.Text == "\033[0m") { resetColor(); @@ -93,6 +266,13 @@ void MarkupFilter::highlight() { Bold); } +// Begin highlighting a field within a highlighted markup string. +void MarkupFilter::highlightValue() { + if (!ColorsEnabled) + return; + OS.changeColor(raw_ostream::Colors::GREEN, Bold); +} + // Set the output stream's color to the current color and bold state of the SGR // abstract machine. void MarkupFilter::restoreColor() { @@ -117,6 +297,139 @@ void MarkupFilter::resetColor() { OS.resetColor(); } +// This macro helps reduce the amount of indirection done through Optional +// below, since the usual case upon returning a None Optional is to return None. 
+#define ASSIGN_OR_RETURN_NONE(TYPE, NAME, EXPR) \ + auto NAME##Opt = (EXPR); \ + if (!NAME##Opt) \ + return None; \ + TYPE NAME = std::move(*NAME##Opt) + +Optional<MarkupFilter::Module> +MarkupFilter::parseModule(const MarkupNode &Element) const { + if (!checkNumFieldsAtLeast(Element, 3)) + return None; + ASSIGN_OR_RETURN_NONE(uint64_t, ID, parseModuleID(Element.Fields[0])); + StringRef Name = Element.Fields[1]; + StringRef Type = Element.Fields[2]; + if (Type != "elf") { + WithColor::error() << "unknown module type\n"; + reportLocation(Type.begin()); + return None; + } + if (!checkNumFields(Element, 4)) + return None; + ASSIGN_OR_RETURN_NONE(SmallVector<uint8_t>, BuildID, + parseBuildID(Element.Fields[3])); + return Module{ID, Name.str(), std::move(BuildID)}; +} + +Optional<MarkupFilter::MMap> +MarkupFilter::parseMMap(const MarkupNode &Element) const { + if (!checkNumFieldsAtLeast(Element, 3)) + return None; + ASSIGN_OR_RETURN_NONE(uint64_t, Addr, parseAddr(Element.Fields[0])); + ASSIGN_OR_RETURN_NONE(uint64_t, Size, parseSize(Element.Fields[1])); + StringRef Type = Element.Fields[2]; + if (Type != "load") { + WithColor::error() << "unknown mmap type\n"; + reportLocation(Type.begin()); + return None; + } + if (!checkNumFields(Element, 6)) + return None; + ASSIGN_OR_RETURN_NONE(uint64_t, ID, parseModuleID(Element.Fields[3])); + ASSIGN_OR_RETURN_NONE(std::string, Mode, parseMode(Element.Fields[4])); + auto It = Modules.find(ID); + if (It == Modules.end()) { + WithColor::error() << "unknown module ID\n"; + reportLocation(Element.Fields[3].begin()); + return None; + } + ASSIGN_OR_RETURN_NONE(uint64_t, ModuleRelativeAddr, + parseAddr(Element.Fields[5])); + return MMap{Addr, Size, It->second.get(), std::move(Mode), + ModuleRelativeAddr}; +} + +// Parse an address (%p in the spec). +Optional<uint64_t> MarkupFilter::parseAddr(StringRef Str) const { + if (Str.empty()) { + reportTypeError(Str, "address"); + return None; + } + if (all_of(Str, [](char C) { return C == '0'; })) + return 0; + if (!Str.startswith("0x")) { + reportTypeError(Str, "address"); + return None; + } + uint64_t Addr; + if (Str.drop_front(2).getAsInteger(16, Addr)) { + reportTypeError(Str, "address"); + return None; + } + return Addr; +} + +// Parse a module ID (%i in the spec). +Optional<uint64_t> MarkupFilter::parseModuleID(StringRef Str) const { + uint64_t ID; + if (Str.getAsInteger(0, ID)) { + reportTypeError(Str, "module ID"); + return None; + } + return ID; +} + +// Parse a size (%i in the spec). +Optional<uint64_t> MarkupFilter::parseSize(StringRef Str) const { + uint64_t ID; + if (Str.getAsInteger(0, ID)) { + reportTypeError(Str, "size"); + return None; + } + return ID; +} + +// Parse a build ID (%x in the spec). +Optional<SmallVector<uint8_t>> MarkupFilter::parseBuildID(StringRef Str) const { + std::string Bytes; + if (Str.empty() || Str.size() % 2 || !tryGetFromHex(Str, Bytes)) { + reportTypeError(Str, "build ID"); + return None; + } + ArrayRef<uint8_t> BuildID(reinterpret_cast<const uint8_t *>(Bytes.data()), + Bytes.size()); + return SmallVector<uint8_t>(BuildID.begin(), BuildID.end()); +} + +// Parses the mode string for an mmap element. +Optional<std::string> MarkupFilter::parseMode(StringRef Str) const { + if (Str.empty()) { + reportTypeError(Str, "mode"); + return None; + } + + // Pop off each of r/R, w/W, and x/X from the front, in that order. 
+ StringRef Remainder = Str; + if (!Remainder.empty() && tolower(Remainder.front()) == 'r') + Remainder = Remainder.drop_front(); + if (!Remainder.empty() && tolower(Remainder.front()) == 'w') + Remainder = Remainder.drop_front(); + if (!Remainder.empty() && tolower(Remainder.front()) == 'x') + Remainder = Remainder.drop_front(); + + // If anything remains, then the string wasn't a mode. + if (!Remainder.empty()) { + reportTypeError(Str, "mode"); + return None; + } + + // Normalize the mode. + return Str.lower(); +} + bool MarkupFilter::checkTag(const MarkupNode &Node) const { if (any_of(Node.Tag, [](char C) { return C < 'a' || C > 'z'; })) { WithColor::error(errs()) << "tags must be all lowercase characters\n"; @@ -126,18 +439,66 @@ bool MarkupFilter::checkTag(const MarkupNode &Node) const { return true; } -bool MarkupFilter::checkNumFields(const MarkupNode &Node, size_t Size) const { - if (Node.Fields.size() != Size) { +bool MarkupFilter::checkNumFields(const MarkupNode &Element, + size_t Size) const { + if (Element.Fields.size() != Size) { WithColor::error(errs()) << "expected " << Size << " fields; found " - << Node.Fields.size() << "\n"; - reportLocation(Node.Tag.end()); + << Element.Fields.size() << "\n"; + reportLocation(Element.Tag.end()); return false; } return true; } +bool MarkupFilter::checkNumFieldsAtLeast(const MarkupNode &Element, + size_t Size) const { + if (Element.Fields.size() < Size) { + WithColor::error(errs()) + << "expected at least " << Size << " fields; found " + << Element.Fields.size() << "\n"; + reportLocation(Element.Tag.end()); + return false; + } + return true; +} + +void MarkupFilter::reportTypeError(StringRef Str, StringRef TypeName) const { + WithColor::error(errs()) << "expected " << TypeName << "; found '" << Str + << "'\n"; + reportLocation(Str.begin()); +} + +// Prints two lines that point out the given location in the current Line using +// a caret. The iterator must be within the bounds of the most recent line +// passed to beginLine(). void MarkupFilter::reportLocation(StringRef::iterator Loc) const { errs() << Line; WithColor(errs().indent(Loc - Line.begin()), HighlightColor::String) << '^'; errs() << '\n'; } + +// Checks for an existing mmap that overlaps the given one and returns a +// pointer to one of them. +const MarkupFilter::MMap *MarkupFilter::overlappingMMap(const MMap &Map) const { + // If the given map contains the start of another mmap, they overlap. + auto I = MMaps.upper_bound(Map.Addr); + if (I != MMaps.end() && Map.contains(I->second.Addr)) + return &I->second; + + // If no element starts inside the given mmap, the only possible overlap would + // be if the preceding mmap contains the start point of the given mmap. + if (I != MMaps.begin()) { + --I; + if (I->second.contains(Map.Addr)) + return &I->second; + } + return nullptr; +} + +StringRef MarkupFilter::lineEnding() const { + return Line.endswith("\r\n") ? "\r\n" : "\n"; +} + +bool MarkupFilter::MMap::contains(uint64_t Addr) const { + return this->Addr <= Addr && Addr < this->Addr + Size; +} diff --git a/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp b/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp index 43b9c2ba400b..dc07eaeaf615 100644 --- a/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp @@ -524,4 +524,4 @@ COFFLinkGraphBuilder::exportCOMDATSymbol(COFFSymbolIndex SymIndex, } } // namespace jitlink -} // namespace llvm
\ No newline at end of file +} // namespace llvm diff --git a/llvm/lib/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.cpp b/llvm/lib/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.cpp index 0fc366bf505f..2a60d8206f63 100644 --- a/llvm/lib/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.cpp @@ -107,7 +107,7 @@ Error DWARFRecordSectionSplitter::processBlock( } uint64_t BlockSize = BlockReader.getOffset() - RecordStartOffset; - auto &NewBlock = G.splitBlock(B, BlockSize); + auto &NewBlock = G.splitBlock(B, BlockSize, &Cache); (void)NewBlock; LLVM_DEBUG(dbgs() << " Extracted " << NewBlock << "\n"); } diff --git a/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp b/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp index c60f4b3b263c..70a3c404d836 100644 --- a/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp +++ b/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp @@ -48,6 +48,10 @@ JITTargetMachineBuilder::createTargetMachine() { if (!TheTarget) return make_error<StringError>(std::move(ErrMsg), inconvertibleErrorCode()); + if (!TheTarget->hasJIT()) + return make_error<StringError>("Target has no JIT support", + inconvertibleErrorCode()); + auto *TM = TheTarget->createTargetMachine(TT.getTriple(), CPU, Features.getString(), Options, RM, CM, OptLevel, /*JIT*/ true); diff --git a/llvm/lib/ExecutionEngine/Orc/MapperJITLinkMemoryManager.cpp b/llvm/lib/ExecutionEngine/Orc/MapperJITLinkMemoryManager.cpp new file mode 100644 index 000000000000..c2e7baabb994 --- /dev/null +++ b/llvm/lib/ExecutionEngine/Orc/MapperJITLinkMemoryManager.cpp @@ -0,0 +1,135 @@ +//=== MapperJITLinkMemoryManager.cpp - Memory management with MemoryMapper ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/Orc/MapperJITLinkMemoryManager.h" + +#include "llvm/ExecutionEngine/JITLink/JITLink.h" +#include "llvm/Support/Process.h" + +#include <limits> + +using namespace llvm::jitlink; + +namespace llvm { +namespace orc { + +class MapperJITLinkMemoryManager::InFlightAlloc + : public JITLinkMemoryManager::InFlightAlloc { +public: + InFlightAlloc(MapperJITLinkMemoryManager &Parent, LinkGraph &G, + ExecutorAddr AllocAddr, + std::vector<MemoryMapper::AllocInfo::SegInfo> Segs) + : Parent(Parent), G(G), AllocAddr(AllocAddr), Segs(std::move(Segs)) {} + + void finalize(OnFinalizedFunction OnFinalize) override { + MemoryMapper::AllocInfo AI; + AI.MappingBase = AllocAddr; + + std::swap(AI.Segments, Segs); + std::swap(AI.Actions, G.allocActions()); + + Parent.Mapper->initialize(AI, [&](Expected<ExecutorAddr> Result) { + if (!Result) { + OnFinalize(Result.takeError()); + return; + } + + OnFinalize(FinalizedAlloc(*Result)); + }); + } + + void abandon(OnAbandonedFunction OnFinalize) override { + Parent.Mapper->release({AllocAddr}, std::move(OnFinalize)); + } + +private: + MapperJITLinkMemoryManager &Parent; + LinkGraph &G; + ExecutorAddr AllocAddr; + std::vector<MemoryMapper::AllocInfo::SegInfo> Segs; +}; + +MapperJITLinkMemoryManager::MapperJITLinkMemoryManager( + std::unique_ptr<MemoryMapper> Mapper) + : Mapper(std::move(Mapper)) {} + +void MapperJITLinkMemoryManager::allocate(const JITLinkDylib *JD, LinkGraph &G, + OnAllocatedFunction OnAllocated) { + BasicLayout BL(G); + + // find required address space + auto SegsSizes = BL.getContiguousPageBasedLayoutSizes(Mapper->getPageSize()); + if (!SegsSizes) { + OnAllocated(SegsSizes.takeError()); + return; + } + + // Check if total size fits in address space + if (SegsSizes->total() > std::numeric_limits<size_t>::max()) { + OnAllocated(make_error<JITLinkError>( + formatv("Total requested size {:x} for graph {} exceeds address space", + SegsSizes->total(), G.getName()))); + return; + } + + Mapper->reserve( + SegsSizes->total(), + [this, &G, BL = std::move(BL), OnAllocated = std::move(OnAllocated)]( + Expected<ExecutorAddrRange> Result) mutable { + if (!Result) { + return OnAllocated(Result.takeError()); + } + + auto NextSegAddr = Result->Start; + + std::vector<MemoryMapper::AllocInfo::SegInfo> SegInfos; + + for (auto &KV : BL.segments()) { + auto &AG = KV.first; + auto &Seg = KV.second; + + auto TotalSize = Seg.ContentSize + Seg.ZeroFillSize; + + Seg.Addr = NextSegAddr; + Seg.WorkingMem = Mapper->prepare(NextSegAddr, TotalSize); + + NextSegAddr += alignTo(TotalSize, Mapper->getPageSize()); + + MemoryMapper::AllocInfo::SegInfo SI; + SI.Offset = Seg.Addr - Result->Start; + SI.ContentSize = Seg.ContentSize; + SI.ZeroFillSize = Seg.ZeroFillSize; + SI.Prot = (toSysMemoryProtectionFlags(AG.getMemProt())); + SI.WorkingMem = Seg.WorkingMem; + + SegInfos.push_back(SI); + } + + if (auto Err = BL.apply()) { + OnAllocated(std::move(Err)); + return; + } + + OnAllocated(std::make_unique<InFlightAlloc>(*this, G, Result->Start, + std::move(SegInfos))); + }); +} + +void MapperJITLinkMemoryManager::deallocate( + std::vector<FinalizedAlloc> Allocs, OnDeallocatedFunction OnDeallocated) { + std::vector<ExecutorAddr> Bases; + Bases.reserve(Allocs.size()); + for (auto &FA : Allocs) { + Bases.push_back(FA.getAddress()); + FA.release(); + } + Mapper->release(Bases, std::move(OnDeallocated)); +} + +} // end 
namespace orc +} // end namespace llvm diff --git a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp index 8b3fbd7117e2..ca3f64b8a409 100644 --- a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp +++ b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp @@ -8,11 +8,33 @@ #include "llvm/ExecutionEngine/Orc/MemoryMapper.h" +#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h" +#include "llvm/Support/WindowsError.h" + +#if defined(LLVM_ON_UNIX) +#include <fcntl.h> +#include <sys/mman.h> +#include <unistd.h> +#elif defined(_WIN32) +#include <windows.h> +#endif + namespace llvm { namespace orc { MemoryMapper::~MemoryMapper() {} +InProcessMemoryMapper::InProcessMemoryMapper(size_t PageSize) + : PageSize(PageSize) {} + +Expected<std::unique_ptr<InProcessMemoryMapper>> +InProcessMemoryMapper::Create() { + auto PageSize = sys::Process::getPageSize(); + if (!PageSize) + return PageSize.takeError(); + return std::make_unique<InProcessMemoryMapper>(*PageSize); +} + void InProcessMemoryMapper::reserve(size_t NumBytes, OnReservedFunction OnReserved) { std::error_code EC; @@ -147,6 +169,238 @@ InProcessMemoryMapper::~InProcessMemoryMapper() { cantFail(F.get()); } +// SharedMemoryMapper + +SharedMemoryMapper::SharedMemoryMapper(ExecutorProcessControl &EPC, + SymbolAddrs SAs, size_t PageSize) + : EPC(EPC), SAs(SAs), PageSize(PageSize) {} + +Expected<std::unique_ptr<SharedMemoryMapper>> +SharedMemoryMapper::Create(ExecutorProcessControl &EPC, SymbolAddrs SAs) { + auto PageSize = sys::Process::getPageSize(); + if (!PageSize) + return PageSize.takeError(); + + return std::make_unique<SharedMemoryMapper>(EPC, SAs, *PageSize); +} + +void SharedMemoryMapper::reserve(size_t NumBytes, + OnReservedFunction OnReserved) { +#if defined(LLVM_ON_UNIX) || defined(_WIN32) + + EPC.callSPSWrapperAsync< + rt::SPSExecutorSharedMemoryMapperServiceReserveSignature>( + SAs.Reserve, + [this, NumBytes, OnReserved = std::move(OnReserved)]( + Error SerializationErr, + Expected<std::pair<ExecutorAddr, std::string>> Result) mutable { + if (SerializationErr) { + cantFail(Result.takeError()); + return OnReserved(std::move(SerializationErr)); + } + + if (!Result) + return OnReserved(Result.takeError()); + + ExecutorAddr RemoteAddr; + std::string SharedMemoryName; + std::tie(RemoteAddr, SharedMemoryName) = std::move(*Result); + + void *LocalAddr = nullptr; + +#if defined(LLVM_ON_UNIX) + + int SharedMemoryFile = shm_open(SharedMemoryName.c_str(), O_RDWR, 0700); + if (SharedMemoryFile < 0) { + return OnReserved(errorCodeToError( + std::error_code(errno, std::generic_category()))); + } + + // this prevents other processes from accessing it by name + shm_unlink(SharedMemoryName.c_str()); + + LocalAddr = mmap(nullptr, NumBytes, PROT_READ | PROT_WRITE, MAP_SHARED, + SharedMemoryFile, 0); + if (LocalAddr == MAP_FAILED) { + return OnReserved(errorCodeToError( + std::error_code(errno, std::generic_category()))); + } + + close(SharedMemoryFile); + +#elif defined(_WIN32) + + std::wstring WideSharedMemoryName(SharedMemoryName.begin(), + SharedMemoryName.end()); + HANDLE SharedMemoryFile = OpenFileMappingW( + FILE_MAP_ALL_ACCESS, FALSE, WideSharedMemoryName.c_str()); + if (!SharedMemoryFile) + return OnReserved(errorCodeToError(mapWindowsError(GetLastError()))); + + LocalAddr = + MapViewOfFile(SharedMemoryFile, FILE_MAP_ALL_ACCESS, 0, 0, 0); + if (!LocalAddr) { + CloseHandle(SharedMemoryFile); + return OnReserved(errorCodeToError(mapWindowsError(GetLastError()))); + } + + CloseHandle(SharedMemoryFile); + +#endif 
+ { + std::lock_guard<std::mutex> Lock(Mutex); + Reservations.insert({RemoteAddr, {LocalAddr, NumBytes}}); + } + + OnReserved(ExecutorAddrRange(RemoteAddr, NumBytes)); + }, + SAs.Instance, static_cast<uint64_t>(NumBytes)); + +#else + OnReserved(make_error<StringError>( + "SharedMemoryMapper is not supported on this platform yet", + inconvertibleErrorCode())); +#endif +} + +char *SharedMemoryMapper::prepare(ExecutorAddr Addr, size_t ContentSize) { + auto R = Reservations.upper_bound(Addr); + assert(R != Reservations.begin() && "Attempt to prepare unknown range"); + R--; + + ExecutorAddrDiff Offset = Addr - R->first; + + return static_cast<char *>(R->second.LocalAddr) + Offset; +} + +void SharedMemoryMapper::initialize(MemoryMapper::AllocInfo &AI, + OnInitializedFunction OnInitialized) { + auto Reservation = Reservations.find(AI.MappingBase); + assert(Reservation != Reservations.end() && + "Attempt to initialize unreserved range"); + + tpctypes::SharedMemoryFinalizeRequest FR; + + AI.Actions.swap(FR.Actions); + + FR.Segments.reserve(AI.Segments.size()); + + for (auto Segment : AI.Segments) { + char *Base = + static_cast<char *>(Reservation->second.LocalAddr) + Segment.Offset; + std::memset(Base + Segment.ContentSize, 0, Segment.ZeroFillSize); + + tpctypes::SharedMemorySegFinalizeRequest SegReq; + SegReq.Prot = tpctypes::toWireProtectionFlags( + static_cast<sys::Memory::ProtectionFlags>(Segment.Prot)); + SegReq.Addr = AI.MappingBase + Segment.Offset; + SegReq.Size = Segment.ContentSize + Segment.ZeroFillSize; + + FR.Segments.push_back(SegReq); + } + + EPC.callSPSWrapperAsync< + rt::SPSExecutorSharedMemoryMapperServiceInitializeSignature>( + SAs.Initialize, + [OnInitialized = std::move(OnInitialized)]( + Error SerializationErr, Expected<ExecutorAddr> Result) mutable { + if (SerializationErr) { + cantFail(Result.takeError()); + return OnInitialized(std::move(SerializationErr)); + } + + OnInitialized(std::move(Result)); + }, + SAs.Instance, AI.MappingBase, std::move(FR)); +} + +void SharedMemoryMapper::deinitialize( + ArrayRef<ExecutorAddr> Allocations, + MemoryMapper::OnDeinitializedFunction OnDeinitialized) { + EPC.callSPSWrapperAsync< + rt::SPSExecutorSharedMemoryMapperServiceDeinitializeSignature>( + SAs.Deinitialize, + [OnDeinitialized = std::move(OnDeinitialized)](Error SerializationErr, + Error Result) mutable { + if (SerializationErr) { + cantFail(std::move(Result)); + return OnDeinitialized(std::move(SerializationErr)); + } + + OnDeinitialized(std::move(Result)); + }, + SAs.Instance, Allocations); +} + +void SharedMemoryMapper::release(ArrayRef<ExecutorAddr> Bases, + OnReleasedFunction OnReleased) { +#if defined(LLVM_ON_UNIX) || defined(_WIN32) + Error Err = Error::success(); + + { + std::lock_guard<std::mutex> Lock(Mutex); + + for (auto Base : Bases) { + +#if defined(LLVM_ON_UNIX) + + if (munmap(Reservations[Base].LocalAddr, Reservations[Base].Size) != 0) + Err = joinErrors(std::move(Err), errorCodeToError(std::error_code( + errno, std::generic_category()))); + +#elif defined(_WIN32) + + if (!UnmapViewOfFile(Reservations[Base].LocalAddr)) + joinErrors(std::move(Err), + errorCodeToError(mapWindowsError(GetLastError()))); + +#endif + + Reservations.erase(Base); + } + } + + EPC.callSPSWrapperAsync< + rt::SPSExecutorSharedMemoryMapperServiceReleaseSignature>( + SAs.Release, + [OnReleased = std::move(OnReleased), + Err = std::move(Err)](Error SerializationErr, Error Result) mutable { + if (SerializationErr) { + cantFail(std::move(Result)); + return OnReleased( + joinErrors(std::move(Err), 
std::move(SerializationErr))); + } + + return OnReleased(joinErrors(std::move(Err), std::move(Result))); + }, + SAs.Instance, Bases); +#else + OnReleased(make_error<StringError>( + "SharedMemoryMapper is not supported on this platform yet", + inconvertibleErrorCode())); +#endif +} + +SharedMemoryMapper::~SharedMemoryMapper() { + std::vector<ExecutorAddr> ReservationAddrs; + if (!Reservations.empty()) { + std::lock_guard<std::mutex> Lock(Mutex); + { + ReservationAddrs.reserve(Reservations.size()); + for (const auto &R : Reservations) { + ReservationAddrs.push_back(R.first); + } + } + } + + std::promise<MSVCPError> P; + auto F = P.get_future(); + release(ReservationAddrs, [&](Error Err) { P.set_value(std::move(Err)); }); + // FIXME: Release can actually fail. The error should be propagated. + // Meanwhile, a better option is to explicitly call release(). + cantFail(F.get()); +} + } // namespace orc } // namespace llvm diff --git a/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp b/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp index 5eae33121eb9..dfdd846c46a7 100644 --- a/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp @@ -18,6 +18,7 @@ const char *SimpleExecutorDylibManagerOpenWrapperName = "__llvm_orc_SimpleExecutorDylibManager_open_wrapper"; const char *SimpleExecutorDylibManagerLookupWrapperName = "__llvm_orc_SimpleExecutorDylibManager_lookup_wrapper"; + const char *SimpleExecutorMemoryManagerInstanceName = "__llvm_orc_SimpleExecutorMemoryManager_Instance"; const char *SimpleExecutorMemoryManagerReserveWrapperName = @@ -26,6 +27,18 @@ const char *SimpleExecutorMemoryManagerFinalizeWrapperName = "__llvm_orc_SimpleExecutorMemoryManager_finalize_wrapper"; const char *SimpleExecutorMemoryManagerDeallocateWrapperName = "__llvm_orc_SimpleExecutorMemoryManager_deallocate_wrapper"; + +const char *ExecutorSharedMemoryMapperServiceInstanceName = + "__llvm_orc_ExecutorSharedMemoryMapperService_Instance"; +const char *ExecutorSharedMemoryMapperServiceReserveWrapperName = + "__llvm_orc_ExecutorSharedMemoryMapperService_Reserve"; +const char *ExecutorSharedMemoryMapperServiceInitializeWrapperName = + "__llvm_orc_ExecutorSharedMemoryMapperService_Initialize"; +const char *ExecutorSharedMemoryMapperServiceDeinitializeWrapperName = + "__llvm_orc_ExecutorSharedMemoryMapperService_Deinitialize"; +const char *ExecutorSharedMemoryMapperServiceReleaseWrapperName = + "__llvm_orc_ExecutorSharedMemoryMapperService_Release"; + const char *MemoryWriteUInt8sWrapperName = "__llvm_orc_bootstrap_mem_write_uint8s_wrapper"; const char *MemoryWriteUInt16sWrapperName = @@ -36,10 +49,12 @@ const char *MemoryWriteUInt64sWrapperName = "__llvm_orc_bootstrap_mem_write_uint64s_wrapper"; const char *MemoryWriteBuffersWrapperName = "__llvm_orc_bootstrap_mem_write_buffers_wrapper"; + const char *RegisterEHFrameSectionWrapperName = "__llvm_orc_bootstrap_register_ehframe_section_wrapper"; const char *DeregisterEHFrameSectionWrapperName = "__llvm_orc_bootstrap_deregister_ehframe_section_wrapper"; + const char *RunAsMainWrapperName = "__llvm_orc_bootstrap_run_as_main_wrapper"; } // end namespace rt diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp new file mode 100644 index 000000000000..6c9f099061ae --- /dev/null +++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp @@ -0,0 +1,341 @@ +//===---------- 
ExecutorSharedMemoryMapperService.cpp -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.h" + +#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/WindowsError.h" + +#include <sstream> + +#if defined(LLVM_ON_UNIX) +#include <errno.h> +#include <fcntl.h> +#include <sys/mman.h> +#include <unistd.h> +#endif + +#if defined(_WIN32) +static DWORD getWindowsProtectionFlags(unsigned Flags) { + switch (Flags & llvm::sys::Memory::MF_RWE_MASK) { + case llvm::sys::Memory::MF_READ: + return PAGE_READONLY; + case llvm::sys::Memory::MF_WRITE: + // Note: PAGE_WRITE is not supported by VirtualProtect + return PAGE_READWRITE; + case llvm::sys::Memory::MF_READ | llvm::sys::Memory::MF_WRITE: + return PAGE_READWRITE; + case llvm::sys::Memory::MF_READ | llvm::sys::Memory::MF_EXEC: + return PAGE_EXECUTE_READ; + case llvm::sys::Memory::MF_READ | llvm::sys::Memory::MF_WRITE | + llvm::sys::Memory::MF_EXEC: + return PAGE_EXECUTE_READWRITE; + case llvm::sys::Memory::MF_EXEC: + return PAGE_EXECUTE; + default: + llvm_unreachable("Illegal memory protection flag specified!"); + } + // Provide a default return value as required by some compilers. + return PAGE_NOACCESS; +} +#endif + +namespace llvm { +namespace orc { +namespace rt_bootstrap { + +Expected<std::pair<ExecutorAddr, std::string>> +ExecutorSharedMemoryMapperService::reserve(uint64_t Size) { +#if defined(LLVM_ON_UNIX) || defined(_WIN32) + +#if defined(LLVM_ON_UNIX) + + std::string SharedMemoryName; + { + std::stringstream SharedMemoryNameStream; + SharedMemoryNameStream << "/jitlink_" << sys::Process::getProcessId() << '_' + << (++SharedMemoryCount); + SharedMemoryName = SharedMemoryNameStream.str(); + } + + int SharedMemoryFile = + shm_open(SharedMemoryName.c_str(), O_RDWR | O_CREAT | O_EXCL, 0700); + if (SharedMemoryFile < 0) + return errorCodeToError(std::error_code(errno, std::generic_category())); + + // by default size is 0 + if (ftruncate(SharedMemoryFile, Size) < 0) + return errorCodeToError(std::error_code(errno, std::generic_category())); + + void *Addr = mmap(nullptr, Size, PROT_NONE, MAP_SHARED, SharedMemoryFile, 0); + if (Addr == MAP_FAILED) + return errorCodeToError(std::error_code(errno, std::generic_category())); + + close(SharedMemoryFile); + +#elif defined(_WIN32) + + std::string SharedMemoryName; + { + std::stringstream SharedMemoryNameStream; + SharedMemoryNameStream << "jitlink_" << sys::Process::getProcessId() << '_' + << (++SharedMemoryCount); + SharedMemoryName = SharedMemoryNameStream.str(); + } + + std::wstring WideSharedMemoryName(SharedMemoryName.begin(), + SharedMemoryName.end()); + HANDLE SharedMemoryFile = CreateFileMappingW( + INVALID_HANDLE_VALUE, NULL, PAGE_EXECUTE_READWRITE, Size >> 32, + Size & 0xffffffff, WideSharedMemoryName.c_str()); + if (!SharedMemoryFile) + return errorCodeToError(mapWindowsError(GetLastError())); + + void *Addr = MapViewOfFile(SharedMemoryFile, + FILE_MAP_ALL_ACCESS | FILE_MAP_EXECUTE, 0, 0, 0); + if (!Addr) { + CloseHandle(SharedMemoryFile); + return errorCodeToError(mapWindowsError(GetLastError())); + } + +#endif + + { + std::lock_guard<std::mutex> Lock(Mutex); + Reservations[Addr].Size = 
Size; +#if defined(_WIN32) + Reservations[Addr].SharedMemoryFile = SharedMemoryFile; +#endif + } + + return std::make_pair(ExecutorAddr::fromPtr(Addr), + std::move(SharedMemoryName)); +#else + return make_error<StringError>( + "SharedMemoryMapper is not supported on this platform yet", + inconvertibleErrorCode()); +#endif +} + +Expected<ExecutorAddr> ExecutorSharedMemoryMapperService::initialize( + ExecutorAddr Reservation, tpctypes::SharedMemoryFinalizeRequest &FR) { +#if defined(LLVM_ON_UNIX) || defined(_WIN32) + + ExecutorAddr MinAddr(~0ULL); + + // Contents are already in place + for (auto &Segment : FR.Segments) { + if (Segment.Addr < MinAddr) + MinAddr = Segment.Addr; + +#if defined(LLVM_ON_UNIX) + + int NativeProt = 0; + if (Segment.Prot & tpctypes::WPF_Read) + NativeProt |= PROT_READ; + if (Segment.Prot & tpctypes::WPF_Write) + NativeProt |= PROT_WRITE; + if (Segment.Prot & tpctypes::WPF_Exec) + NativeProt |= PROT_EXEC; + + if (mprotect(Segment.Addr.toPtr<void *>(), Segment.Size, NativeProt)) + return errorCodeToError(std::error_code(errno, std::generic_category())); + +#elif defined(_WIN32) + + DWORD NativeProt = + getWindowsProtectionFlags(fromWireProtectionFlags(Segment.Prot)); + + if (!VirtualProtect(Segment.Addr.toPtr<void *>(), Segment.Size, NativeProt, + &NativeProt)) + return errorCodeToError(mapWindowsError(GetLastError())); + +#endif + + if (Segment.Prot & tpctypes::WPF_Exec) + sys::Memory::InvalidateInstructionCache(Segment.Addr.toPtr<void *>(), + Segment.Size); + } + + // Run finalization actions and get deinitlization action list. + auto DeinitializeActions = shared::runFinalizeActions(FR.Actions); + if (!DeinitializeActions) { + return DeinitializeActions.takeError(); + } + + { + std::lock_guard<std::mutex> Lock(Mutex); + Allocations[MinAddr].DeinitializationActions = + std::move(*DeinitializeActions); + Reservations[Reservation.toPtr<void *>()].Allocations.push_back(MinAddr); + } + + return MinAddr; + +#else + return make_error<StringError>( + "SharedMemoryMapper is not supported on this platform yet", + inconvertibleErrorCode()); +#endif +} + +Error ExecutorSharedMemoryMapperService::deinitialize( + const std::vector<ExecutorAddr> &Bases) { + Error AllErr = Error::success(); + + { + std::lock_guard<std::mutex> Lock(Mutex); + + for (auto Base : Bases) { + if (Error Err = shared::runDeallocActions( + Allocations[Base].DeinitializationActions)) { + AllErr = joinErrors(std::move(AllErr), std::move(Err)); + } + + Allocations.erase(Base); + } + } + + return AllErr; +} + +Error ExecutorSharedMemoryMapperService::release( + const std::vector<ExecutorAddr> &Bases) { +#if defined(LLVM_ON_UNIX) || defined(_WIN32) + Error Err = Error::success(); + + for (auto Base : Bases) { + std::vector<ExecutorAddr> AllocAddrs; + size_t Size; + +#if defined(_WIN32) + HANDLE SharedMemoryFile; +#endif + + { + std::lock_guard<std::mutex> Lock(Mutex); + auto &R = Reservations[Base.toPtr<void *>()]; + Size = R.Size; + +#if defined(_WIN32) + SharedMemoryFile = R.SharedMemoryFile; +#endif + + AllocAddrs.swap(R.Allocations); + } + + // deinitialize sub allocations + if (Error E = deinitialize(AllocAddrs)) + Err = joinErrors(std::move(Err), std::move(E)); + +#if defined(LLVM_ON_UNIX) + + if (munmap(Base.toPtr<void *>(), Size) != 0) + Err = joinErrors(std::move(Err), errorCodeToError(std::error_code( + errno, std::generic_category()))); + +#elif defined(_WIN32) + + if (!UnmapViewOfFile(Base.toPtr<void *>())) + Err = joinErrors(std::move(Err), + errorCodeToError(mapWindowsError(GetLastError()))); + + 
CloseHandle(SharedMemoryFile); + +#endif + + std::lock_guard<std::mutex> Lock(Mutex); + Reservations.erase(Base.toPtr<void *>()); + } + + return Err; +#else + return make_error<StringError>( + "SharedMemoryMapper is not supported on this platform yet", + inconvertibleErrorCode()); +#endif +} + +Error ExecutorSharedMemoryMapperService::shutdown() { + std::vector<ExecutorAddr> ReservationAddrs; + if (!Reservations.empty()) { + std::lock_guard<std::mutex> Lock(Mutex); + { + ReservationAddrs.reserve(Reservations.size()); + for (const auto &R : Reservations) { + ReservationAddrs.push_back(ExecutorAddr::fromPtr(R.getFirst())); + } + } + } + return release(ReservationAddrs); + + return Error::success(); +} + +void ExecutorSharedMemoryMapperService::addBootstrapSymbols( + StringMap<ExecutorAddr> &M) { + M[rt::ExecutorSharedMemoryMapperServiceInstanceName] = + ExecutorAddr::fromPtr(this); + M[rt::ExecutorSharedMemoryMapperServiceReserveWrapperName] = + ExecutorAddr::fromPtr(&reserveWrapper); + M[rt::ExecutorSharedMemoryMapperServiceInitializeWrapperName] = + ExecutorAddr::fromPtr(&initializeWrapper); + M[rt::ExecutorSharedMemoryMapperServiceDeinitializeWrapperName] = + ExecutorAddr::fromPtr(&deinitializeWrapper); + M[rt::ExecutorSharedMemoryMapperServiceReleaseWrapperName] = + ExecutorAddr::fromPtr(&releaseWrapper); +} + +llvm::orc::shared::CWrapperFunctionResult +ExecutorSharedMemoryMapperService::reserveWrapper(const char *ArgData, + size_t ArgSize) { + return shared::WrapperFunction< + rt::SPSExecutorSharedMemoryMapperServiceReserveSignature>:: + handle(ArgData, ArgSize, + shared::makeMethodWrapperHandler( + &ExecutorSharedMemoryMapperService::reserve)) + .release(); +} + +llvm::orc::shared::CWrapperFunctionResult +ExecutorSharedMemoryMapperService::initializeWrapper(const char *ArgData, + size_t ArgSize) { + return shared::WrapperFunction< + rt::SPSExecutorSharedMemoryMapperServiceInitializeSignature>:: + handle(ArgData, ArgSize, + shared::makeMethodWrapperHandler( + &ExecutorSharedMemoryMapperService::initialize)) + .release(); +} + +llvm::orc::shared::CWrapperFunctionResult +ExecutorSharedMemoryMapperService::deinitializeWrapper(const char *ArgData, + size_t ArgSize) { + return shared::WrapperFunction< + rt::SPSExecutorSharedMemoryMapperServiceDeinitializeSignature>:: + handle(ArgData, ArgSize, + shared::makeMethodWrapperHandler( + &ExecutorSharedMemoryMapperService::deinitialize)) + .release(); +} + +llvm::orc::shared::CWrapperFunctionResult +ExecutorSharedMemoryMapperService::releaseWrapper(const char *ArgData, + size_t ArgSize) { + return shared::WrapperFunction< + rt::SPSExecutorSharedMemoryMapperServiceReleaseSignature>:: + handle(ArgData, ArgSize, + shared::makeMethodWrapperHandler( + &ExecutorSharedMemoryMapperService::release)) + .release(); +} + +} // namespace rt_bootstrap +} // end namespace orc +} // end namespace llvm diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp index 7cadf3bb51a7..c848dd65fa7e 100644 --- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp +++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp @@ -126,7 +126,8 @@ Error SimpleExecutorMemoryManager::finalize(tpctypes::FinalizeRequest &FR) { inconvertibleErrorCode())); char *Mem = Seg.Addr.toPtr<char *>(); - memcpy(Mem, Seg.Content.data(), Seg.Content.size()); + if (!Seg.Content.empty()) + memcpy(Mem, Seg.Content.data(), Seg.Content.size()); 
memset(Mem + Seg.Content.size(), 0, Seg.Size - Seg.Content.size()); assert(Seg.Size <= std::numeric_limits<size_t>::max()); if (auto EC = sys::Memory::protectMappedMemory( diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 574d9174bebf..cee4cddab5e8 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -1453,7 +1453,36 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, InsertPointTy(TaskAllocaBB, TaskAllocaBB->begin()); InsertPointTy TaskBodyIP = InsertPointTy(TaskBodyBB, TaskBodyBB->begin()); BodyGenCB(TaskAllocaIP, TaskBodyIP); - Builder.SetInsertPoint(TaskExitBB); + Builder.SetInsertPoint(TaskExitBB, TaskExitBB->begin()); + + return Builder.saveIP(); +} + +OpenMPIRBuilder::InsertPointTy +OpenMPIRBuilder::createTaskgroup(const LocationDescription &Loc, + InsertPointTy AllocaIP, + BodyGenCallbackTy BodyGenCB) { + if (!updateToLocation(Loc)) + return InsertPointTy(); + + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); + Value *ThreadID = getOrCreateThreadID(Ident); + + // Emit the @__kmpc_taskgroup runtime call to start the taskgroup + Function *TaskgroupFn = + getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup); + Builder.CreateCall(TaskgroupFn, {Ident, ThreadID}); + + BasicBlock *TaskgroupExitBB = splitBB(Builder, true, "taskgroup.exit"); + BodyGenCB(AllocaIP, Builder.saveIP()); + + Builder.SetInsertPoint(TaskgroupExitBB); + // Emit the @__kmpc_end_taskgroup runtime call to end the taskgroup + Function *EndTaskgroupFn = + getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup); + Builder.CreateCall(EndTaskgroupFn, {Ident, ThreadID}); return Builder.saveIP(); } diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp index 53df94366760..d4138133721e 100644 --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -354,6 +354,8 @@ Function *Function::createWithDefaultAttr(FunctionType *Ty, B.addAttribute("frame-pointer", "all"); break; } + if (M->getModuleFlag("function_return_thunk_extern")) + B.addAttribute(Attribute::FnRetThunkExtern); F->addFnAttrs(B); return F; } diff --git a/llvm/lib/IR/GCStrategy.cpp b/llvm/lib/IR/GCStrategy.cpp index f3bc5b74f8fd..5833dc26c57e 100644 --- a/llvm/lib/IR/GCStrategy.cpp +++ b/llvm/lib/IR/GCStrategy.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/GCStrategy.h" +#include "llvm/ADT/Twine.h" using namespace llvm; @@ -32,7 +33,7 @@ std::unique_ptr<GCStrategy> llvm::getGCStrategy(const StringRef Name) { const std::string error = std::string("unsupported GC: ") + Name.str() + " (did you remember to link and initialize the library?)"; - report_fatal_error(error); + report_fatal_error(Twine(error)); } else - report_fatal_error(std::string("unsupported GC: ") + Name.str()); + report_fatal_error(Twine(std::string("unsupported GC: ") + Name.str())); } diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp index 3265050261c8..51a22897babd 100644 --- a/llvm/lib/IR/Globals.cpp +++ b/llvm/lib/IR/Globals.cpp @@ -316,32 +316,38 @@ bool GlobalObject::canIncreaseAlignment() const { return true; } +template <typename Operation> static const GlobalObject * -findBaseObject(const Constant *C, DenseSet<const GlobalAlias *> &Aliases) { - if (auto *GO = dyn_cast<GlobalObject>(C)) +findBaseObject(const Constant *C, DenseSet<const GlobalAlias *> &Aliases, + const 
Operation &Op) { + if (auto *GO = dyn_cast<GlobalObject>(C)) { + Op(*GO); return GO; - if (auto *GA = dyn_cast<GlobalAlias>(C)) + } + if (auto *GA = dyn_cast<GlobalAlias>(C)) { + Op(*GA); if (Aliases.insert(GA).second) - return findBaseObject(GA->getOperand(0), Aliases); + return findBaseObject(GA->getOperand(0), Aliases, Op); + } if (auto *CE = dyn_cast<ConstantExpr>(C)) { switch (CE->getOpcode()) { case Instruction::Add: { - auto *LHS = findBaseObject(CE->getOperand(0), Aliases); - auto *RHS = findBaseObject(CE->getOperand(1), Aliases); + auto *LHS = findBaseObject(CE->getOperand(0), Aliases, Op); + auto *RHS = findBaseObject(CE->getOperand(1), Aliases, Op); if (LHS && RHS) return nullptr; return LHS ? LHS : RHS; } case Instruction::Sub: { - if (findBaseObject(CE->getOperand(1), Aliases)) + if (findBaseObject(CE->getOperand(1), Aliases, Op)) return nullptr; - return findBaseObject(CE->getOperand(0), Aliases); + return findBaseObject(CE->getOperand(0), Aliases, Op); } case Instruction::IntToPtr: case Instruction::PtrToInt: case Instruction::BitCast: case Instruction::GetElementPtr: - return findBaseObject(CE->getOperand(0), Aliases); + return findBaseObject(CE->getOperand(0), Aliases, Op); default: break; } @@ -351,7 +357,7 @@ findBaseObject(const Constant *C, DenseSet<const GlobalAlias *> &Aliases) { const GlobalObject *GlobalValue::getAliaseeObject() const { DenseSet<const GlobalAlias *> Aliases; - return findBaseObject(this, Aliases); + return findBaseObject(this, Aliases, [](const GlobalValue &) {}); } bool GlobalValue::isAbsoluteSymbolRef() const { @@ -544,7 +550,7 @@ void GlobalAlias::setAliasee(Constant *Aliasee) { const GlobalObject *GlobalAlias::getAliaseeObject() const { DenseSet<const GlobalAlias *> Aliases; - return findBaseObject(getOperand(0), Aliases); + return findBaseObject(getOperand(0), Aliases, [](const GlobalValue &) {}); } //===----------------------------------------------------------------------===// @@ -577,5 +583,12 @@ void GlobalIFunc::eraseFromParent() { const Function *GlobalIFunc::getResolverFunction() const { DenseSet<const GlobalAlias *> Aliases; - return dyn_cast<Function>(findBaseObject(getResolver(), Aliases)); + return dyn_cast<Function>( + findBaseObject(getResolver(), Aliases, [](const GlobalValue &) {})); +} + +void GlobalIFunc::applyAlongResolverPath( + function_ref<void(const GlobalValue &)> Op) const { + DenseSet<const GlobalAlias *> Aliases; + findBaseObject(getResolver(), Aliases, Op); } diff --git a/llvm/lib/IR/InlineAsm.cpp b/llvm/lib/IR/InlineAsm.cpp index c75b1aa7c1d6..088fcfdec742 100644 --- a/llvm/lib/IR/InlineAsm.cpp +++ b/llvm/lib/IR/InlineAsm.cpp @@ -93,6 +93,9 @@ bool InlineAsm::ConstraintInfo::Parse(StringRef Str, } else if (*I == '=') { ++I; Type = isOutput; + } else if (*I == '!') { + ++I; + Type = isLabel; } if (*I == '*') { @@ -265,14 +268,14 @@ Error InlineAsm::verify(FunctionType *Ty, StringRef ConstStr) { return makeStringError("failed to parse constraints"); unsigned NumOutputs = 0, NumInputs = 0, NumClobbers = 0; - unsigned NumIndirect = 0; + unsigned NumIndirect = 0, NumLabels = 0; for (const ConstraintInfo &Constraint : Constraints) { switch (Constraint.Type) { case InlineAsm::isOutput: - if ((NumInputs-NumIndirect) != 0 || NumClobbers != 0) - return makeStringError("output constraint occurs after input " - "or clobber constraint"); + if ((NumInputs-NumIndirect) != 0 || NumClobbers != 0 || NumLabels != 0) + return makeStringError("output constraint occurs after input, " + "clobber or label constraint"); if 
(!Constraint.isIndirect) { ++NumOutputs; @@ -289,6 +292,13 @@ Error InlineAsm::verify(FunctionType *Ty, StringRef ConstStr) { case InlineAsm::isClobber: ++NumClobbers; break; + case InlineAsm::isLabel: + if (NumClobbers) + return makeStringError("label constraint occurs after clobber " + "constraint"); + + ++NumLabels; + break; } } @@ -312,5 +322,7 @@ Error InlineAsm::verify(FunctionType *Ty, StringRef ConstStr) { if (Ty->getNumParams() != NumInputs) return makeStringError("number of input constraints does not match number " "of parameters"); + + // We don't have access to labels here, NumLabels will be checked separately. return Error::success(); } diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index b333f40f3ce9..26171f537244 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -960,15 +960,10 @@ void CallBrInst::init(FunctionType *FTy, Value *Fn, BasicBlock *Fallthrough, setName(NameStr); } -void CallBrInst::updateArgBlockAddresses(unsigned i, BasicBlock *B) { - assert(getNumIndirectDests() > i && "IndirectDest # out of range for callbr"); - if (BasicBlock *OldBB = getIndirectDest(i)) { - BlockAddress *Old = BlockAddress::get(OldBB); - BlockAddress *New = BlockAddress::get(B); - for (unsigned ArgNo = 0, e = arg_size(); ArgNo != e; ++ArgNo) - if (dyn_cast<BlockAddress>(getArgOperand(ArgNo)) == Old) - setArgOperand(ArgNo, New); - } +BlockAddress * +CallBrInst::getBlockAddressForIndirectDest(unsigned DestNo) const { + return BlockAddress::get(const_cast<Function *>(getFunction()), + getIndirectDest(DestNo)); } CallBrInst::CallBrInst(const CallBrInst &CBI) diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp index 65a9a32ad2c5..c50d6901c9da 100644 --- a/llvm/lib/IR/IntrinsicInst.cpp +++ b/llvm/lib/IR/IntrinsicInst.cpp @@ -694,8 +694,10 @@ unsigned BinaryOpIntrinsic::getNoWrapKind() const { return OverflowingBinaryOperator::NoUnsignedWrap; } -const GCStatepointInst *GCProjectionInst::getStatepoint() const { +const Value *GCProjectionInst::getStatepoint() const { const Value *Token = getArgOperand(0); + if (isa<UndefValue>(Token)) + return Token; // This takes care both of relocates for call statepoints and relocates // on normal path of invoke statepoint. 
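With the hunk above, GCProjectionInst::getStatepoint() now returns the raw token operand as a const Value *, which may be an UndefValue when the statepoint has been optimized away, instead of always being a GCStatepointInst. The hunks that follow update getBasePtr()/getDerivedPtr() to that pattern; a minimal caller-side sketch (the helper name getStatepointOrNull is illustrative, not part of the patch):

#include "llvm/IR/Constants.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Statepoint.h"
using namespace llvm;

// Illustrative helper: resolve the statepoint behind a gc.relocate, tolerating
// the undef-token case introduced by this patch.
static const GCStatepointInst *getStatepointOrNull(const GCRelocateInst &Relocate) {
  const Value *Token = Relocate.getStatepoint();
  if (isa<UndefValue>(Token))
    return nullptr; // statepoint was removed; treat relocated values as undef
  return cast<GCStatepointInst>(Token);
}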
@@ -714,13 +716,23 @@ const GCStatepointInst *GCProjectionInst::getStatepoint() const { } Value *GCRelocateInst::getBasePtr() const { - if (auto Opt = getStatepoint()->getOperandBundle(LLVMContext::OB_gc_live)) + auto Statepoint = getStatepoint(); + if (isa<UndefValue>(Statepoint)) + return UndefValue::get(Statepoint->getType()); + + auto *GCInst = cast<GCStatepointInst>(Statepoint); + if (auto Opt = GCInst->getOperandBundle(LLVMContext::OB_gc_live)) return *(Opt->Inputs.begin() + getBasePtrIndex()); - return *(getStatepoint()->arg_begin() + getBasePtrIndex()); + return *(GCInst->arg_begin() + getBasePtrIndex()); } Value *GCRelocateInst::getDerivedPtr() const { - if (auto Opt = getStatepoint()->getOperandBundle(LLVMContext::OB_gc_live)) + auto *Statepoint = getStatepoint(); + if (isa<UndefValue>(Statepoint)) + return UndefValue::get(Statepoint->getType()); + + auto *GCInst = cast<GCStatepointInst>(Statepoint); + if (auto Opt = GCInst->getOperandBundle(LLVMContext::OB_gc_live)) return *(Opt->Inputs.begin() + getDerivedPtrIndex()); - return *(getStatepoint()->arg_begin() + getDerivedPtrIndex()); + return *(GCInst->arg_begin() + getDerivedPtrIndex()); } diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 75d02f4c8c82..e3ea256af16d 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -469,6 +469,9 @@ private: void visitRangeMetadata(Instruction &I, MDNode *Range, Type *Ty); void visitDereferenceableMetadata(Instruction &I, MDNode *MD); void visitProfMetadata(Instruction &I, MDNode *MD); + void visitCallStackMetadata(MDNode *MD); + void visitMemProfMetadata(Instruction &I, MDNode *MD); + void visitCallsiteMetadata(Instruction &I, MDNode *MD); void visitAnnotationMetadata(MDNode *Annotation); void visitAliasScopeMetadata(const MDNode *MD); void visitAliasScopeListMetadata(const MDNode *MD); @@ -1624,8 +1627,10 @@ Verifier::visitModuleFlag(const MDNode *Op, break; case Module::Min: { - Check(mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(2)), - "invalid value for 'min' module flag (expected constant integer)", + auto *V = mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(2)); + Check(V && V->getValue().isNonNegative(), + "invalid value for 'min' module flag (expected constant non-negative " + "integer)", Op->getOperand(2)); break; } @@ -2200,7 +2205,13 @@ bool Verifier::verifyAttributeCount(AttributeList Attrs, unsigned Params) { void Verifier::verifyInlineAsmCall(const CallBase &Call) { const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand()); unsigned ArgNo = 0; + unsigned LabelNo = 0; for (const InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) { + if (CI.Type == InlineAsm::isLabel) { + ++LabelNo; + continue; + } + // Only deal with constraints that correspond to call arguments. if (!CI.hasArg()) continue; @@ -2222,6 +2233,15 @@ void Verifier::verifyInlineAsmCall(const CallBase &Call) { ArgNo++; } + + if (auto *CallBr = dyn_cast<CallBrInst>(&Call)) { + Check(LabelNo == CallBr->getNumIndirectDests(), + "Number of label constraints does not match number of callbr dests", + &Call); + } else { + Check(LabelNo == 0, "Label constraints can only be used with callbr", + &Call); + } } /// Verify that statepoint intrinsic is well formed. 
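The verifyInlineAsmCall() change above pairs the new '!' (label) constraints with callbr: label constraints are counted and the total must equal CallBrInst::getNumIndirectDests(), while plain call/invoke may not use label constraints at all. A rough sketch of the same count using the public constraint parser (the helper name and the "r,!i,!i" constraint string are made-up examples):

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/InlineAsm.h"
using namespace llvm;

// Count label constraints in an asm constraint string; each one is expected to
// correspond to one indirect destination of the callbr that uses the asm.
static unsigned countLabelConstraints(StringRef Constraints) {
  InlineAsm::ConstraintInfoVector CV = InlineAsm::ParseConstraints(Constraints);
  return count_if(CV, [](const InlineAsm::ConstraintInfo &CI) {
    return CI.Type == InlineAsm::isLabel;
  });
}
// e.g. countLabelConstraints("r,!i,!i") == 2, which must match the callbr's
// getNumIndirectDests() for the module to pass the verifier.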
@@ -2839,25 +2859,6 @@ void Verifier::visitCallBrInst(CallBrInst &CBI) { Check(CBI.isInlineAsm(), "Callbr is currently only used for asm-goto!", &CBI); const InlineAsm *IA = cast<InlineAsm>(CBI.getCalledOperand()); Check(!IA->canThrow(), "Unwinding from Callbr is not allowed"); - for (unsigned i = 0, e = CBI.getNumSuccessors(); i != e; ++i) - Check(CBI.getSuccessor(i)->getType()->isLabelTy(), - "Callbr successors must all have pointer type!", &CBI); - for (unsigned i = 0, e = CBI.getNumOperands(); i != e; ++i) { - Check(i >= CBI.arg_size() || !isa<BasicBlock>(CBI.getOperand(i)), - "Using an unescaped label as a callbr argument!", &CBI); - if (isa<BasicBlock>(CBI.getOperand(i))) - for (unsigned j = i + 1; j != e; ++j) - Check(CBI.getOperand(i) != CBI.getOperand(j), - "Duplicate callbr destination!", &CBI); - } - { - SmallPtrSet<BasicBlock *, 4> ArgBBs; - for (Value *V : CBI.args()) - if (auto *BA = dyn_cast<BlockAddress>(V)) - ArgBBs.insert(BA->getBasicBlock()); - for (BasicBlock *BB : CBI.getIndirectDests()) - Check(ArgBBs.count(BB), "Indirect label missing from arglist.", &CBI); - } verifyInlineAsmCall(CBI); visitTerminator(CBI); @@ -4489,6 +4490,55 @@ void Verifier::visitProfMetadata(Instruction &I, MDNode *MD) { } } +void Verifier::visitCallStackMetadata(MDNode *MD) { + // Call stack metadata should consist of a list of at least 1 constant int + // (representing a hash of the location). + Check(MD->getNumOperands() >= 1, + "call stack metadata should have at least 1 operand", MD); + + for (const auto &Op : MD->operands()) + Check(mdconst::dyn_extract_or_null<ConstantInt>(Op), + "call stack metadata operand should be constant integer", Op); +} + +void Verifier::visitMemProfMetadata(Instruction &I, MDNode *MD) { + Check(isa<CallBase>(I), "!memprof metadata should only exist on calls", &I); + Check(MD->getNumOperands() >= 1, + "!memprof annotations should have at least 1 metadata operand " + "(MemInfoBlock)", + MD); + + // Check each MIB + for (auto &MIBOp : MD->operands()) { + MDNode *MIB = dyn_cast<MDNode>(MIBOp); + // The first operand of an MIB should be the call stack metadata. + // There rest of the operands should be MDString tags, and there should be + // at least one. + Check(MIB->getNumOperands() >= 2, + "Each !memprof MemInfoBlock should have at least 2 operands", MIB); + + // Check call stack metadata (first operand). + Check(MIB->getOperand(0) != nullptr, + "!memprof MemInfoBlock first operand should not be null", MIB); + Check(isa<MDNode>(MIB->getOperand(0)), + "!memprof MemInfoBlock first operand should be an MDNode", MIB); + MDNode *StackMD = dyn_cast<MDNode>(MIB->getOperand(0)); + visitCallStackMetadata(StackMD); + + // Check that remaining operands are MDString. + Check(std::all_of(MIB->op_begin() + 1, MIB->op_end(), + [](const MDOperand &Op) { return isa<MDString>(Op); }), + "Not all !memprof MemInfoBlock operands 1 to N are MDString", MIB); + } +} + +void Verifier::visitCallsiteMetadata(Instruction &I, MDNode *MD) { + Check(isa<CallBase>(I), "!callsite metadata should only exist on calls", &I); + // Verify the partial callstack annotated from memprof profiles. This callsite + // is a part of a profiled allocation callstack. 
+ visitCallStackMetadata(MD); +} + void Verifier::visitAnnotationMetadata(MDNode *Annotation) { Check(isa<MDTuple>(Annotation), "annotation must be a tuple"); Check(Annotation->getNumOperands() >= 1, @@ -4735,6 +4785,12 @@ void Verifier::visitInstruction(Instruction &I) { if (MDNode *MD = I.getMetadata(LLVMContext::MD_prof)) visitProfMetadata(I, MD); + if (MDNode *MD = I.getMetadata(LLVMContext::MD_memprof)) + visitMemProfMetadata(I, MD); + + if (MDNode *MD = I.getMetadata(LLVMContext::MD_callsite)) + visitCallsiteMetadata(I, MD); + if (MDNode *Annotation = I.getMetadata(LLVMContext::MD_annotation)) visitAnnotationMetadata(Annotation); @@ -5160,14 +5216,13 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { // In all other cases relocate should be tied to the statepoint directly. // This covers relocates on a normal return path of invoke statepoint and // relocates of a call statepoint. - auto Token = Call.getArgOperand(0); - Check(isa<GCStatepointInst>(Token), + auto *Token = Call.getArgOperand(0); + Check(isa<GCStatepointInst>(Token) || isa<UndefValue>(Token), "gc relocate is incorrectly tied to the statepoint", Call, Token); } // Verify rest of the relocate arguments. - const CallBase &StatepointCall = - *cast<GCRelocateInst>(Call).getStatepoint(); + const Value &StatepointCall = *cast<GCRelocateInst>(Call).getStatepoint(); // Both the base and derived must be piped through the safepoint. Value *Base = Call.getArgOperand(1); @@ -5182,7 +5237,10 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { const uint64_t DerivedIndex = cast<ConstantInt>(Derived)->getZExtValue(); // Check the bounds - if (auto Opt = StatepointCall.getOperandBundle(LLVMContext::OB_gc_live)) { + if (isa<UndefValue>(StatepointCall)) + break; + if (auto Opt = cast<GCStatepointInst>(StatepointCall) + .getOperandBundle(LLVMContext::OB_gc_live)) { Check(BaseIndex < Opt->Inputs.size(), "gc.relocate: statepoint base index out of bounds", Call); Check(DerivedIndex < Opt->Inputs.size(), diff --git a/llvm/lib/Linker/IRMover.cpp b/llvm/lib/Linker/IRMover.cpp index 9e89cce8312e..e31faf6422ed 100644 --- a/llvm/lib/Linker/IRMover.cpp +++ b/llvm/lib/Linker/IRMover.cpp @@ -1273,14 +1273,19 @@ Error IRLinker::linkModuleFlagsMetadata() { // First build a map of the existing module flags and requirements. DenseMap<MDString *, std::pair<MDNode *, unsigned>> Flags; SmallSetVector<MDNode *, 16> Requirements; + SmallVector<unsigned, 0> Mins; + DenseSet<MDString *> SeenMin; for (unsigned I = 0, E = DstModFlags->getNumOperands(); I != E; ++I) { MDNode *Op = DstModFlags->getOperand(I); - ConstantInt *Behavior = mdconst::extract<ConstantInt>(Op->getOperand(0)); + uint64_t Behavior = + mdconst::extract<ConstantInt>(Op->getOperand(0))->getZExtValue(); MDString *ID = cast<MDString>(Op->getOperand(1)); - if (Behavior->getZExtValue() == Module::Require) { + if (Behavior == Module::Require) { Requirements.insert(cast<MDNode>(Op->getOperand(2))); } else { + if (Behavior == Module::Min) + Mins.push_back(I); Flags[ID] = std::make_pair(Op, I); } } @@ -1296,6 +1301,7 @@ Error IRLinker::linkModuleFlagsMetadata() { unsigned DstIndex; std::tie(DstOp, DstIndex) = Flags.lookup(ID); unsigned SrcBehaviorValue = SrcBehavior->getZExtValue(); + SeenMin.insert(ID); // If this is a requirement, add it and continue. if (SrcBehaviorValue == Module::Require) { @@ -1309,6 +1315,10 @@ Error IRLinker::linkModuleFlagsMetadata() { // If there is no existing flag with this ID, just add it. 
if (!DstOp) { + if (SrcBehaviorValue == Module::Min) { + Mins.push_back(DstModFlags->getNumOperands()); + SeenMin.erase(ID); + } Flags[ID] = std::make_pair(SrcOp, DstModFlags->getNumOperands()); DstModFlags->addOperand(SrcOp); continue; @@ -1362,8 +1372,10 @@ Error IRLinker::linkModuleFlagsMetadata() { "Expected MDTuple when appending module flags"); if (DstValue->isDistinct()) return dyn_cast<MDTuple>(DstValue); + ArrayRef<MDOperand> DstOperands = DstValue->operands(); MDTuple *New = MDTuple::getDistinct( - DstM.getContext(), SmallVector<Metadata *, 4>(DstValue->operands())); + DstM.getContext(), + SmallVector<Metadata *, 4>(DstOperands.begin(), DstOperands.end())); Metadata *FlagOps[] = {DstOp->getOperand(0), ID, New}; MDNode *Flag = MDTuple::getDistinct(DstM.getContext(), FlagOps); DstModFlags->setOperand(DstIndex, Flag); @@ -1465,6 +1477,20 @@ Error IRLinker::linkModuleFlagsMetadata() { } + // For the Min behavior, set the value to 0 if either module does not have the + // flag. + for (auto Idx : Mins) { + MDNode *Op = DstModFlags->getOperand(Idx); + MDString *ID = cast<MDString>(Op->getOperand(1)); + if (!SeenMin.count(ID)) { + ConstantInt *V = mdconst::extract<ConstantInt>(Op->getOperand(2)); + Metadata *FlagOps[] = { + Op->getOperand(0), ID, + ConstantAsMetadata::get(ConstantInt::get(V->getType(), 0))}; + DstModFlags->setOperand(Idx, MDNode::get(DstM.getContext(), FlagOps)); + } + } + // Check all of the requirements. for (unsigned I = 0, E = Requirements.size(); I != E; ++I) { MDNode *Requirement = Requirements[I]; diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp index 78204ffe4c3b..0b4e9866d50a 100644 --- a/llvm/lib/MC/ELFObjectWriter.cpp +++ b/llvm/lib/MC/ELFObjectWriter.cpp @@ -144,9 +144,9 @@ struct ELFWriter { uint64_t align(unsigned Alignment); - bool maybeWriteCompression(uint64_t Size, + bool maybeWriteCompression(uint32_t ChType, uint64_t Size, SmallVectorImpl<uint8_t> &CompressedContents, - bool ZLibStyle, unsigned Alignment); + unsigned Alignment); public: ELFWriter(ELFObjectWriter &OWriter, raw_pwrite_stream &OS, @@ -819,36 +819,25 @@ MCSectionELF *ELFWriter::createRelocationSection(MCContext &Ctx, // Include the debug info compression header. bool ELFWriter::maybeWriteCompression( - uint64_t Size, SmallVectorImpl<uint8_t> &CompressedContents, bool ZLibStyle, - unsigned Alignment) { - if (ZLibStyle) { - uint64_t HdrSize = - is64Bit() ? sizeof(ELF::Elf32_Chdr) : sizeof(ELF::Elf64_Chdr); - if (Size <= HdrSize + CompressedContents.size()) - return false; - // Platform specific header is followed by compressed data. - if (is64Bit()) { - // Write Elf64_Chdr header. - write(static_cast<ELF::Elf64_Word>(ELF::ELFCOMPRESS_ZLIB)); - write(static_cast<ELF::Elf64_Word>(0)); // ch_reserved field. - write(static_cast<ELF::Elf64_Xword>(Size)); - write(static_cast<ELF::Elf64_Xword>(Alignment)); - } else { - // Write Elf32_Chdr header otherwise. - write(static_cast<ELF::Elf32_Word>(ELF::ELFCOMPRESS_ZLIB)); - write(static_cast<ELF::Elf32_Word>(Size)); - write(static_cast<ELF::Elf32_Word>(Alignment)); - } - return true; - } - - // "ZLIB" followed by 8 bytes representing the uncompressed size of the section, - // useful for consumers to preallocate a buffer to decompress into. - const StringRef Magic = "ZLIB"; - if (Size <= Magic.size() + sizeof(Size) + CompressedContents.size()) + uint32_t ChType, uint64_t Size, + SmallVectorImpl<uint8_t> &CompressedContents, unsigned Alignment) { + uint64_t HdrSize = + is64Bit() ? 
sizeof(ELF::Elf32_Chdr) : sizeof(ELF::Elf64_Chdr); + if (Size <= HdrSize + CompressedContents.size()) return false; - W.OS << Magic; - support::endian::write(W.OS, Size, support::big); + // Platform specific header is followed by compressed data. + if (is64Bit()) { + // Write Elf64_Chdr header. + write(static_cast<ELF::Elf64_Word>(ChType)); + write(static_cast<ELF::Elf64_Word>(0)); // ch_reserved field. + write(static_cast<ELF::Elf64_Xword>(Size)); + write(static_cast<ELF::Elf64_Xword>(Alignment)); + } else { + // Write Elf32_Chdr header otherwise. + write(static_cast<ELF::Elf32_Word>(ChType)); + write(static_cast<ELF::Elf32_Word>(Size)); + write(static_cast<ELF::Elf32_Word>(Alignment)); + } return true; } @@ -867,38 +856,31 @@ void ELFWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec, return; } - assert((MAI->compressDebugSections() == DebugCompressionType::Z || - MAI->compressDebugSections() == DebugCompressionType::GNU) && - "expected zlib or zlib-gnu style compression"); + assert(MAI->compressDebugSections() == DebugCompressionType::Z && + "expected zlib style compression"); SmallVector<char, 128> UncompressedData; raw_svector_ostream VecOS(UncompressedData); Asm.writeSectionData(VecOS, &Section, Layout); - SmallVector<uint8_t, 128> CompressedContents; + SmallVector<uint8_t, 128> Compressed; + const uint32_t ChType = ELF::ELFCOMPRESS_ZLIB; compression::zlib::compress( makeArrayRef(reinterpret_cast<uint8_t *>(UncompressedData.data()), UncompressedData.size()), - CompressedContents); + Compressed); - bool ZlibStyle = MAI->compressDebugSections() == DebugCompressionType::Z; - if (!maybeWriteCompression(UncompressedData.size(), CompressedContents, - ZlibStyle, Sec.getAlignment())) { + if (!maybeWriteCompression(ChType, UncompressedData.size(), Compressed, + Sec.getAlignment())) { W.OS << UncompressedData; return; } - if (ZlibStyle) { - // Set the compressed flag. That is zlib style. - Section.setFlags(Section.getFlags() | ELF::SHF_COMPRESSED); - // Alignment field should reflect the requirements of - // the compressed section header. - Section.setAlignment(is64Bit() ? Align(8) : Align(4)); - } else { - // Add "z" prefix to section name. This is zlib-gnu style. - MC.renameELFSection(&Section, (".z" + SectionName.drop_front(1)).str()); - } - W.OS << toStringRef(CompressedContents); + Section.setFlags(Section.getFlags() | ELF::SHF_COMPRESSED); + // Alignment field should reflect the requirements of + // the compressed section header. + Section.setAlignment(is64Bit() ? Align(8) : Align(4)); + W.OS << toStringRef(Compressed); } void ELFWriter::WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags, diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp index d312e3521c9e..322ed8e23eb6 100644 --- a/llvm/lib/MC/MCContext.cpp +++ b/llvm/lib/MC/MCContext.cpp @@ -468,24 +468,6 @@ MCSectionMachO *MCContext::getMachOSection(StringRef Segment, StringRef Section, return R.first->second; } -void MCContext::renameELFSection(MCSectionELF *Section, StringRef Name) { - StringRef GroupName; - if (const MCSymbol *Group = Section->getGroup()) - GroupName = Group->getName(); - - // This function is only used by .debug*, which should not have the - // SHF_LINK_ORDER flag. 
- unsigned UniqueID = Section->getUniqueID(); - ELFUniquingMap.erase( - ELFSectionKey{Section->getName(), GroupName, "", UniqueID}); - auto I = ELFUniquingMap - .insert(std::make_pair( - ELFSectionKey{Name, GroupName, "", UniqueID}, Section)) - .first; - StringRef CachedName = I->first.SectionName; - const_cast<MCSectionELF *>(Section)->setSectionName(CachedName); -} - MCSectionELF *MCContext::createELFSectionImpl(StringRef Section, unsigned Type, unsigned Flags, SectionKind K, unsigned EntrySize, diff --git a/llvm/lib/MC/MCMachOStreamer.cpp b/llvm/lib/MC/MCMachOStreamer.cpp index 9f22b9b0a866..f358f593ff39 100644 --- a/llvm/lib/MC/MCMachOStreamer.cpp +++ b/llvm/lib/MC/MCMachOStreamer.cpp @@ -583,15 +583,27 @@ MCStreamer *llvm::createMachOStreamer(MCContext &Context, return S; } -// Create the AddrSig section and first data fragment here as its layout needs -// to be computed immediately after in order for it to be exported correctly. +// The AddrSig section uses a series of relocations to refer to the symbols that +// should be considered address-significant. The only interesting content of +// these relocations is their symbol; the type, length etc will be ignored by +// the linker. The reason we are not referring to the symbol indices directly is +// that those indices will be invalidated by tools that update the symbol table. +// Symbol relocations OTOH will have their indices updated by e.g. llvm-strip. void MCMachOStreamer::createAddrSigSection() { MCAssembler &Asm = getAssembler(); MCObjectWriter &writer = Asm.getWriter(); if (!writer.getEmitAddrsigSection()) return; + // Create the AddrSig section and first data fragment here as its layout needs + // to be computed immediately after in order for it to be exported correctly. MCSection *AddrSigSection = Asm.getContext().getObjectFileInfo()->getAddrSigSection(); Asm.registerSection(*AddrSigSection); - new MCDataFragment(AddrSigSection); + auto *Frag = new MCDataFragment(AddrSigSection); + // We will generate a series of pointer-sized symbol relocations at offset + // 0x0. Set the section size to be large enough to contain a single pointer + // (instead of emitting a zero-sized section) so these relocations are + // technically valid, even though we don't expect these relocations to + // actually be applied by the linker. + Frag->getContents().resize(8); } diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index 694ea395fdec..af80d8327210 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -4238,11 +4238,8 @@ bool MasmParser::parseStructInitializer(const StructInfo &Structure, } } // Default-initialize all remaining fields. - for (auto It = Structure.Fields.begin() + FieldIndex; - It != Structure.Fields.end(); ++It) { - const FieldInfo &Field = *It; + for (const FieldInfo &Field : llvm::drop_begin(Structure.Fields, FieldIndex)) FieldInitializers.push_back(Field.Contents); - } if (EndToken) { if (EndToken.value() == AsmToken::Greater) @@ -4350,9 +4347,8 @@ bool MasmParser::emitFieldInitializer(const FieldInfo &Field, return true; } // Default-initialize all remaining values. 
- for (auto it = Contents.Values.begin() + Initializer.Values.size(); - it != Contents.Values.end(); ++it) { - const auto &Value = *it; + for (const auto &Value : + llvm::drop_begin(Contents.Values, Initializer.Values.size())) { if (emitIntValue(Value, Field.Type)) return true; } @@ -4367,9 +4363,8 @@ bool MasmParser::emitFieldInitializer(const FieldInfo &Field, AsInt.getBitWidth() / 8); } // Default-initialize all remaining values. - for (auto It = Contents.AsIntValues.begin() + Initializer.AsIntValues.size(); - It != Contents.AsIntValues.end(); ++It) { - const auto &AsInt = *It; + for (const auto &AsInt : + llvm::drop_begin(Contents.AsIntValues, Initializer.AsIntValues.size())) { getStreamer().emitIntValue(AsInt.getLimitedValue(), AsInt.getBitWidth() / 8); } @@ -4384,10 +4379,8 @@ bool MasmParser::emitFieldInitializer(const FieldInfo &Field, return true; } // Default-initialize all remaining values. - for (auto It = - Contents.Initializers.begin() + Initializer.Initializers.size(); - It != Contents.Initializers.end(); ++It) { - const auto &Init = *It; + for (const auto &Init : llvm::drop_begin(Contents.Initializers, + Initializer.Initializers.size())) { if (emitStructInitializer(Contents.Structure, Init)) return true; } @@ -4425,10 +4418,8 @@ bool MasmParser::emitStructInitializer(const StructInfo &Structure, return true; } // Default-initialize all remaining fields. - for (auto It = - Structure.Fields.begin() + Initializer.FieldInitializers.size(); - It != Structure.Fields.end(); ++It) { - const auto &Field = *It; + for (const auto &Field : llvm::drop_begin( + Structure.Fields, Initializer.FieldInitializers.size())) { getStreamer().emitZeros(Field.Offset - Offset); Offset = Field.Offset + Field.SizeOf; if (emitFieldValue(Field)) @@ -4649,10 +4640,8 @@ bool MasmParser::parseDirectiveNestedEnds() { if (ParentStruct.IsUnion) { ParentStruct.Size = std::max(ParentStruct.Size, Structure.Size); } else { - for (auto FieldIter = ParentStruct.Fields.begin() + OldFields; - FieldIter != ParentStruct.Fields.end(); ++FieldIter) { - FieldIter->Offset += FirstFieldOffset; - } + for (auto &Field : llvm::drop_begin(ParentStruct.Fields, OldFields)) + Field.Offset += FirstFieldOffset; const unsigned StructureEnd = FirstFieldOffset + Structure.Size; if (!ParentStruct.IsUnion) { diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp index 5277ce87bee0..fdf8bbbe0a4d 100644 --- a/llvm/lib/MC/MCPseudoProbe.cpp +++ b/llvm/lib/MC/MCPseudoProbe.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCPseudoProbe.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -519,7 +520,7 @@ void MCPseudoProbeDecoder::printProbesForAllAddresses(raw_ostream &OS) { std::vector<uint64_t> Addresses; for (auto Entry : Address2ProbesMap) Addresses.push_back(Entry.first); - std::sort(Addresses.begin(), Addresses.end()); + llvm::sort(Addresses); for (auto K : Addresses) { OS << "Address:\t"; OS << K; diff --git a/llvm/lib/MC/MachObjectWriter.cpp b/llvm/lib/MC/MachObjectWriter.cpp index 78d0d9cec556..038433cb24fa 100644 --- a/llvm/lib/MC/MachObjectWriter.cpp +++ b/llvm/lib/MC/MachObjectWriter.cpp @@ -753,32 +753,27 @@ static MachO::LoadCommandType getLCFromMCVM(MCVersionMinType Type) { llvm_unreachable("Invalid mc version min type"); } -// Encode addrsig data as symbol indexes in variable length encoding. 
-void MachObjectWriter::writeAddrsigSection(MCAssembler &Asm) { +void MachObjectWriter::populateAddrSigSection(MCAssembler &Asm) { MCSection *AddrSigSection = Asm.getContext().getObjectFileInfo()->getAddrSigSection(); - MCSection::FragmentListType &fragmentList = AddrSigSection->getFragmentList(); - if (!fragmentList.size()) - return; - - assert(fragmentList.size() == 1); - MCFragment *pFragment = &*fragmentList.begin(); - MCDataFragment *pDataFragment = dyn_cast_or_null<MCDataFragment>(pFragment); - assert(pDataFragment); - - raw_svector_ostream OS(pDataFragment->getContents()); - for (const MCSymbol *sym : this->getAddrsigSyms()) - encodeULEB128(sym->getIndex(), OS); + unsigned Log2Size = is64Bit() ? 3 : 2; + for (const MCSymbol *S : getAddrsigSyms()) { + MachO::any_relocation_info MRE; + MRE.r_word0 = 0; + MRE.r_word1 = (Log2Size << 25) | (MachO::GENERIC_RELOC_VANILLA << 28); + addRelocation(S, AddrSigSection, MRE); + } } uint64_t MachObjectWriter::writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) { uint64_t StartOffset = W.OS.tell(); + populateAddrSigSection(Asm); + // Compute symbol table information and bind symbol indices. computeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData, UndefinedSymbolData); - writeAddrsigSection(Asm); if (!Asm.CGProfile.empty()) { MCSection *CGProfileSection = Asm.getContext().getMachOSection( diff --git a/llvm/lib/MC/WinCOFFObjectWriter.cpp b/llvm/lib/MC/WinCOFFObjectWriter.cpp index 33e496b7a864..809ac37c3442 100644 --- a/llvm/lib/MC/WinCOFFObjectWriter.cpp +++ b/llvm/lib/MC/WinCOFFObjectWriter.cpp @@ -169,6 +169,7 @@ public: Strings.clear(); SectionMap.clear(); SymbolMap.clear(); + WeakDefaults.clear(); MCObjectWriter::reset(); } diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp index 977e77bf67fd..d46ae2247535 100644 --- a/llvm/lib/MC/XCOFFObjectWriter.cpp +++ b/llvm/lib/MC/XCOFFObjectWriter.cpp @@ -206,6 +206,7 @@ class XCOFFObjectWriter : public MCObjectWriter { uint16_t SectionCount = 0; uint64_t RelocationEntryOffset = 0; std::vector<std::pair<std::string, size_t>> FileNames; + bool HasVisibility = false; support::endian::Writer W; std::unique_ptr<MCXCOFFObjectTargetWriter> TargetObjectWriter; @@ -275,6 +276,7 @@ class XCOFFObjectWriter : public MCObjectWriter { void writeSymbolEntryForDwarfSection(const XCOFFSection &DwarfSectionRef, int16_t SectionIndex); void writeFileHeader(); + void writeAuxFileHeader(); void writeSectionHeaderTable(); void writeSections(const MCAssembler &Asm, const MCAsmLayout &Layout); void writeSectionForControlSectionEntry(const MCAssembler &Asm, @@ -308,14 +310,9 @@ class XCOFFObjectWriter : public MCObjectWriter { void assignAddressesAndIndices(const MCAsmLayout &); void finalizeSectionInfo(); - // TODO aux header support not implemented. - bool needsAuxiliaryHeader() const { return false; } - - // Returns the size of the auxiliary header to be written to the object file. size_t auxiliaryHeaderSize() const { - assert(!needsAuxiliaryHeader() && - "Auxiliary header support not implemented."); - return 0; + // 64-bit object files have no auxiliary header. + return HasVisibility && !is64Bit() ? 
XCOFF::AuxFileHeaderSizeShort : 0; } public: @@ -468,6 +465,9 @@ void XCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm, const MCSymbolXCOFF *XSym = cast<MCSymbolXCOFF>(&S); const MCSectionXCOFF *ContainingCsect = getContainingCsect(XSym); + if (XSym->getVisibilityType() != XCOFF::SYM_V_UNSPECIFIED) + HasVisibility = true; + if (ContainingCsect->getCSectType() == XCOFF::XTY_ER) { // Handle undefined symbol. UndefinedCsects.emplace_back(ContainingCsect); @@ -648,6 +648,7 @@ uint64_t XCOFFObjectWriter::writeObject(MCAssembler &Asm, uint64_t StartOffset = W.OS.tell(); writeFileHeader(); + writeAuxFileHeader(); writeSectionHeaderTable(); writeSections(Asm, Layout); writeRelocations(); @@ -688,12 +689,6 @@ void XCOFFObjectWriter::writeSymbolEntry(StringRef SymbolName, uint64_t Value, W.write<uint32_t>(Value); } W.write<int16_t>(SectionNumber); - // Basic/Derived type. See the description of the n_type field for symbol - // table entries for a detailed description. Since we don't yet support - // visibility, and all other bits are either optionally set or reserved, this - // is always zero. - if (SymbolType != 0) - report_fatal_error("Emitting non-zero visibilities is not supported yet."); // TODO Set the function indicator (bit 10, 0x0020) for functions // when debugging is enabled. W.write<uint16_t>(SymbolType); @@ -773,18 +768,32 @@ void XCOFFObjectWriter::writeFileHeader() { W.write<int32_t>(0); // TimeStamp writeWord(SymbolTableOffset); if (is64Bit()) { - W.write<uint16_t>(0); // AuxHeaderSize. No optional header for an object - // file that is not to be loaded. + W.write<uint16_t>(auxiliaryHeaderSize()); W.write<uint16_t>(0); // Flags W.write<int32_t>(SymbolTableEntryCount); } else { W.write<int32_t>(SymbolTableEntryCount); - W.write<uint16_t>(0); // AuxHeaderSize. No optional header for an object - // file that is not to be loaded. + W.write<uint16_t>(auxiliaryHeaderSize()); W.write<uint16_t>(0); // Flags } } +void XCOFFObjectWriter::writeAuxFileHeader() { + if (!auxiliaryHeaderSize()) + return; + W.write<uint16_t>(0); // Magic + W.write<uint16_t>( + XCOFF::NEW_XCOFF_INTERPRET); // Version. The new interpretation of the + // n_type field in the symbol table entry is + // used in XCOFF32. + W.write<uint32_t>(Sections[0]->Size); // TextSize + W.write<uint32_t>(Sections[1]->Size); // InitDataSize + W.write<uint32_t>(Sections[2]->Size); // BssDataSize + W.write<uint32_t>(0); // EntryPointAddr + W.write<uint32_t>(Sections[0]->Address); // TextStartAddr + W.write<uint32_t>(Sections[1]->Address); // DataStartAddr +} + void XCOFFObjectWriter::writeSectionHeaderTable() { auto writeSectionHeader = [&](const SectionEntry *Sec, bool IsDwarf) { // Nothing to write for this Section. 
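The ELFObjectWriter hunk earlier in this patch and the ObjCopy/Decompressor hunks below settle on a single format for compressed debug sections: an SHF_COMPRESSED section whose contents begin with an Elf32_Chdr/Elf64_Chdr carrying ELFCOMPRESS_ZLIB, followed by the zlib stream; the GNU-style ".zdebug_*" naming with a "ZLIB" magic is no longer written or read. A rough sketch of decoding that header, assuming a 64-bit little-endian object for brevity (the helper name readZlibChdr64LE is illustrative; Decompressor::consumeCompressedZLibHeader is the real implementation):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/Support/Endian.h"
using namespace llvm;

// Sketch only: an SHF_COMPRESSED section starts with an Elf64_Chdr
// { ch_type, ch_reserved, ch_size, ch_addralign }; ch_type must be
// ELFCOMPRESS_ZLIB and ch_size holds the decompressed size.
static bool readZlibChdr64LE(ArrayRef<uint8_t> Contents, uint64_t &DecompressedSize) {
  if (Contents.size() < sizeof(ELF::Elf64_Chdr))
    return false;
  const uint8_t *P = Contents.data();
  if (support::endian::read32le(P) != ELF::ELFCOMPRESS_ZLIB) // ch_type
    return false;
  DecompressedSize = support::endian::read64le(P + 8); // ch_size
  return true; // the zlib stream follows the 24-byte header
}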
diff --git a/llvm/lib/ObjCopy/ELF/ELFObject.cpp b/llvm/lib/ObjCopy/ELF/ELFObject.cpp index f0e4f91cd347..8b44c09023f1 100644 --- a/llvm/lib/ObjCopy/ELF/ELFObject.cpp +++ b/llvm/lib/ObjCopy/ELF/ELFObject.cpp @@ -523,9 +523,6 @@ Error ELFSectionWriter<ELFT>::visit(const CompressedSection &Sec) { case DebugCompressionType::None: std::copy(Sec.OriginalData.begin(), Sec.OriginalData.end(), Buf); return Error::success(); - case DebugCompressionType::GNU: - llvm_unreachable("unexpected zlib-gnu"); - break; case DebugCompressionType::Z: Chdr.ch_type = ELF::ELFCOMPRESS_ZLIB; break; diff --git a/llvm/lib/Object/Archive.cpp b/llvm/lib/Object/Archive.cpp index ad03f9cae9f8..1dffe007b9a9 100644 --- a/llvm/lib/Object/Archive.cpp +++ b/llvm/lib/Object/Archive.cpp @@ -965,14 +965,15 @@ StringRef Archive::Symbol::getName() const { Expected<Archive::Child> Archive::Symbol::getMember() const { const char *Buf = Parent->getSymbolTable().begin(); const char *Offsets = Buf; - if (Parent->kind() == K_GNU64 || Parent->kind() == K_DARWIN64) + if (Parent->kind() == K_GNU64 || Parent->kind() == K_DARWIN64 || + Parent->kind() == K_AIXBIG) Offsets += sizeof(uint64_t); else Offsets += sizeof(uint32_t); uint64_t Offset = 0; if (Parent->kind() == K_GNU) { Offset = read32be(Offsets + SymbolIndex * 4); - } else if (Parent->kind() == K_GNU64) { + } else if (Parent->kind() == K_GNU64 || Parent->kind() == K_AIXBIG) { Offset = read64be(Offsets + SymbolIndex * 8); } else if (Parent->kind() == K_BSD) { // The SymbolIndex is an index into the ranlib structs that start at @@ -1105,6 +1106,8 @@ Archive::symbol_iterator Archive::symbol_begin() const { // Skip the byte count of the string table. buf += sizeof(uint64_t); buf += ran_strx; + } else if (kind() == K_AIXBIG) { + buf = getStringTable().begin(); } else { uint32_t member_count = 0; uint32_t symbol_count = 0; @@ -1127,7 +1130,7 @@ uint32_t Archive::getNumberOfSymbols() const { const char *buf = getSymbolTable().begin(); if (kind() == K_GNU) return read32be(buf); - if (kind() == K_GNU64) + if (kind() == K_GNU64 || kind() == K_AIXBIG) return read64be(buf); if (kind() == K_BSD) return read32le(buf) / 8; @@ -1180,6 +1183,58 @@ BigArchive::BigArchive(MemoryBufferRef Source, Error &Err) Err = malformedError("malformed AIX big archive: last member offset \"" + RawOffset + "\" is not a number"); + // Calculate the global symbol table. + uint64_t GlobSymOffset = 0; + RawOffset = getFieldRawString(ArFixLenHdr->GlobSymOffset); + if (RawOffset.getAsInteger(10, GlobSymOffset)) + // TODO: add test case. + Err = malformedError( + "malformed AIX big archive: global symbol table offset \"" + RawOffset + + "\" is not a number"); + + if (Err) + return; + + if (GlobSymOffset > 0) { + uint64_t BufferSize = Data.getBufferSize(); + uint64_t GlobalSymTblContentOffset = + GlobSymOffset + sizeof(BigArMemHdrType); + if (GlobalSymTblContentOffset > BufferSize) { + Err = malformedError("global symbol table header at offset 0x" + + Twine::utohexstr(GlobSymOffset) + " and size 0x" + + Twine::utohexstr(sizeof(BigArMemHdrType)) + + " goes past the end of file"); + return; + } + + const char *GlobSymTblLoc = Data.getBufferStart() + GlobSymOffset; + const BigArMemHdrType *GlobalSymHdr = + reinterpret_cast<const BigArMemHdrType *>(GlobSymTblLoc); + RawOffset = getFieldRawString(GlobalSymHdr->Size); + uint64_t Size; + if (RawOffset.getAsInteger(10, Size)) { + // TODO: add test case. 
+ Err = malformedError( + "malformed AIX big archive: global symbol table size \"" + RawOffset + + "\" is not a number"); + return; + } + if (GlobalSymTblContentOffset + Size > BufferSize) { + Err = malformedError("global symbol table content at offset 0x" + + Twine::utohexstr(GlobalSymTblContentOffset) + + " and size 0x" + Twine::utohexstr(Size) + + " goes past the end of file"); + return; + } + SymbolTable = StringRef(GlobSymTblLoc + sizeof(BigArMemHdrType), Size); + unsigned SymNum = getNumberOfSymbols(); + unsigned SymOffsetsSize = 8 * (SymNum + 1); + uint64_t SymbolTableStringSize = Size - SymOffsetsSize; + StringTable = + StringRef(GlobSymTblLoc + sizeof(BigArMemHdrType) + SymOffsetsSize, + SymbolTableStringSize); + } + child_iterator I = child_begin(Err, false); if (Err) return; diff --git a/llvm/lib/Object/Decompressor.cpp b/llvm/lib/Object/Decompressor.cpp index a6a28a0589ac..3842ec92ccfc 100644 --- a/llvm/lib/Object/Decompressor.cpp +++ b/llvm/lib/Object/Decompressor.cpp @@ -23,9 +23,7 @@ Expected<Decompressor> Decompressor::create(StringRef Name, StringRef Data, return createError("zlib is not available"); Decompressor D(Data); - Error Err = isGnuStyle(Name) ? D.consumeCompressedGnuHeader() - : D.consumeCompressedZLibHeader(Is64Bit, IsLE); - if (Err) + if (Error Err = D.consumeCompressedZLibHeader(Is64Bit, IsLE)) return std::move(Err); return D; } @@ -33,21 +31,6 @@ Expected<Decompressor> Decompressor::create(StringRef Name, StringRef Data, Decompressor::Decompressor(StringRef Data) : SectionData(Data), DecompressedSize(0) {} -Error Decompressor::consumeCompressedGnuHeader() { - if (!SectionData.startswith("ZLIB")) - return createError("corrupted compressed section header"); - - SectionData = SectionData.substr(4); - - // Consume uncompressed section size (big-endian 8 bytes). 
- if (SectionData.size() < 8) - return createError("corrupted uncompressed section size"); - DecompressedSize = read64be(SectionData.data()); - SectionData = SectionData.substr(8); - - return Error::success(); -} - Error Decompressor::consumeCompressedZLibHeader(bool Is64Bit, bool IsLittleEndian) { using namespace ELF; @@ -72,26 +55,6 @@ Error Decompressor::consumeCompressedZLibHeader(bool Is64Bit, return Error::success(); } -bool Decompressor::isGnuStyle(StringRef Name) { - return Name.startswith(".zdebug"); -} - -bool Decompressor::isCompressed(const object::SectionRef &Section) { - if (Section.isCompressed()) - return true; - - Expected<StringRef> SecNameOrErr = Section.getName(); - if (SecNameOrErr) - return isGnuStyle(*SecNameOrErr); - - consumeError(SecNameOrErr.takeError()); - return false; -} - -bool Decompressor::isCompressedELFSection(uint64_t Flags, StringRef Name) { - return (Flags & ELF::SHF_COMPRESSED) || isGnuStyle(Name); -} - Error Decompressor::decompress(MutableArrayRef<uint8_t> Buffer) { size_t Size = Buffer.size(); return compression::zlib::uncompress(arrayRefFromStringRef(SectionData), diff --git a/llvm/lib/Object/WasmObjectFile.cpp b/llvm/lib/Object/WasmObjectFile.cpp index ce816b097691..d00359c6deef 100644 --- a/llvm/lib/Object/WasmObjectFile.cpp +++ b/llvm/lib/Object/WasmObjectFile.cpp @@ -204,7 +204,7 @@ static Error readInitExpr(wasm::WasmInitExpr &Expr, if (Expr.Extended) { Ctx.Ptr = Start; - while (1) { + while (true) { uint8_t Opcode = readOpcode(Ctx); switch (Opcode) { case wasm::WASM_OPCODE_I32_CONST: diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 593243144f01..3b3eefcc29ca 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -172,7 +172,7 @@ static cl::opt<bool> EnableEagerlyInvalidateAnalyses( cl::desc("Eagerly invalidate more analyses in default pipelines")); static cl::opt<bool> EnableNoRerunSimplificationPipeline( - "enable-no-rerun-simplification-pipeline", cl::init(false), cl::Hidden, + "enable-no-rerun-simplification-pipeline", cl::init(true), cl::Hidden, cl::desc( "Prevent running the simplification pipeline on a function more " "than once in the case that SCC mutations cause a function to be " diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp index bad8184dffcf..baea0eb53ef9 100644 --- a/llvm/lib/Passes/StandardInstrumentations.cpp +++ b/llvm/lib/Passes/StandardInstrumentations.cpp @@ -81,36 +81,35 @@ cl::opt<bool> PreservedCFGCheckerInstrumentation::VerifyPreservedCFG( // facilities, the error message will be shown in place of the expected output. 
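// Illustrative usage (an assumed invocation; the input file and pipeline text
// are only examples): with the shortened values defined below, patch-like IR
// diffs for every pass that changes the IR can be requested as
//   opt -passes='default<O2>' -print-changed=diff in.ll -S -o /dev/null
// and the other values select the quiet, colour-diff and dot-cfg variants.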
// enum class ChangePrinter { - NoChangePrinter, - PrintChangedVerbose, - PrintChangedQuiet, - PrintChangedDiffVerbose, - PrintChangedDiffQuiet, - PrintChangedColourDiffVerbose, - PrintChangedColourDiffQuiet, - PrintChangedDotCfgVerbose, - PrintChangedDotCfgQuiet + None, + Verbose, + Quiet, + DiffVerbose, + DiffQuiet, + ColourDiffVerbose, + ColourDiffQuiet, + DotCfgVerbose, + DotCfgQuiet, }; static cl::opt<ChangePrinter> PrintChanged( "print-changed", cl::desc("Print changed IRs"), cl::Hidden, - cl::ValueOptional, cl::init(ChangePrinter::NoChangePrinter), + cl::ValueOptional, cl::init(ChangePrinter::None), cl::values( - clEnumValN(ChangePrinter::PrintChangedQuiet, "quiet", - "Run in quiet mode"), - clEnumValN(ChangePrinter::PrintChangedDiffVerbose, "diff", + clEnumValN(ChangePrinter::Quiet, "quiet", "Run in quiet mode"), + clEnumValN(ChangePrinter::DiffVerbose, "diff", "Display patch-like changes"), - clEnumValN(ChangePrinter::PrintChangedDiffQuiet, "diff-quiet", + clEnumValN(ChangePrinter::DiffQuiet, "diff-quiet", "Display patch-like changes in quiet mode"), - clEnumValN(ChangePrinter::PrintChangedColourDiffVerbose, "cdiff", + clEnumValN(ChangePrinter::ColourDiffVerbose, "cdiff", "Display patch-like changes with color"), - clEnumValN(ChangePrinter::PrintChangedColourDiffQuiet, "cdiff-quiet", + clEnumValN(ChangePrinter::ColourDiffQuiet, "cdiff-quiet", "Display patch-like changes in quiet mode with color"), - clEnumValN(ChangePrinter::PrintChangedDotCfgVerbose, "dot-cfg", + clEnumValN(ChangePrinter::DotCfgVerbose, "dot-cfg", "Create a website with graphical changes"), - clEnumValN(ChangePrinter::PrintChangedDotCfgQuiet, "dot-cfg-quiet", + clEnumValN(ChangePrinter::DotCfgQuiet, "dot-cfg-quiet", "Create a website with graphical changes in quiet mode"), // Sentinel value for unspecified option. - clEnumValN(ChangePrinter::PrintChangedVerbose, "", ""))); + clEnumValN(ChangePrinter::Verbose, "", ""))); // An option that supports the -print-changed option. 
See // the description for -print-changed for an explanation of the use @@ -596,8 +595,8 @@ void TextChangeReporter<T>::handleIgnored(StringRef PassID, std::string &Name) { IRChangedPrinter::~IRChangedPrinter() = default; void IRChangedPrinter::registerCallbacks(PassInstrumentationCallbacks &PIC) { - if (PrintChanged == ChangePrinter::PrintChangedVerbose || - PrintChanged == ChangePrinter::PrintChangedQuiet) + if (PrintChanged == ChangePrinter::Verbose || + PrintChanged == ChangePrinter::Quiet) TextChangeReporter<std::string>::registerRequiredCallbacks(PIC); } @@ -940,7 +939,22 @@ void PrintPassInstrumentation::registerCallbacks( if (isSpecialPass(PassID, SpecialPasses)) return; - print() << "Running pass: " << PassID << " on " << getIRName(IR) << "\n"; + auto &OS = print(); + OS << "Running pass: " << PassID << " on " << getIRName(IR); + if (any_isa<const Function *>(IR)) { + unsigned Count = any_cast<const Function *>(IR)->getInstructionCount(); + OS << " (" << Count << " instruction"; + if (Count != 1) + OS << 's'; + OS << ')'; + } else if (any_isa<const LazyCallGraph::SCC *>(IR)) { + int Count = any_cast<const LazyCallGraph::SCC *>(IR)->size(); + OS << " (" << Count << " node"; + if (Count != 1) + OS << 's'; + OS << ')'; + } + OS << "\n"; Indent += 2; }); PIC.registerAfterPassCallback( @@ -1260,10 +1274,10 @@ void InLineChangePrinter::handleFunctionCompare( } void InLineChangePrinter::registerCallbacks(PassInstrumentationCallbacks &PIC) { - if (PrintChanged == ChangePrinter::PrintChangedDiffVerbose || - PrintChanged == ChangePrinter::PrintChangedDiffQuiet || - PrintChanged == ChangePrinter::PrintChangedColourDiffVerbose || - PrintChanged == ChangePrinter::PrintChangedColourDiffQuiet) + if (PrintChanged == ChangePrinter::DiffVerbose || + PrintChanged == ChangePrinter::DiffQuiet || + PrintChanged == ChangePrinter::ColourDiffVerbose || + PrintChanged == ChangePrinter::ColourDiffQuiet) TextChangeReporter<IRDataT<EmptyData>>::registerRequiredCallbacks(PIC); } @@ -2096,8 +2110,8 @@ DotCfgChangeReporter::~DotCfgChangeReporter() { void DotCfgChangeReporter::registerCallbacks( PassInstrumentationCallbacks &PIC) { - if ((PrintChanged == ChangePrinter::PrintChangedDotCfgVerbose || - PrintChanged == ChangePrinter::PrintChangedDotCfgQuiet)) { + if (PrintChanged == ChangePrinter::DotCfgVerbose || + PrintChanged == ChangePrinter::DotCfgQuiet) { SmallString<128> OutputDir; sys::fs::expand_tilde(DotCfgDir, OutputDir); sys::fs::make_absolute(OutputDir); @@ -2114,14 +2128,12 @@ void DotCfgChangeReporter::registerCallbacks( StandardInstrumentations::StandardInstrumentations( bool DebugLogging, bool VerifyEach, PrintPassOptions PrintPassOpts) : PrintPass(DebugLogging, PrintPassOpts), OptNone(DebugLogging), - PrintChangedIR(PrintChanged == ChangePrinter::PrintChangedVerbose), - PrintChangedDiff( - PrintChanged == ChangePrinter::PrintChangedDiffVerbose || - PrintChanged == ChangePrinter::PrintChangedColourDiffVerbose, - PrintChanged == ChangePrinter::PrintChangedColourDiffVerbose || - PrintChanged == ChangePrinter::PrintChangedColourDiffQuiet), - WebsiteChangeReporter(PrintChanged == - ChangePrinter::PrintChangedDotCfgVerbose), + PrintChangedIR(PrintChanged == ChangePrinter::Verbose), + PrintChangedDiff(PrintChanged == ChangePrinter::DiffVerbose || + PrintChanged == ChangePrinter::ColourDiffVerbose, + PrintChanged == ChangePrinter::ColourDiffVerbose || + PrintChanged == ChangePrinter::ColourDiffQuiet), + WebsiteChangeReporter(PrintChanged == ChangePrinter::DotCfgVerbose), Verify(DebugLogging), 
VerifyEach(VerifyEach) {} PrintCrashIRInstrumentation *PrintCrashIRInstrumentation::CrashReporter = diff --git a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp index f4f13bafb233..8c1eadbe8271 100644 --- a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp +++ b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp @@ -349,7 +349,7 @@ CoverageMapping::load(ArrayRef<StringRef> ObjectFilenames, StringRef CompilationDir) { auto ProfileReaderOrErr = IndexedInstrProfReader::create(ProfileFilename); if (Error E = ProfileReaderOrErr.takeError()) - return std::move(E); + return createFileError(ProfileFilename, std::move(E)); auto ProfileReader = std::move(ProfileReaderOrErr.get()); auto Coverage = std::unique_ptr<CoverageMapping>(new CoverageMapping()); bool DataFound = false; @@ -358,7 +358,7 @@ CoverageMapping::load(ArrayRef<StringRef> ObjectFilenames, auto CovMappingBufOrErr = MemoryBuffer::getFileOrSTDIN( File.value(), /*IsText=*/false, /*RequiresNullTerminator=*/false); if (std::error_code EC = CovMappingBufOrErr.getError()) - return errorCodeToError(EC); + return createFileError(File.value(), errorCodeToError(EC)); StringRef Arch = Arches.empty() ? StringRef() : Arches[File.index()]; MemoryBufferRef CovMappingBufRef = CovMappingBufOrErr.get()->getMemBufferRef(); @@ -368,7 +368,7 @@ CoverageMapping::load(ArrayRef<StringRef> ObjectFilenames, if (Error E = CoverageReadersOrErr.takeError()) { E = handleMaybeNoDataFoundError(std::move(E)); if (E) - return std::move(E); + return createFileError(File.value(), std::move(E)); // E == success (originally a no_data_found error). continue; } @@ -378,12 +378,14 @@ CoverageMapping::load(ArrayRef<StringRef> ObjectFilenames, Readers.push_back(std::move(Reader)); DataFound |= !Readers.empty(); if (Error E = loadFromReaders(Readers, *ProfileReader, *Coverage)) - return std::move(E); + return createFileError(File.value(), std::move(E)); } // If no readers were created, either no objects were provided or none of them // had coverage data. Return an error in the latter case. if (!DataFound && !ObjectFilenames.empty()) - return make_error<CoverageMapError>(coveragemap_error::no_data_found); + return createFileError( + join(ObjectFilenames.begin(), ObjectFilenames.end(), ", "), + make_error<CoverageMapError>(coveragemap_error::no_data_found)); return std::move(Coverage); } diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp index ee8989979a26..23804ce604c4 100644 --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -1026,20 +1026,50 @@ InstrProfSymtab &IndexedInstrProfReader::getSymtab() { return *Symtab; } -Expected<InstrProfRecord> -IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName, - uint64_t FuncHash) { +Expected<InstrProfRecord> IndexedInstrProfReader::getInstrProfRecord( + StringRef FuncName, uint64_t FuncHash, uint64_t *MismatchedFuncSum) { ArrayRef<NamedInstrProfRecord> Data; + uint64_t FuncSum = 0; Error Err = Remapper->getRecords(FuncName, Data); if (Err) return std::move(Err); // Found it. Look for counters with the right hash. + + // A flag to indicate if the records are from the same type + // of profile (i.e cs vs nocs). 
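  // Sketch of how the new out-parameter can be consumed (hypothetical caller;
  // `Reader`, `FuncName` and `FuncHash` are assumed names, not part of this
  // patch):
  //   uint64_t MismatchedFuncSum = 0;
  //   auto Rec = Reader.getInstrProfRecord(FuncName, FuncHash,
  //                                        &MismatchedFuncSum);
  //   // If this fails with instrprof_error::hash_mismatch, MismatchedFuncSum
  //   // holds the largest counter sum among records whose CS bit matched but
  //   // whose hash did not.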
+ bool CSBitMatch = false; + auto getFuncSum = [](const std::vector<uint64_t> &Counts) { + uint64_t ValueSum = 0; + for (unsigned I = 0, S = Counts.size(); I < S; I++) { + uint64_t CountValue = Counts[I]; + if (CountValue == (uint64_t)-1) + continue; + // Handle overflow -- if that happens, return max. + if (std::numeric_limits<uint64_t>::max() - CountValue <= ValueSum) + return std::numeric_limits<uint64_t>::max(); + ValueSum += CountValue; + } + return ValueSum; + }; + for (const NamedInstrProfRecord &I : Data) { // Check for a match and fill the vector if there is one. if (I.Hash == FuncHash) return std::move(I); + if (NamedInstrProfRecord::hasCSFlagInHash(I.Hash) == + NamedInstrProfRecord::hasCSFlagInHash(FuncHash)) { + CSBitMatch = true; + if (MismatchedFuncSum == nullptr) + continue; + FuncSum = std::max(FuncSum, getFuncSum(I.Counts)); + } + } + if (CSBitMatch) { + if (MismatchedFuncSum != nullptr) + *MismatchedFuncSum = FuncSum; + return error(instrprof_error::hash_mismatch); } - return error(instrprof_error::hash_mismatch); + return error(instrprof_error::unknown_function); } Expected<memprof::MemProfRecord> diff --git a/llvm/lib/Support/AddressRanges.cpp b/llvm/lib/Support/AddressRanges.cpp index 5ba011bac4e9..187d5be00dae 100644 --- a/llvm/lib/Support/AddressRanges.cpp +++ b/llvm/lib/Support/AddressRanges.cpp @@ -12,48 +12,59 @@ using namespace llvm; -void AddressRanges::insert(AddressRange Range) { +AddressRanges::Collection::const_iterator +AddressRanges::insert(AddressRange Range) { if (Range.size() == 0) - return; + return Ranges.end(); auto It = llvm::upper_bound(Ranges, Range); auto It2 = It; - while (It2 != Ranges.end() && It2->start() < Range.end()) + while (It2 != Ranges.end() && It2->start() <= Range.end()) ++It2; if (It != It2) { - Range = {Range.start(), std::max(Range.end(), It2[-1].end())}; + Range = {Range.start(), std::max(Range.end(), std::prev(It2)->end())}; It = Ranges.erase(It, It2); } - if (It != Ranges.begin() && Range.start() < It[-1].end()) - It[-1] = {It[-1].start(), std::max(It[-1].end(), Range.end())}; - else - Ranges.insert(It, Range); + if (It != Ranges.begin() && Range.start() <= std::prev(It)->end()) { + --It; + *It = {It->start(), std::max(It->end(), Range.end())}; + return It; + } + + return Ranges.insert(It, Range); } -bool AddressRanges::contains(uint64_t Addr) const { +AddressRanges::Collection::const_iterator +AddressRanges::find(uint64_t Addr) const { auto It = std::partition_point( Ranges.begin(), Ranges.end(), [=](const AddressRange &R) { return R.start() <= Addr; }); - return It != Ranges.begin() && Addr < It[-1].end(); + + if (It == Ranges.begin()) + return Ranges.end(); + + --It; + if (Addr >= It->end()) + return Ranges.end(); + + return It; } -bool AddressRanges::contains(AddressRange Range) const { +AddressRanges::Collection::const_iterator +AddressRanges::find(AddressRange Range) const { if (Range.size() == 0) - return false; + return Ranges.end(); + auto It = std::partition_point( Ranges.begin(), Ranges.end(), [=](const AddressRange &R) { return R.start() <= Range.start(); }); + if (It == Ranges.begin()) - return false; - return Range.end() <= It[-1].end(); -} + return Ranges.end(); -Optional<AddressRange> -AddressRanges::getRangeThatContains(uint64_t Addr) const { - auto It = std::partition_point( - Ranges.begin(), Ranges.end(), - [=](const AddressRange &R) { return R.start() <= Addr; }); - if (It != Ranges.begin() && Addr < It[-1].end()) - return It[-1]; - return llvm::None; + --It; + if (Range.end() > It->end()) + return 
Ranges.end(); + + return It; } diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp index e3df172ef113..5e7d63165130 100644 --- a/llvm/lib/Support/CommandLine.cpp +++ b/llvm/lib/Support/CommandLine.cpp @@ -2382,7 +2382,7 @@ protected: for (size_t I = 0, E = Opts.size(); I != E; ++I) { Option *Opt = Opts[I].second; for (auto &Cat : Opt->Categories) { - assert(find(SortedCategories, Cat) != SortedCategories.end() && + assert(llvm::is_contained(SortedCategories, Cat) && "Option has an unregistered category"); CategorizedOptions[Cat].push_back(Opt); } diff --git a/llvm/lib/Support/Compression.cpp b/llvm/lib/Support/Compression.cpp index 21191972fb8b..e8fb715aa770 100644 --- a/llvm/lib/Support/Compression.cpp +++ b/llvm/lib/Support/Compression.cpp @@ -20,6 +20,9 @@ #if LLVM_ENABLE_ZLIB #include <zlib.h> #endif +#if LLVM_ENABLE_ZSTD +#include <zstd.h> +#endif using namespace llvm; using namespace llvm::compression; @@ -100,3 +103,65 @@ Error zlib::uncompress(ArrayRef<uint8_t> Input, llvm_unreachable("zlib::uncompress is unavailable"); } #endif + +#if LLVM_ENABLE_ZSTD + +bool zstd::isAvailable() { return true; } + +void zstd::compress(ArrayRef<uint8_t> Input, + SmallVectorImpl<uint8_t> &CompressedBuffer, int Level) { + unsigned long CompressedBufferSize = ::ZSTD_compressBound(Input.size()); + CompressedBuffer.resize_for_overwrite(CompressedBufferSize); + unsigned long CompressedSize = + ::ZSTD_compress((char *)CompressedBuffer.data(), CompressedBufferSize, + (const char *)Input.data(), Input.size(), Level); + if (ZSTD_isError(CompressedSize)) + report_bad_alloc_error("Allocation failed"); + // Tell MemorySanitizer that zstd output buffer is fully initialized. + // This avoids a false report when running LLVM with uninstrumented ZLib. + __msan_unpoison(CompressedBuffer.data(), CompressedSize); + if (CompressedSize < CompressedBuffer.size()) + CompressedBuffer.truncate(CompressedSize); +} + +Error zstd::uncompress(ArrayRef<uint8_t> Input, uint8_t *UncompressedBuffer, + size_t &UncompressedSize) { + const size_t Res = + ::ZSTD_decompress(UncompressedBuffer, UncompressedSize, + (const uint8_t *)Input.data(), Input.size()); + UncompressedSize = Res; + // Tell MemorySanitizer that zstd output buffer is fully initialized. + // This avoids a false report when running LLVM with uninstrumented ZLib. + __msan_unpoison(UncompressedBuffer, UncompressedSize); + return ZSTD_isError(Res) ? 
make_error<StringError>(ZSTD_getErrorName(Res), + inconvertibleErrorCode()) + : Error::success(); +} + +Error zstd::uncompress(ArrayRef<uint8_t> Input, + SmallVectorImpl<uint8_t> &UncompressedBuffer, + size_t UncompressedSize) { + UncompressedBuffer.resize_for_overwrite(UncompressedSize); + Error E = + zstd::uncompress(Input, UncompressedBuffer.data(), UncompressedSize); + if (UncompressedSize < UncompressedBuffer.size()) + UncompressedBuffer.truncate(UncompressedSize); + return E; +} + +#else +bool zstd::isAvailable() { return false; } +void zstd::compress(ArrayRef<uint8_t> Input, + SmallVectorImpl<uint8_t> &CompressedBuffer, int Level) { + llvm_unreachable("zstd::compress is unavailable"); +} +Error zstd::uncompress(ArrayRef<uint8_t> Input, uint8_t *UncompressedBuffer, + size_t &UncompressedSize) { + llvm_unreachable("zstd::uncompress is unavailable"); +} +Error zstd::uncompress(ArrayRef<uint8_t> Input, + SmallVectorImpl<uint8_t> &UncompressedBuffer, + size_t UncompressedSize) { + llvm_unreachable("zstd::uncompress is unavailable"); +} +#endif diff --git a/llvm/lib/Support/DivisionByConstantInfo.cpp b/llvm/lib/Support/DivisionByConstantInfo.cpp index 69f39386798c..35486674e02f 100644 --- a/llvm/lib/Support/DivisionByConstantInfo.cpp +++ b/llvm/lib/Support/DivisionByConstantInfo.cpp @@ -1,4 +1,4 @@ -//===----- DivisonByConstantInfo.cpp - division by constant -*- C++ -*-----===// +//===----- DivisionByConstantInfo.cpp - division by constant -*- C++ -*----===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -62,11 +62,11 @@ SignedDivisionByConstantInfo SignedDivisionByConstantInfo::get(const APInt &D) { /// S. Warren, Jr., chapter 10. /// LeadingZeros can be used to simplify the calculation if the upper bits /// of the divided value are known zero. -UnsignedDivisonByConstantInfo -UnsignedDivisonByConstantInfo::get(const APInt &D, unsigned LeadingZeros) { +UnsignedDivisionByConstantInfo +UnsignedDivisionByConstantInfo::get(const APInt &D, unsigned LeadingZeros) { unsigned P; APInt NC, Delta, Q1, R1, Q2, R2; - struct UnsignedDivisonByConstantInfo Retval; + struct UnsignedDivisionByConstantInfo Retval; Retval.IsAdd = false; // initialize "add" indicator APInt AllOnes = APInt::getAllOnes(D.getBitWidth()).lshr(LeadingZeros); APInt SignedMin = APInt::getSignedMinValue(D.getBitWidth()); diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp index 7fe04af4696b..0fe286d239d4 100644 --- a/llvm/lib/Support/RISCVISAInfo.cpp +++ b/llvm/lib/Support/RISCVISAInfo.cpp @@ -77,6 +77,8 @@ static const RISCVSupportedExtension SupportedExtensions[] = { {"zkt", RISCVExtensionVersion{1, 0}}, {"zk", RISCVExtensionVersion{1, 0}}, + {"zmmul", RISCVExtensionVersion{1, 0}}, + {"v", RISCVExtensionVersion{1, 0}}, {"zvl32b", RISCVExtensionVersion{1, 0}}, {"zvl64b", RISCVExtensionVersion{1, 0}}, diff --git a/llvm/lib/TableGen/JSONBackend.cpp b/llvm/lib/TableGen/JSONBackend.cpp index e38903910275..6dc466e29df3 100644 --- a/llvm/lib/TableGen/JSONBackend.cpp +++ b/llvm/lib/TableGen/JSONBackend.cpp @@ -129,13 +129,13 @@ void JSONEmitter::run(raw_ostream &OS) { // construct the array for each one. std::map<std::string, json::Array> instance_lists; for (const auto &C : Records.getClasses()) { - auto &Name = C.second->getNameInitAsString(); + const auto Name = C.second->getNameInitAsString(); (void)instance_lists[Name]; } // Main iteration over the defs. 
for (const auto &D : Records.getDefs()) { - auto &Name = D.second->getNameInitAsString(); + const auto Name = D.second->getNameInitAsString(); auto &Def = *D.second; json::Object obj; diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp index 75a99e95541a..6b899a049e6b 100644 --- a/llvm/lib/TableGen/Record.cpp +++ b/llvm/lib/TableGen/Record.cpp @@ -2424,6 +2424,14 @@ void RecordVal::print(raw_ostream &OS, bool PrintSem) const { if (PrintSem) OS << ";\n"; } +void Record::updateClassLoc(SMLoc Loc) { + assert(Locs.size() == 1); + ForwardDeclarationLocs.push_back(Locs.front()); + + Locs.clear(); + Locs.push_back(Loc); +} + void Record::checkName() { // Ensure the record name has string type. const TypedInit *TypedName = cast<const TypedInit>(Name); diff --git a/llvm/lib/TableGen/TGParser.cpp b/llvm/lib/TableGen/TGParser.cpp index acf93dc3d792..aab1802c5348 100644 --- a/llvm/lib/TableGen/TGParser.cpp +++ b/llvm/lib/TableGen/TGParser.cpp @@ -3391,6 +3391,8 @@ bool TGParser::ParseClass() { !CurRec->getTemplateArgs().empty()) return TokError("Class '" + CurRec->getNameInitAsString() + "' already defined"); + + CurRec->updateClassLoc(Lex.getLoc()); } else { // If this is the first reference to this class, create and add it. auto NewRec = diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index b332e9dcb176..8fb5d49e2121 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -216,7 +216,7 @@ def FeatureSlowPaired128 : SubtargetFeature<"slow-paired-128", "IsPaired128Slow", "true", "Paired 128 bit loads and stores are slow">; def FeatureAscendStoreAddress : SubtargetFeature<"ascend-store-address", - "IsStoreAddressAscend", "false", + "IsStoreAddressAscend", "true", "Schedule vector stores by ascending address">; def FeatureSlowSTRQro : SubtargetFeature<"slow-strqro-store", "IsSTRQroSlow", diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 82fe5772c99d..00621b84d2f2 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -69,6 +69,7 @@ public: bool tryMLAV64LaneV128(SDNode *N); bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N); bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift); + bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift); bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift); bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift); bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { @@ -893,6 +894,30 @@ bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, return isWorthFolding(N); } +/// SelectArithUXTXRegister - Select a "UXTX register" operand. 
This +/// operand is refered by the instructions have SP operand +bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg, + SDValue &Shift) { + unsigned ShiftVal = 0; + AArch64_AM::ShiftExtendType Ext; + + if (N.getOpcode() != ISD::SHL) + return false; + + ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); + if (!CSD) + return false; + ShiftVal = CSD->getZExtValue(); + if (ShiftVal > 4) + return false; + + Ext = AArch64_AM::UXTX; + Reg = N.getOperand(0); + Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N), + MVT::i32); + return isWorthFolding(N); +} + /// If there's a use of this ADDlow that's not itself a load/store then we'll /// need to create a real ADD instruction from it anyway and there's no point in /// folding it into the mem op. Theoretically, it shouldn't matter, but there's @@ -4049,6 +4074,24 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { } break; } + case Intrinsic::swift_async_context_addr: { + SDLoc DL(Node); + SDValue Chain = Node->getOperand(0); + SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64); + SDValue Res = SDValue( + CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP, + CurDAG->getTargetConstant(8, DL, MVT::i32), + CurDAG->getTargetConstant(0, DL, MVT::i32)), + 0); + ReplaceUses(SDValue(Node, 0), Res); + ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1)); + CurDAG->RemoveDeadNode(Node); + + auto &MF = CurDAG->getMachineFunction(); + MF.getFrameInfo().setFrameAddressIsTaken(true); + MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true); + return; + } } } break; case ISD::INTRINSIC_WO_CHAIN: { @@ -4094,18 +4137,6 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { if (tryMULLV64LaneV128(IntNo, Node)) return; break; - case Intrinsic::swift_async_context_addr: { - SDLoc DL(Node); - CurDAG->SelectNodeTo(Node, AArch64::SUBXri, MVT::i64, - CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, - AArch64::FP, MVT::i64), - CurDAG->getTargetConstant(8, DL, MVT::i32), - CurDAG->getTargetConstant(0, DL, MVT::i32)); - auto &MF = CurDAG->getMachineFunction(); - MF.getFrameInfo().setFrameAddressIsTaken(true); - MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true); - return; - } } break; } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 447ad10ddf22..e070ce2efa6b 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -521,6 +521,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::CTPOP, MVT::i64, Custom); setOperationAction(ISD::CTPOP, MVT::i128, Custom); + setOperationAction(ISD::PARITY, MVT::i64, Custom); + setOperationAction(ISD::PARITY, MVT::i128, Custom); + setOperationAction(ISD::ABS, MVT::i32, Custom); setOperationAction(ISD::ABS, MVT::i64, Custom); @@ -5463,7 +5466,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, case ISD::SRA_PARTS: return LowerShiftParts(Op, DAG); case ISD::CTPOP: - return LowerCTPOP(Op, DAG); + case ISD::PARITY: + return LowerCTPOP_PARITY(Op, DAG); case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); case ISD::OR: @@ -7783,7 +7787,8 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op, return BitCast(VT, BSP, DAG); } -SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerCTPOP_PARITY(SDValue Op, + SelectionDAG &DAG) const { if 
(DAG.getMachineFunction().getFunction().hasFnAttribute( Attribute::NoImplicitFloat)) return SDValue(); @@ -7791,6 +7796,8 @@ SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const { if (!Subtarget->hasNEON()) return SDValue(); + bool IsParity = Op.getOpcode() == ISD::PARITY; + // While there is no integer popcount instruction, it can // be more efficiently lowered to the following sequence that uses // AdvSIMD registers/instructions as long as the copies to/from @@ -7813,6 +7820,10 @@ SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const { ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32, DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop); + if (IsParity) + UaddLV = DAG.getNode(ISD::AND, DL, MVT::i32, UaddLV, + DAG.getConstant(1, DL, MVT::i32)); + if (VT == MVT::i64) UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV); return UaddLV; @@ -7824,9 +7835,15 @@ SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const { ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32, DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop); + if (IsParity) + UaddLV = DAG.getNode(ISD::AND, DL, MVT::i32, UaddLV, + DAG.getConstant(1, DL, MVT::i32)); + return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, UaddLV); } + assert(!IsParity && "ISD::PARITY of vector types not supported"); + if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT)) return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTPOP_MERGE_PASSTHRU); @@ -11811,6 +11828,12 @@ bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const { isConcatMask(M, VT, VT.getSizeInBits() == 128)); } +bool AArch64TargetLowering::isVectorClearMaskLegal(ArrayRef<int> M, + EVT VT) const { + // Just delegate to the generic legality, clear masks aren't special. + return isShuffleMaskLegal(M, VT); +} + /// getVShiftImm - Check if this is a valid build_vector for the immediate /// operand of a vector shift operation, where all the elements of the /// build_vector must have the same constant integer value. @@ -11969,6 +11992,11 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS, if (IsZero) return DAG.getNode(AArch64ISD::FCMGTz, dl, VT, LHS); return DAG.getNode(AArch64ISD::FCMGT, dl, VT, LHS, RHS); + case AArch64CC::LE: + if (!NoNans) + return SDValue(); + // If we ignore NaNs then we can use to the LS implementation. + LLVM_FALLTHROUGH; case AArch64CC::LS: if (IsZero) return DAG.getNode(AArch64ISD::FCMLEz, dl, VT, LHS); @@ -12073,7 +12101,7 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op, bool ShouldInvert; changeVectorFPCCToAArch64CC(CC, CC1, CC2, ShouldInvert); - bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath; + bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs(); SDValue Cmp = EmitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, dl, DAG); if (!Cmp.getNode()) @@ -13587,21 +13615,50 @@ AArch64TargetLowering::getScratchRegisters(CallingConv::ID) const { bool AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const { - N = N->getOperand(0).getNode(); + assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA || + N->getOpcode() == ISD::SRL) && + "Expected shift op"); + + SDValue ShiftLHS = N->getOperand(0); EVT VT = N->getValueType(0); - // If N is unsigned bit extraction: ((x >> C) & mask), then do not combine - // it with shift to let it be lowered to UBFX. 
- if (N->getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) && - isa<ConstantSDNode>(N->getOperand(1))) { - uint64_t TruncMask = N->getConstantOperandVal(1); + + // If ShiftLHS is unsigned bit extraction: ((x >> C) & mask), then do not combine + // it with shift 'N' to let it be lowered to UBFX. + if (ShiftLHS.getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) && + isa<ConstantSDNode>(ShiftLHS.getOperand(1))) { + uint64_t TruncMask = ShiftLHS.getConstantOperandVal(1); if (isMask_64(TruncMask) && - N->getOperand(0).getOpcode() == ISD::SRL && - isa<ConstantSDNode>(N->getOperand(0)->getOperand(1))) + ShiftLHS.getOperand(0).getOpcode() == ISD::SRL && + isa<ConstantSDNode>(ShiftLHS.getOperand(0).getOperand(1))) return false; } return true; } +bool AArch64TargetLowering::isDesirableToCommuteXorWithShift( + const SDNode *N) const { + assert(N->getOpcode() == ISD::XOR && + (N->getOperand(0).getOpcode() == ISD::SHL || + N->getOperand(0).getOpcode() == ISD::SRL) && + "Expected XOR(SHIFT) pattern"); + + // Only commute if the entire NOT mask is a hidden shifted mask. + auto *XorC = dyn_cast<ConstantSDNode>(N->getOperand(1)); + auto *ShiftC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1)); + if (XorC && ShiftC) { + unsigned MaskIdx, MaskLen; + if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) { + unsigned ShiftAmt = ShiftC->getZExtValue(); + unsigned BitWidth = N->getValueType(0).getScalarSizeInBits(); + if (N->getOperand(0).getOpcode() == ISD::SHL) + return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt); + return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt); + } + } + + return false; +} + bool AArch64TargetLowering::shouldFoldConstantShiftPairToMask( const SDNode *N, CombineLevel Level) const { assert(((N->getOpcode() == ISD::SHL && @@ -19221,6 +19278,41 @@ static SDValue performBSPExpandForSVE(SDNode *N, SelectionDAG &DAG, return DAG.getNode(ISD::OR, DL, VT, Sel, SelInv); } +static SDValue performDupLane128Combine(SDNode *N, SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + + SDValue Insert = N->getOperand(0); + if (Insert.getOpcode() != ISD::INSERT_SUBVECTOR) + return SDValue(); + + if (!Insert.getOperand(0).isUndef()) + return SDValue(); + + uint64_t IdxInsert = Insert.getConstantOperandVal(2); + uint64_t IdxDupLane = N->getConstantOperandVal(1); + if (IdxInsert != IdxDupLane) + return SDValue(); + + SDValue Bitcast = Insert.getOperand(1); + if (Bitcast.getOpcode() != ISD::BITCAST) + return SDValue(); + + SDValue Subvec = Bitcast.getOperand(0); + EVT SubvecVT = Subvec.getValueType(); + if (!SubvecVT.is128BitVector()) + return SDValue(); + EVT NewSubvecVT = + getPackedSVEVectorVT(Subvec.getValueType().getVectorElementType()); + + SDLoc DL(N); + SDValue NewInsert = + DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewSubvecVT, + DAG.getUNDEF(NewSubvecVT), Subvec, Insert->getOperand(2)); + SDValue NewDuplane128 = DAG.getNode(AArch64ISD::DUPLANE128, DL, NewSubvecVT, + NewInsert, N->getOperand(1)); + return DAG.getNode(ISD::BITCAST, DL, VT, NewDuplane128); +} + SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -19307,6 +19399,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, return performCSELCombine(N, DCI, DAG); case AArch64ISD::DUP: return performDUPCombine(N, DCI); + case AArch64ISD::DUPLANE128: + return performDupLane128Combine(N, DAG); case AArch64ISD::NVCAST: return performNVCASTCombine(N); case AArch64ISD::SPLICE: @@ -19981,7 +20075,8 @@ void 
AArch64TargetLowering::ReplaceNodeResults( return; case ISD::CTPOP: - if (SDValue Result = LowerCTPOP(SDValue(N, 0), DAG)) + case ISD::PARITY: + if (SDValue Result = LowerCTPOP_PARITY(SDValue(N, 0), DAG)) Results.push_back(Result); return; case AArch64ISD::SADDV: diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index e02b5e56fd2e..1ba2e2f315ec 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -549,6 +549,10 @@ public: /// should be stack expanded. bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override; + /// Similar to isShuffleMaskLegal. Return true is the given 'select with zero' + /// shuffle mask can be codegen'd directly. + bool isVectorClearMaskLegal(ArrayRef<int> M, EVT VT) const override; + /// Return the ISD::SETCC ValueType. EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override; @@ -653,6 +657,9 @@ public: bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override; + /// Returns false if N is a bit extraction pattern of (X >> C) & Mask. + bool isDesirableToCommuteXorWithShift(const SDNode *N) const override; + /// Return true if it is profitable to fold a pair of shifts into a mask. bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const override; @@ -995,7 +1002,7 @@ private: SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerCTPOP_PARITY(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 02fa36a1df4b..e70d304f37b9 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -1168,6 +1168,8 @@ def gi_arith_extended_reg32to64_i64 : GIComplexOperandMatcher<s64, "selectArithExtendedRegister">, GIComplexPatternEquiv<arith_extended_reg32to64_i64>; +def arith_uxtx : ComplexPattern<i64, 2, "SelectArithUXTXRegister", []>; + // Floating-point immediate. def fpimm16XForm : SDNodeXForm<fpimm, [{ @@ -1234,6 +1236,10 @@ def fpimm0 : FPImmLeaf<fAny, [{ return Imm.isExactlyValue(+0.0); }]>; +def fpimm_minus0 : FPImmLeaf<fAny, [{ + return Imm.isExactlyValue(-0.0); +}]>; + def fpimm_half : FPImmLeaf<fAny, [{ return Imm.isExactlyValue(+0.5); }]>; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index d444223e4494..a7b7e5270888 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -1691,6 +1691,11 @@ def : InstAlias<"mov $dst, $src", defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn", "subs", "cmp">; defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp", "adds", "cmn">; +def copyFromSP: PatLeaf<(i64 GPR64:$src), [{ + return N->getOpcode() == ISD::CopyFromReg && + cast<RegisterSDNode>(N->getOperand(1))->getReg() == AArch64::SP; +}]>; + // Use SUBS instead of SUB to enable CSE between SUBS and SUB. 
def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm), (SUBSWri GPR32sp:$Rn, addsub_shifted_imm32:$imm)>; @@ -1709,6 +1714,8 @@ def : Pat<(sub GPR32sp:$R2, arith_extended_reg32_i32:$R3), (SUBSWrx GPR32sp:$R2, arith_extended_reg32_i32:$R3)>; def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64_i64:$R3), (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64_i64:$R3)>; +def : Pat<(sub copyFromSP:$R2, (arith_uxtx GPR64:$R3, arith_extendlsl64:$imm)), + (SUBXrx64 GPR64sp:$R2, GPR64:$R3, arith_extendlsl64:$imm)>; } // Because of the immediate format for add/sub-imm instructions, the @@ -5293,6 +5300,9 @@ def : Pat<(int_aarch64_neon_pmull64 (extractelt (v2i64 V128:$Rn), (i64 1)), // CodeGen patterns for addhn and subhn instructions, which can actually be // written in LLVM IR without too much difficulty. +// Prioritize ADDHN and SUBHN over UZP2. +let AddedComplexity = 10 in { + // ADDHN def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))), (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>; @@ -5343,6 +5353,8 @@ def : Pat<(concat_vectors (v2i32 V64:$Rd), (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rn, V128:$Rm)>; +} // AddedComplexity = 10 + //---------------------------------------------------------------------------- // AdvSIMD bitwise extract from vector instruction. //---------------------------------------------------------------------------- @@ -5409,6 +5421,19 @@ def : Pat<(v4i32 (concat_vectors (v2i32 (trunc (v2i64 V128:$Vn))), (v2i32 (trunc (v2i64 V128:$Vm))))), (UZP1v4i32 V128:$Vn, V128:$Vm)>; +def : Pat<(v16i8 (concat_vectors + (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vn), (i32 8)))), + (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vm), (i32 8)))))), + (UZP2v16i8 V128:$Vn, V128:$Vm)>; +def : Pat<(v8i16 (concat_vectors + (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vn), (i32 16)))), + (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vm), (i32 16)))))), + (UZP2v8i16 V128:$Vn, V128:$Vm)>; +def : Pat<(v4i32 (concat_vectors + (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vn), (i32 32)))), + (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vm), (i32 32)))))), + (UZP2v4i32 V128:$Vn, V128:$Vm)>; + //---------------------------------------------------------------------------- // AdvSIMD TBL/TBX instructions //---------------------------------------------------------------------------- diff --git a/llvm/lib/Target/AArch64/AArch64MachineScheduler.cpp b/llvm/lib/Target/AArch64/AArch64MachineScheduler.cpp index 6c8845ee8598..79866c9b0a05 100644 --- a/llvm/lib/Target/AArch64/AArch64MachineScheduler.cpp +++ b/llvm/lib/Target/AArch64/AArch64MachineScheduler.cpp @@ -22,8 +22,8 @@ static bool needReorderStoreMI(const MachineInstr *MI) { return false; case AArch64::STURQi: case AArch64::STRQui: - if (MI->getMF()->getSubtarget<AArch64Subtarget>().isStoreAddressAscend()) - return false; + if (!MI->getMF()->getSubtarget<AArch64Subtarget>().isStoreAddressAscend()) + return false; LLVM_FALLTHROUGH; case AArch64::STPQi: return AArch64InstrInfo::getLdStOffsetOp(*MI).isImm(); diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index c66f9cfd9c22..4032c4667bc7 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -278,10 +278,18 @@ def AArch64scvtf_mt : SDNode<"AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU", SDT_AArch def AArch64fcvtzu_mt : SDNode<"AArch64ISD::FCVTZU_MERGE_PASSTHRU", SDT_AArch64FCVT>; def AArch64fcvtzs_mt : SDNode<"AArch64ISD::FCVTZS_MERGE_PASSTHRU", SDT_AArch64FCVT>; -def 
SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<3>]>; -def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithInit>; -def AArch64clastb_n : SDNode<"AArch64ISD::CLASTB_N", SDT_AArch64ReduceWithInit>; -def AArch64fadda_p : SDNode<"AArch64ISD::FADDA_PRED", SDT_AArch64ReduceWithInit>; +def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, + [SDTCisVec<1>, SDTCVecEltisVT<1,i1>, SDTCisVec<3>, SDTCisSameNumEltsAs<1,3>]>; +def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithInit>; +def AArch64clastb_n : SDNode<"AArch64ISD::CLASTB_N", SDT_AArch64ReduceWithInit>; +def AArch64fadda_p_node : SDNode<"AArch64ISD::FADDA_PRED", SDT_AArch64ReduceWithInit>; + +def AArch64fadda_p : PatFrags<(ops node:$op1, node:$op2, node:$op3), + [(AArch64fadda_p_node node:$op1, node:$op2, node:$op3), + (AArch64fadda_p_node (SVEAllActive), node:$op2, + (vselect node:$op1, node:$op3, (splat_vector (f32 fpimm_minus0)))), + (AArch64fadda_p_node (SVEAllActive), node:$op2, + (vselect node:$op1, node:$op3, (splat_vector (f64 fpimm_minus0))))]>; def SDT_AArch64PTest : SDTypeProfile<0, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; def AArch64ptest : SDNode<"AArch64ISD::PTEST", SDT_AArch64PTest>; @@ -447,6 +455,16 @@ let Predicates = [HasSVEorSME] in { defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs", AArch64fabs_mt>; defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg", AArch64fneg_mt>; + // zext(cmpeq(x, splat(0))) -> cnot(x) + def : Pat<(nxv16i8 (zext (nxv16i1 (AArch64setcc_z (nxv16i1 (SVEAllActive):$Pg), nxv16i8:$Op2, (SVEDup0), SETEQ)))), + (CNOT_ZPmZ_B $Op2, $Pg, $Op2)>; + def : Pat<(nxv8i16 (zext (nxv8i1 (AArch64setcc_z (nxv8i1 (SVEAllActive):$Pg), nxv8i16:$Op2, (SVEDup0), SETEQ)))), + (CNOT_ZPmZ_H $Op2, $Pg, $Op2)>; + def : Pat<(nxv4i32 (zext (nxv4i1 (AArch64setcc_z (nxv4i1 (SVEAllActive):$Pg), nxv4i32:$Op2, (SVEDup0), SETEQ)))), + (CNOT_ZPmZ_S $Op2, $Pg, $Op2)>; + def : Pat<(nxv2i64 (zext (nxv2i1 (AArch64setcc_z (nxv2i1 (SVEAllActive):$Pg), nxv2i64:$Op2, (SVEDup0), SETEQ)))), + (CNOT_ZPmZ_D $Op2, $Pg, $Op2)>; + defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", "SMAX_ZPZZ", int_aarch64_sve_smax, DestructiveBinaryComm>; defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", "UMAX_ZPZZ", int_aarch64_sve_umax, DestructiveBinaryComm>; defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin", "SMIN_ZPZZ", int_aarch64_sve_smin, DestructiveBinaryComm>; @@ -857,6 +875,16 @@ let Predicates = [HasSVEorSME] in { defm LD1RQ_W : sve_mem_ldqr_ss<0b10, "ld1rqw", Z_s, ZPR32, GPR64NoXZRshifted32>; defm LD1RQ_D : sve_mem_ldqr_ss<0b11, "ld1rqd", Z_d, ZPR64, GPR64NoXZRshifted64>; + let AddedComplexity = 1 in { + class LD1RQPat<ValueType vt1, ValueType vt2, SDPatternOperator op, Instruction load_instr, Instruction ptrue> : + Pat<(vt1 (op (vt1 (vector_insert_subvec (vt1 undef), (vt2 (load GPR64sp:$Xn)), (i64 0))), (i64 0))), + (load_instr (ptrue 31), GPR64sp:$Xn, 0)>; + } + def : LD1RQPat<nxv16i8, v16i8, AArch64duplane128, LD1RQ_B_IMM, PTRUE_B>; + def : LD1RQPat<nxv8i16, v8i16, AArch64duplane128, LD1RQ_H_IMM, PTRUE_H>; + def : LD1RQPat<nxv4i32, v4i32, AArch64duplane128, LD1RQ_W_IMM, PTRUE_S>; + def : LD1RQPat<nxv2i64, v2i64, AArch64duplane128, LD1RQ_D_IMM, PTRUE_D>; + // continuous load with reg+reg addressing. 
defm LD1B : sve_mem_cld_ss<0b0000, "ld1b", Z_b, ZPR8, GPR64NoXZRshifted8>; defm LD1B_H : sve_mem_cld_ss<0b0001, "ld1b", Z_h, ZPR16, GPR64NoXZRshifted8>; diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index 3f9795f5198b..47e4c6589c26 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -128,7 +128,7 @@ static cl::opt<bool> static cl::opt<bool> EnableGEPOpt("aarch64-enable-gep-opt", cl::Hidden, cl::desc("Enable optimizations on complex GEPs"), - cl::init(false)); + cl::init(true)); static cl::opt<bool> BranchRelaxation("aarch64-enable-branch-relax", cl::Hidden, cl::init(true), @@ -563,17 +563,6 @@ void AArch64PassConfig::addIRPasses() { addPass(createFalkorMarkStridedAccessesPass()); } - TargetPassConfig::addIRPasses(); - - addPass(createAArch64StackTaggingPass( - /*IsOptNone=*/TM->getOptLevel() == CodeGenOpt::None)); - - // Match interleaved memory accesses to ldN/stN intrinsics. - if (TM->getOptLevel() != CodeGenOpt::None) { - addPass(createInterleavedLoadCombinePass()); - addPass(createInterleavedAccessPass()); - } - if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) { // Call SeparateConstOffsetFromGEP pass to extract constants within indices // and lower a GEP with multiple indices to either arithmetic operations or @@ -587,6 +576,17 @@ void AArch64PassConfig::addIRPasses() { addPass(createLICMPass()); } + TargetPassConfig::addIRPasses(); + + addPass(createAArch64StackTaggingPass( + /*IsOptNone=*/TM->getOptLevel() == CodeGenOpt::None)); + + // Match interleaved memory accesses to ldN/stN intrinsics. + if (TM->getOptLevel() != CodeGenOpt::None) { + addPass(createInterleavedLoadCombinePass()); + addPass(createInterleavedAccessPass()); + } + // Add Control Flow Guard checks. if (TM->getTargetTriple().isOSWindows()) addPass(createCFGuardCheckPass()); diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 274a025e82a0..66617393c9ae 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -22,6 +22,7 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/InstCombine/InstCombiner.h" +#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h" #include <algorithm> using namespace llvm; using namespace llvm::PatternMatch; @@ -37,6 +38,74 @@ static cl::opt<unsigned> SVEGatherOverhead("sve-gather-overhead", cl::init(10), static cl::opt<unsigned> SVEScatterOverhead("sve-scatter-overhead", cl::init(10), cl::Hidden); +class TailFoldingKind { +private: + uint8_t Bits = 0; // Currently defaults to disabled. + +public: + enum TailFoldingOpts { + TFDisabled = 0x0, + TFReductions = 0x01, + TFRecurrences = 0x02, + TFSimple = 0x80, + TFAll = TFReductions | TFRecurrences | TFSimple + }; + + void operator=(const std::string &Val) { + if (Val.empty()) + return; + SmallVector<StringRef, 6> TailFoldTypes; + StringRef(Val).split(TailFoldTypes, '+', -1, false); + for (auto TailFoldType : TailFoldTypes) { + if (TailFoldType == "disabled") + Bits = 0; + else if (TailFoldType == "all") + Bits = TFAll; + else if (TailFoldType == "default") + Bits = 0; // Currently defaults to never tail-folding. 
+ else if (TailFoldType == "simple") + add(TFSimple); + else if (TailFoldType == "reductions") + add(TFReductions); + else if (TailFoldType == "recurrences") + add(TFRecurrences); + else if (TailFoldType == "noreductions") + remove(TFReductions); + else if (TailFoldType == "norecurrences") + remove(TFRecurrences); + else { + errs() + << "invalid argument " << TailFoldType.str() + << " to -sve-tail-folding=; each element must be one of: disabled, " + "all, default, simple, reductions, noreductions, recurrences, " + "norecurrences\n"; + } + } + } + + operator uint8_t() const { return Bits; } + + void add(uint8_t Flag) { Bits |= Flag; } + void remove(uint8_t Flag) { Bits &= ~Flag; } +}; + +TailFoldingKind TailFoldingKindLoc; + +cl::opt<TailFoldingKind, true, cl::parser<std::string>> SVETailFolding( + "sve-tail-folding", + cl::desc( + "Control the use of vectorisation using tail-folding for SVE:" + "\ndisabled No loop types will vectorize using tail-folding" + "\ndefault Uses the default tail-folding settings for the target " + "CPU" + "\nall All legal loop types will vectorize using tail-folding" + "\nsimple Use tail-folding for simple loops (not reductions or " + "recurrences)" + "\nreductions Use tail-folding for loops containing reductions" + "\nrecurrences Use tail-folding for loops containing first order " + "recurrences"), + cl::location(TailFoldingKindLoc)); + bool AArch64TTIImpl::areInlineCompatible(const Function *Caller, const Function *Callee) const { const TargetMachine &TM = getTLI()->getTargetMachine(); @@ -2955,3 +3024,20 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, return BaseT::getShuffleCost(Kind, Tp, Mask, Index, SubTp); } + +bool AArch64TTIImpl::preferPredicateOverEpilogue( + Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, + TargetLibraryInfo *TLI, DominatorTree *DT, LoopVectorizationLegality *LVL) { + if (!ST->hasSVE() || TailFoldingKindLoc == TailFoldingKind::TFDisabled) + return false; + + TailFoldingKind Required; // Defaults to 0. 
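  // The checks below collect the kinds of tail-folding this loop would need,
  // and predication is preferred only when the -sve-tail-folding mask covers
  // all of them. As an assumed example (values are '+'-separated, as parsed
  // above): -mllvm -sve-tail-folding=reductions+recurrences enables
  // tail-folding for loops containing reductions and/or first-order
  // recurrences, while plain loops fall back to the usual epilogue handling.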
+ if (LVL->getReductionVars().size()) + Required.add(TailFoldingKind::TFReductions); + if (LVL->getFirstOrderRecurrences().size()) + Required.add(TailFoldingKind::TFRecurrences); + if (!Required) + Required.add(TailFoldingKind::TFSimple); + + return (TailFoldingKindLoc & Required) == Required; +} diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index 59ec91843266..2231f8705998 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -340,6 +340,11 @@ public: return PredicationStyle::None; } + bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, + AssumptionCache &AC, TargetLibraryInfo *TLI, + DominatorTree *DT, + LoopVectorizationLegality *LVL); + bool supportsScalableVectors() const { return ST->hasSVE(); } bool enableScalableVectorization() const { return ST->hasSVE(); } @@ -347,6 +352,11 @@ public: bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const; + bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, + TTI::ReductionFlags Flags) const { + return ST->hasSVE(); + } + InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, Optional<FastMathFlags> FMF, TTI::TargetCostKind CostKind); diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp index f129bfe11e4d..3fe3b2a69855 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -231,7 +231,70 @@ void AArch64_MC::initLLVMToCVRegMapping(MCRegisterInfo *MRI) { {codeview::RegisterId::ARM64_Q29, AArch64::Q29}, {codeview::RegisterId::ARM64_Q30, AArch64::Q30}, {codeview::RegisterId::ARM64_Q31, AArch64::Q31}, - + {codeview::RegisterId::ARM64_B0, AArch64::B0}, + {codeview::RegisterId::ARM64_B1, AArch64::B1}, + {codeview::RegisterId::ARM64_B2, AArch64::B2}, + {codeview::RegisterId::ARM64_B3, AArch64::B3}, + {codeview::RegisterId::ARM64_B4, AArch64::B4}, + {codeview::RegisterId::ARM64_B5, AArch64::B5}, + {codeview::RegisterId::ARM64_B6, AArch64::B6}, + {codeview::RegisterId::ARM64_B7, AArch64::B7}, + {codeview::RegisterId::ARM64_B8, AArch64::B8}, + {codeview::RegisterId::ARM64_B9, AArch64::B9}, + {codeview::RegisterId::ARM64_B10, AArch64::B10}, + {codeview::RegisterId::ARM64_B11, AArch64::B11}, + {codeview::RegisterId::ARM64_B12, AArch64::B12}, + {codeview::RegisterId::ARM64_B13, AArch64::B13}, + {codeview::RegisterId::ARM64_B14, AArch64::B14}, + {codeview::RegisterId::ARM64_B15, AArch64::B15}, + {codeview::RegisterId::ARM64_B16, AArch64::B16}, + {codeview::RegisterId::ARM64_B17, AArch64::B17}, + {codeview::RegisterId::ARM64_B18, AArch64::B18}, + {codeview::RegisterId::ARM64_B19, AArch64::B19}, + {codeview::RegisterId::ARM64_B20, AArch64::B20}, + {codeview::RegisterId::ARM64_B21, AArch64::B21}, + {codeview::RegisterId::ARM64_B22, AArch64::B22}, + {codeview::RegisterId::ARM64_B23, AArch64::B23}, + {codeview::RegisterId::ARM64_B24, AArch64::B24}, + {codeview::RegisterId::ARM64_B25, AArch64::B25}, + {codeview::RegisterId::ARM64_B26, AArch64::B26}, + {codeview::RegisterId::ARM64_B27, AArch64::B27}, + {codeview::RegisterId::ARM64_B28, AArch64::B28}, + {codeview::RegisterId::ARM64_B29, AArch64::B29}, + {codeview::RegisterId::ARM64_B30, AArch64::B30}, + {codeview::RegisterId::ARM64_B31, AArch64::B31}, + {codeview::RegisterId::ARM64_H0, AArch64::H0}, + 
{codeview::RegisterId::ARM64_H1, AArch64::H1}, + {codeview::RegisterId::ARM64_H2, AArch64::H2}, + {codeview::RegisterId::ARM64_H3, AArch64::H3}, + {codeview::RegisterId::ARM64_H4, AArch64::H4}, + {codeview::RegisterId::ARM64_H5, AArch64::H5}, + {codeview::RegisterId::ARM64_H6, AArch64::H6}, + {codeview::RegisterId::ARM64_H7, AArch64::H7}, + {codeview::RegisterId::ARM64_H8, AArch64::H8}, + {codeview::RegisterId::ARM64_H9, AArch64::H9}, + {codeview::RegisterId::ARM64_H10, AArch64::H10}, + {codeview::RegisterId::ARM64_H11, AArch64::H11}, + {codeview::RegisterId::ARM64_H12, AArch64::H12}, + {codeview::RegisterId::ARM64_H13, AArch64::H13}, + {codeview::RegisterId::ARM64_H14, AArch64::H14}, + {codeview::RegisterId::ARM64_H15, AArch64::H15}, + {codeview::RegisterId::ARM64_H16, AArch64::H16}, + {codeview::RegisterId::ARM64_H17, AArch64::H17}, + {codeview::RegisterId::ARM64_H18, AArch64::H18}, + {codeview::RegisterId::ARM64_H19, AArch64::H19}, + {codeview::RegisterId::ARM64_H20, AArch64::H20}, + {codeview::RegisterId::ARM64_H21, AArch64::H21}, + {codeview::RegisterId::ARM64_H22, AArch64::H22}, + {codeview::RegisterId::ARM64_H23, AArch64::H23}, + {codeview::RegisterId::ARM64_H24, AArch64::H24}, + {codeview::RegisterId::ARM64_H25, AArch64::H25}, + {codeview::RegisterId::ARM64_H26, AArch64::H26}, + {codeview::RegisterId::ARM64_H27, AArch64::H27}, + {codeview::RegisterId::ARM64_H28, AArch64::H28}, + {codeview::RegisterId::ARM64_H29, AArch64::H29}, + {codeview::RegisterId::ARM64_H30, AArch64::H30}, + {codeview::RegisterId::ARM64_H31, AArch64::H31}, }; for (const auto &I : RegMap) MRI->mapLLVMRegToCVReg(I.Reg, static_cast<int>(I.CVReg)); diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 48b5814cd482..2d6f1438e315 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -585,6 +585,12 @@ def FeatureMAIInsts : SubtargetFeature<"mai-insts", "Has mAI instructions" >; +def FeatureFP8Insts : SubtargetFeature<"fp8-insts", + "HasFP8Insts", + "true", + "Has fp8 and bf8 instructions" +>; + def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst", "HasPkFmacF16Inst", "true", @@ -1124,6 +1130,7 @@ def FeatureISAVersion9_4_0 : FeatureSet< Feature64BitDPP, FeaturePackedFP32Ops, FeatureMAIInsts, + FeatureFP8Insts, FeaturePkFmacF16Inst, FeatureAtomicFaddRtnInsts, FeatureAtomicFaddNoRtnInsts, @@ -1265,11 +1272,14 @@ def FeatureISAVersion11_Common : FeatureSet< FeaturePackedTID, FeatureVcmpxPermlaneHazard]>; -// Features for GFX 11.0.0 and 11.0.1 -def FeatureISAVersion11_0 : FeatureSet< +def FeatureISAVersion11_0_0 : FeatureSet< !listconcat(FeatureISAVersion11_Common.Features, [FeatureUserSGPRInit16Bug])>; +def FeatureISAVersion11_0_1 : FeatureSet< + !listconcat(FeatureISAVersion11_Common.Features, + [])>; + def FeatureISAVersion11_0_2 : FeatureSet< !listconcat(FeatureISAVersion11_Common.Features, [FeatureUserSGPRInit16Bug])>; @@ -1704,6 +1714,9 @@ def HasSMemTimeInst : Predicate<"Subtarget->hasSMemTimeInst()">, def HasShaderCyclesRegister : Predicate<"Subtarget->hasShaderCyclesRegister()">, AssemblerPredicate<(all_of FeatureShaderCyclesRegister)>; +def HasFP8Insts : Predicate<"Subtarget->hasFP8Insts()">, + AssemblerPredicate<(all_of FeatureFP8Insts)>; + def HasPkFmacF16Inst : Predicate<"Subtarget->hasPkFmacF16Inst()">, AssemblerPredicate<(all_of FeaturePkFmacF16Inst)>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp index d28f38e42430..d361e33995cf 100644 --- 
a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp @@ -74,6 +74,7 @@ void AMDGPUArgumentUsageInfo::print(raw_ostream &OS, const Module *M) const { << " WorkGroupIDY: " << FI.second.WorkGroupIDY << " WorkGroupIDZ: " << FI.second.WorkGroupIDZ << " WorkGroupInfo: " << FI.second.WorkGroupInfo + << " LDSKernelId: " << FI.second.LDSKernelId << " PrivateSegmentWaveByteOffset: " << FI.second.PrivateSegmentWaveByteOffset << " ImplicitBufferPtr: " << FI.second.ImplicitBufferPtr @@ -107,6 +108,9 @@ AMDGPUFunctionArgInfo::getPreloadedValue( case AMDGPUFunctionArgInfo::WORKGROUP_ID_Z: return std::make_tuple(WorkGroupIDZ ? &WorkGroupIDZ : nullptr, &AMDGPU::SGPR_32RegClass, LLT::scalar(32)); + case AMDGPUFunctionArgInfo::LDS_KERNEL_ID: + return std::make_tuple(LDSKernelId ? &LDSKernelId : nullptr, + &AMDGPU::SGPR_32RegClass, LLT::scalar(32)); case AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET: return std::make_tuple( PrivateSegmentWaveByteOffset ? &PrivateSegmentWaveByteOffset : nullptr, @@ -162,6 +166,7 @@ constexpr AMDGPUFunctionArgInfo AMDGPUFunctionArgInfo::fixedABILayout() { AI.WorkGroupIDX = ArgDescriptor::createRegister(AMDGPU::SGPR12); AI.WorkGroupIDY = ArgDescriptor::createRegister(AMDGPU::SGPR13); AI.WorkGroupIDZ = ArgDescriptor::createRegister(AMDGPU::SGPR14); + AI.LDSKernelId = ArgDescriptor::createRegister(AMDGPU::SGPR15); const unsigned Mask = 0x3ff; AI.WorkItemIDX = ArgDescriptor::createRegister(AMDGPU::VGPR31, Mask); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h index e9ed45d8cd14..f595e469f998 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h @@ -103,6 +103,7 @@ struct AMDGPUFunctionArgInfo { KERNARG_SEGMENT_PTR = 3, DISPATCH_ID = 4, FLAT_SCRATCH_INIT = 5, + LDS_KERNEL_ID = 6, // LLVM internal, not part of the ABI WORKGROUP_ID_X = 10, WORKGROUP_ID_Y = 11, WORKGROUP_ID_Z = 12, @@ -128,6 +129,7 @@ struct AMDGPUFunctionArgInfo { ArgDescriptor DispatchID; ArgDescriptor FlatScratchInit; ArgDescriptor PrivateSegmentSize; + ArgDescriptor LDSKernelId; // System SGPRs in kernels. 
ArgDescriptor WorkGroupIDX; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 57a4660bc1eb..13a65f1ad601 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -27,8 +27,10 @@ #include "SIMachineFunctionInfo.h" #include "TargetInfo/AMDGPUTargetInfo.h" #include "Utils/AMDGPUBaseInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -415,6 +417,10 @@ uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties( amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32; } + if (CurrentProgramInfo.DynamicCallStack) { + KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK; + } + return KernelCodeProperties; } @@ -506,6 +512,9 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { emitFunctionBody(); + emitResourceUsageRemarks(MF, CurrentProgramInfo, MFI->isModuleEntryFunction(), + STM.hasMAIInsts()); + if (isVerbose()) { MCSectionELF *CommentSection = Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0); @@ -875,6 +884,9 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, LDSAlignShift = 9; } + ProgInfo.SGPRSpill = MFI->getNumSpilledSGPRs(); + ProgInfo.VGPRSpill = MFI->getNumSpilledVGPRs(); + ProgInfo.LDSSize = MFI->getLDSSize(); ProgInfo.LDSBlocks = alignTo(ProgInfo.LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift; @@ -1180,3 +1192,58 @@ void AMDGPUAsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<AMDGPUResourceUsageAnalysis>(); AsmPrinter::getAnalysisUsage(AU); } + +void AMDGPUAsmPrinter::emitResourceUsageRemarks( + const MachineFunction &MF, const SIProgramInfo &CurrentProgramInfo, + bool isModuleEntryFunction, bool hasMAIInsts) { + if (!ORE) + return; + + const char *Name = "kernel-resource-usage"; + const char *Indent = " "; + + // If the remark is not specifically enabled, do not output to yaml + LLVMContext &Ctx = MF.getFunction().getContext(); + if (!Ctx.getDiagHandlerPtr()->isAnalysisRemarkEnabled(Name)) + return; + + auto EmitResourceUsageRemark = [&](StringRef RemarkName, + StringRef RemarkLabel, auto Argument) { + // Add an indent for every line besides the line with the kernel name. This + // makes it easier to tell which resource usage go with which kernel since + // the kernel name will always be displayed first. + std::string LabelStr = RemarkLabel.str() + ": "; + if (!RemarkName.equals("FunctionName")) + LabelStr = Indent + LabelStr; + + ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis(Name, RemarkName, + MF.getFunction().getSubprogram(), + &MF.front()) + << LabelStr << ore::NV(RemarkName, Argument); + }); + }; + + // FIXME: Formatting here is pretty nasty because clang does not accept + // newlines from diagnostics. This forces us to emit multiple diagnostic + // remarks to simulate newlines. If and when clang does accept newlines, this + // formatting should be aggregated into one remark with newlines to avoid + // printing multiple diagnostic location and diag opts. 
+ EmitResourceUsageRemark("FunctionName", "Function Name", + MF.getFunction().getName()); + EmitResourceUsageRemark("NumSGPR", "SGPRs", CurrentProgramInfo.NumSGPR); + EmitResourceUsageRemark("NumVGPR", "VGPRs", CurrentProgramInfo.NumArchVGPR); + if (hasMAIInsts) + EmitResourceUsageRemark("NumAGPR", "AGPRs", CurrentProgramInfo.NumAccVGPR); + EmitResourceUsageRemark("ScratchSize", "ScratchSize [bytes/lane]", + CurrentProgramInfo.ScratchSize); + EmitResourceUsageRemark("Occupancy", "Occupancy [waves/SIMD]", + CurrentProgramInfo.Occupancy); + EmitResourceUsageRemark("SGPRSpill", "SGPRs Spill", + CurrentProgramInfo.SGPRSpill); + EmitResourceUsageRemark("VGPRSpill", "VGPRs Spill", + CurrentProgramInfo.VGPRSpill); + if (isModuleEntryFunction) + EmitResourceUsageRemark("BytesLDS", "LDS Size [bytes/block]", + CurrentProgramInfo.LDSSize); +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h index ddda2cf107b1..2881b8d7bcca 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -69,6 +69,9 @@ private: uint64_t ScratchSize, uint64_t CodeSize, const AMDGPUMachineFunction* MFI); + void emitResourceUsageRemarks(const MachineFunction &MF, + const SIProgramInfo &CurrentProgramInfo, + bool isModuleEntryFunction, bool hasMAIInsts); uint16_t getAmdhsaKernelCodeProperties( const MachineFunction &MF) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def b/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def index 0a2cf3874245..c7a060c5db5b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def @@ -27,5 +27,6 @@ AMDGPU_ATTRIBUTE(WORKGROUP_ID_Z, "amdgpu-no-workgroup-id-z") AMDGPU_ATTRIBUTE(WORKITEM_ID_X, "amdgpu-no-workitem-id-x") AMDGPU_ATTRIBUTE(WORKITEM_ID_Y, "amdgpu-no-workitem-id-y") AMDGPU_ATTRIBUTE(WORKITEM_ID_Z, "amdgpu-no-workitem-id-z") +AMDGPU_ATTRIBUTE(LDS_KERNEL_ID, "amdgpu-no-lds-kernel-id") #undef AMDGPU_ATTRIBUTE diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index 8de0d7e6bff1..a3634d2440c3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -72,6 +72,8 @@ intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit, case Intrinsic::amdgcn_workgroup_id_z: case Intrinsic::r600_read_tgid_z: return WORKGROUP_ID_Z; + case Intrinsic::amdgcn_lds_kernel_id: + return LDS_KERNEL_ID; case Intrinsic::amdgcn_dispatch_ptr: return DISPATCH_PTR; case Intrinsic::amdgcn_dispatch_id: @@ -457,6 +459,10 @@ struct AAAMDAttributesFunction : public AAAMDAttributes { removeAssumedBits(QUEUE_PTR); } + if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A)) { + removeAssumedBits(LDS_KERNEL_ID); + } + return getAssumed() != OrigAssumed ? 
ChangeStatus::CHANGED : ChangeStatus::UNCHANGED; } @@ -591,6 +597,16 @@ private: return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this, UsedAssumedInformation); } + + bool funcRetrievesLDSKernelId(Attributor &A) { + auto DoesNotRetrieve = [&](Instruction &I) { + auto &Call = cast<CallBase>(I); + return Call.getIntrinsicID() != Intrinsic::amdgcn_lds_kernel_id; + }; + bool UsedAssumedInformation = false; + return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this, + UsedAssumedInformation); + } }; AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP, @@ -743,7 +759,8 @@ public: AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM); DenseSet<const char *> Allowed( {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID, - &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID, &AAPointerInfo::ID}); + &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID, + &AAPointerInfo::ID}); AttributorConfig AC(CGUpdater); AC.Allowed = &Allowed; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index fd812eb676ef..4550cfdcf883 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -764,7 +764,8 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder, AMDGPUFunctionArgInfo::DISPATCH_ID, AMDGPUFunctionArgInfo::WORKGROUP_ID_X, AMDGPUFunctionArgInfo::WORKGROUP_ID_Y, - AMDGPUFunctionArgInfo::WORKGROUP_ID_Z + AMDGPUFunctionArgInfo::WORKGROUP_ID_Z, + AMDGPUFunctionArgInfo::LDS_KERNEL_ID, }; static constexpr StringLiteral ImplicitAttrNames[] = { @@ -774,7 +775,8 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder, "amdgpu-no-dispatch-id", "amdgpu-no-workgroup-id-x", "amdgpu-no-workgroup-id-y", - "amdgpu-no-workgroup-id-z" + "amdgpu-no-workgroup-id-z", + "amdgpu-no-lds-kernel-id", }; MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -810,6 +812,14 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder, LI->loadInputValue(InputReg, MIRBuilder, IncomingArg, ArgRC, ArgTy); } else if (InputID == AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR) { LI->getImplicitArgPtr(InputReg, MRI, MIRBuilder); + } else if (InputID == AMDGPUFunctionArgInfo::LDS_KERNEL_ID) { + Optional<uint32_t> Id = + AMDGPUMachineFunction::getLDSKernelIdMetadata(MF.getFunction()); + if (Id.has_value()) { + MIRBuilder.buildConstant(InputReg, Id.value()); + } else { + MIRBuilder.buildUndef(InputReg); + } } else { // We may have proven the input wasn't needed, although the ABI is // requiring it. We just need to allocate the register appropriately. 
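For orientation, the call-lowering hunk above makes passSpecialInputs() materialize the new LDS_KERNEL_ID input from function metadata when it is known, and fall back to an undef value otherwise. A minimal sketch of the metadata read it relies on is shown below; the helper name readLDSKernelId is hypothetical, and the real implementation is the AMDGPUMachineFunction::getLDSKernelIdMetadata() added later in this same patch.

  #include <cstdint>
  #include "llvm/ADT/Optional.h"
  #include "llvm/IR/Constants.h"
  #include "llvm/IR/Function.h"
  #include "llvm/IR/Metadata.h"

  using namespace llvm;

  // Hypothetical reader: returns the kernel id if F carries well-formed
  // "llvm.amdgcn.lds.kernel.id" metadata (exactly one operand that fits in
  // 32 bits), otherwise returns None.
  static Optional<uint32_t> readLDSKernelId(const Function &F) {
    MDNode *MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
    if (!MD || MD->getNumOperands() != 1)
      return None;
    auto *CI = mdconst::dyn_extract<ConstantInt>(MD->getOperand(0));
    if (!CI || CI->getZExtValue() > UINT32_MAX)
      return None;
    return static_cast<uint32_t>(CI->getZExtValue());
  }

Falling back to an undef constant when the metadata is absent keeps the ABI slot allocated without forcing a concrete value, which matches the buildUndef() branch in the hunk above.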
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td index 5747fc0ca8e6..229dfb62ef6e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -88,6 +88,10 @@ def gi_smrd_sgpr : GIComplexOperandMatcher<s64, "selectSmrdSgpr">, GIComplexPatternEquiv<SMRDSgpr>; +def gi_smrd_sgpr_imm : + GIComplexOperandMatcher<s64, "selectSmrdSgprImm">, + GIComplexPatternEquiv<SMRDSgprImm>; + def gi_flat_offset : GIComplexOperandMatcher<s64, "selectFlatOffset">, GIComplexPatternEquiv<FlatOffset>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp index 6fa44ffcbfaa..632a76b32009 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -875,6 +875,8 @@ MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF, Kern.getDocument()->getNode(ProgramInfo.LDSSize); Kern[".private_segment_fixed_size"] = Kern.getDocument()->getNode(ProgramInfo.ScratchSize); + Kern[".uses_dynamic_stack"] = + Kern.getDocument()->getNode(ProgramInfo.DynamicCallStack); // FIXME: The metadata treats the minimum as 16? Kern[".kernarg_segment_align"] = diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 589992c7a7ec..147c8850587e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -33,7 +33,7 @@ #include "llvm/IR/Dominators.h" #endif -#define DEBUG_TYPE "isel" +#define DEBUG_TYPE "amdgpu-isel" using namespace llvm; @@ -1886,21 +1886,21 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr, // Match an immediate (if Imm is true) or an SGPR (if Imm is false) // offset. If Imm32Only is true, match only 32-bit immediate offsets // available on CI. 
-bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, - SDValue &Offset, bool Imm, +bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue Addr, SDValue ByteOffsetNode, + SDValue *SOffset, SDValue *Offset, bool Imm32Only) const { ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode); if (!C) { - if (Imm) + if (!SOffset) return false; if (ByteOffsetNode.getValueType().isScalarInteger() && ByteOffsetNode.getValueType().getSizeInBits() == 32) { - Offset = ByteOffsetNode; + *SOffset = ByteOffsetNode; return true; } if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) { if (ByteOffsetNode.getOperand(0).getValueType().getSizeInBits() == 32) { - Offset = ByteOffsetNode.getOperand(0); + *SOffset = ByteOffsetNode.getOperand(0); return true; } } @@ -1912,8 +1912,8 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, int64_t ByteOffset = C->getSExtValue(); Optional<int64_t> EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset, false); - if (EncodedOffset && Imm && !Imm32Only) { - Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32); + if (EncodedOffset && Offset && !Imm32Only) { + *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32); return true; } @@ -1922,17 +1922,17 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, return false; EncodedOffset = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget, ByteOffset); - if (EncodedOffset && Imm32Only) { - Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32); + if (EncodedOffset && Offset && Imm32Only) { + *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32); return true; } if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset)) return false; - if (!Imm) { + if (SOffset) { SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32); - Offset = SDValue( + *SOffset = SDValue( CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0); return true; } @@ -1968,11 +1968,18 @@ SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const { // Match a base and an immediate (if Imm is true) or an SGPR // (if Imm is false) offset. If Imm32Only is true, match only 32-bit // immediate offsets available on CI. -bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase, - SDValue &Offset, bool Imm, - bool Imm32Only) const { +bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase, + SDValue *SOffset, SDValue *Offset, + bool Imm32Only) const { SDLoc SL(Addr); + if (SOffset && Offset) { + assert(!Imm32Only); + SDValue B; + return SelectSMRDBaseOffset(Addr, B, nullptr, Offset) && + SelectSMRDBaseOffset(B, SBase, SOffset, nullptr); + } + // A 32-bit (address + offset) should not cause unsigned 32-bit integer // wraparound, because s_load instructions perform the addition in 64 bits. 
if ((Addr.getValueType() != MVT::i32 || @@ -1987,34 +1994,55 @@ bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase, assert(N0 && N1 && isa<ConstantSDNode>(N1)); } if (N0 && N1) { - if (SelectSMRDOffset(N1, Offset, Imm, Imm32Only)) { - SBase = Expand32BitAddress(N0); + if (SelectSMRDOffset(N0, N1, SOffset, Offset, Imm32Only)) { + SBase = N0; + return true; + } + if (SelectSMRDOffset(N1, N0, SOffset, Offset, Imm32Only)) { + SBase = N1; return true; } } return false; } - if (!Imm) + if (Offset && !SOffset) { + SBase = Addr; + *Offset = CurDAG->getTargetConstant(0, SL, MVT::i32); + return true; + } + return false; +} + +bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase, + SDValue *SOffset, SDValue *Offset, + bool Imm32Only) const { + if (!SelectSMRDBaseOffset(Addr, SBase, SOffset, Offset, Imm32Only)) return false; - SBase = Expand32BitAddress(Addr); - Offset = CurDAG->getTargetConstant(0, SL, MVT::i32); + SBase = Expand32BitAddress(SBase); return true; } bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const { - return SelectSMRD(Addr, SBase, Offset, /* Imm */ true); + return SelectSMRD(Addr, SBase, /* SOffset */ nullptr, &Offset); } bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const { assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS); - return SelectSMRD(Addr, SBase, Offset, /* Imm */ true, /* Imm32Only */ true); + return SelectSMRD(Addr, SBase, /* SOffset */ nullptr, &Offset, + /* Imm32Only */ true); } bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase, - SDValue &Offset) const { - return SelectSMRD(Addr, SBase, Offset, /* Imm */ false); + SDValue &SOffset) const { + return SelectSMRD(Addr, SBase, &SOffset, /* Offset */ nullptr); +} + +bool AMDGPUDAGToDAGISel::SelectSMRDSgprImm(SDValue Addr, SDValue &SBase, + SDValue &SOffset, + SDValue &Offset) const { + return SelectSMRD(Addr, SBase, &SOffset, &Offset); } bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h index 7894b8eb5b67..fda2bfac71fc 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h @@ -193,14 +193,18 @@ private: bool SelectScratchSVAddr(SDNode *N, SDValue Addr, SDValue &VAddr, SDValue &SAddr, SDValue &Offset) const; - bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset, bool Imm, - bool Imm32Only) const; + bool SelectSMRDOffset(SDValue Base, SDValue ByteOffsetNode, SDValue *SOffset, + SDValue *Offset, bool Imm32Only = false) const; SDValue Expand32BitAddress(SDValue Addr) const; - bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset, bool Imm, - bool Imm32Only = false) const; + bool SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase, SDValue *SOffset, + SDValue *Offset, bool Imm32Only = false) const; + bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue *SOffset, + SDValue *Offset, bool Imm32Only = false) const; bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const; bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const; - bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const; + bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &SOffset) const; + bool SelectSMRDSgprImm(SDValue Addr, SDValue &SBase, SDValue &SOffset, + SDValue &Offset) const; bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const; bool SelectSMRDBufferImm32(SDValue Addr, 
SDValue &Offset) const; bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 70fae9d784a2..f2e5c2fe00e8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1006,6 +1006,14 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const { case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16: case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8: case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8: + case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8: + case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8: + case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8: + case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8: + case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8: + case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8: + case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8: + case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8: return selectSMFMACIntrin(I); default: return selectImpl(I, *CoverageInfo); @@ -2361,7 +2369,7 @@ void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load, if (PtrMI->getOpcode() != TargetOpcode::G_PTR_ADD) return; - GEPInfo GEPInfo(*PtrMI); + GEPInfo GEPInfo; for (unsigned i = 1; i != 3; ++i) { const MachineOperand &GEPOp = PtrMI->getOperand(i); @@ -3237,6 +3245,8 @@ static Register matchZeroExtendFromS32(MachineRegisterInfo &MRI, Register Reg) { if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES) return Register(); + assert(Def->getNumOperands() == 3 && + MRI.getType(Def->getOperand(0).getReg()) == LLT::scalar(64)); if (mi_match(Def->getOperand(2).getReg(), MRI, m_ZeroInt())) { return Def->getOperand(1).getReg(); } @@ -3354,6 +3364,30 @@ bool AMDGPUInstructionSelector::selectSMFMACIntrin(MachineInstr &MI) const { case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8: Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64; break; + case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8: + Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64; + break; + case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8: + Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64; + break; + case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8: + Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64; + break; + case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8: + Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64; + break; + case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8: + Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64; + break; + case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8: + Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64; + break; + case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8: + Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64; + break; + case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8: + Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64; + break; default: llvm_unreachable("unhandled smfmac intrinsic"); } @@ -3800,25 +3834,82 @@ AMDGPUInstructionSelector::selectVINTERPModsHi(MachineOperand &Root) const { }}; } -InstructionSelector::ComplexRendererFns -AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const { +bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root, + Register &Base, + Register *SOffset, + int64_t *Offset) const { + MachineInstr *MI = Root.getParent(); + MachineBasicBlock *MBB = MI->getParent(); + + // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits, + // then we can select all ptr + 32-bit offsets. 
SmallVector<GEPInfo, 4> AddrInfo; - getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo); + getAddrModeInfo(*MI, *MRI, AddrInfo); - if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1) - return None; + if (AddrInfo.empty()) + return false; - const GEPInfo &GEPInfo = AddrInfo[0]; + const GEPInfo &GEPI = AddrInfo[0]; Optional<int64_t> EncodedImm = - AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm, false); - if (!EncodedImm) + AMDGPU::getSMRDEncodedOffset(STI, GEPI.Imm, false); + + if (SOffset && Offset) { + if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm && + AddrInfo.size() > 1) { + const GEPInfo &GEPI2 = AddrInfo[1]; + if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) { + if (Register OffsetReg = + matchZeroExtendFromS32(*MRI, GEPI2.SgprParts[1])) { + Base = GEPI2.SgprParts[0]; + *SOffset = OffsetReg; + *Offset = *EncodedImm; + return true; + } + } + } + return false; + } + + if (Offset && GEPI.SgprParts.size() == 1 && EncodedImm) { + Base = GEPI.SgprParts[0]; + *Offset = *EncodedImm; + return true; + } + + // SGPR offset is unsigned. + if (SOffset && GEPI.SgprParts.size() == 1 && isUInt<32>(GEPI.Imm) && + GEPI.Imm != 0) { + // If we make it this far we have a load with an 32-bit immediate offset. + // It is OK to select this using a sgpr offset, because we have already + // failed trying to select this load into one of the _IMM variants since + // the _IMM Patterns are considered before the _SGPR patterns. + Base = GEPI.SgprParts[0]; + *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); + BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset) + .addImm(GEPI.Imm); + return true; + } + + if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) { + if (Register OffsetReg = matchZeroExtendFromS32(*MRI, GEPI.SgprParts[1])) { + Base = GEPI.SgprParts[0]; + *SOffset = OffsetReg; + return true; + } + } + + return false; +} + +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const { + Register Base; + int64_t Offset; + if (!selectSmrdOffset(Root, Base, /* SOffset= */ nullptr, &Offset)) return None; - unsigned PtrReg = GEPInfo.SgprParts[0]; - return {{ - [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); }, - [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); } - }}; + return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}}; } InstructionSelector::ComplexRendererFns @@ -3844,43 +3935,24 @@ AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const { InstructionSelector::ComplexRendererFns AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const { - MachineInstr *MI = Root.getParent(); - MachineBasicBlock *MBB = MI->getParent(); - - SmallVector<GEPInfo, 4> AddrInfo; - getAddrModeInfo(*MI, *MRI, AddrInfo); - - // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits, - // then we can select all ptr + 32-bit offsets. - if (AddrInfo.empty()) + Register Base, SOffset; + if (!selectSmrdOffset(Root, Base, &SOffset, /* Offset= */ nullptr)) return None; - const GEPInfo &GEPInfo = AddrInfo[0]; - Register PtrReg = GEPInfo.SgprParts[0]; - - // SGPR offset is unsigned. - if (AddrInfo[0].SgprParts.size() == 1 && isUInt<32>(GEPInfo.Imm) && - GEPInfo.Imm != 0) { - // If we make it this far we have a load with an 32-bit immediate offset. 
- // It is OK to select this using a sgpr offset, because we have already - // failed trying to select this load into one of the _IMM variants since - // the _IMM Patterns are considered before the _SGPR patterns. - Register OffsetReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); - BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg) - .addImm(GEPInfo.Imm); - return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); }, - [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }}}; - } + return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); }, + [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); }}}; +} - if (AddrInfo[0].SgprParts.size() == 2 && GEPInfo.Imm == 0) { - if (Register OffsetReg = - matchZeroExtendFromS32(*MRI, GEPInfo.SgprParts[1])) { - return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); }, - [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }}}; - } - } +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectSmrdSgprImm(MachineOperand &Root) const { + Register Base, SOffset; + int64_t Offset; + if (!selectSmrdOffset(Root, Base, &SOffset, &Offset)) + return None; - return None; + return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); }, + [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}}; } std::pair<Register, int> diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index 22672ba59e76..5baf55d23480 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -63,11 +63,9 @@ public: private: struct GEPInfo { - const MachineInstr &GEP; SmallVector<unsigned, 2> SgprParts; SmallVector<unsigned, 2> VgprParts; - int64_t Imm; - GEPInfo(const MachineInstr &GEP) : GEP(GEP), Imm(0) { } + int64_t Imm = 0; }; bool isSGPR(Register Reg) const; @@ -200,12 +198,16 @@ private: InstructionSelector::ComplexRendererFns selectVINTERPModsHi(MachineOperand &Root) const; + bool selectSmrdOffset(MachineOperand &Root, Register &Base, Register *SOffset, + int64_t *Offset) const; InstructionSelector::ComplexRendererFns selectSmrdImm(MachineOperand &Root) const; InstructionSelector::ComplexRendererFns selectSmrdImm32(MachineOperand &Root) const; InstructionSelector::ComplexRendererFns selectSmrdSgpr(MachineOperand &Root) const; + InstructionSelector::ComplexRendererFns + selectSmrdSgprImm(MachineOperand &Root) const; std::pair<Register, int> selectFlatOffsetImpl(MachineOperand &Root, uint64_t FlatVariant) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 01a3e78ea48c..0979debe9777 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -4197,6 +4197,35 @@ bool AMDGPULegalizerInfo::legalizeImplicitArgPtr(MachineInstr &MI, return true; } +bool AMDGPULegalizerInfo::getLDSKernelId(Register DstReg, + MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { + Function &F = B.getMF().getFunction(); + Optional<uint32_t> KnownSize = + AMDGPUMachineFunction::getLDSKernelIdMetadata(F); + if (KnownSize.has_value()) + B.buildConstant(DstReg, KnownSize.value()); + return false; +} + +bool AMDGPULegalizerInfo::legalizeLDSKernelId(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { + + const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>(); + if (!MFI->isEntryFunction()) { + 
return legalizePreloadedArgIntrin(MI, MRI, B, + AMDGPUFunctionArgInfo::LDS_KERNEL_ID); + } + + Register DstReg = MI.getOperand(0).getReg(); + if (!getLDSKernelId(DstReg, MRI, B)) + return false; + + MI.eraseFromParent(); + return true; +} + bool AMDGPULegalizerInfo::legalizeIsAddrSpace(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, @@ -5636,6 +5665,9 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, case Intrinsic::amdgcn_workgroup_id_z: return legalizePreloadedArgIntrin(MI, MRI, B, AMDGPUFunctionArgInfo::WORKGROUP_ID_Z); + case Intrinsic::amdgcn_lds_kernel_id: + return legalizePreloadedArgIntrin(MI, MRI, B, + AMDGPUFunctionArgInfo::LDS_KERNEL_ID); case Intrinsic::amdgcn_dispatch_ptr: return legalizePreloadedArgIntrin(MI, MRI, B, AMDGPUFunctionArgInfo::DISPATCH_PTR); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h index cee533aa34ec..5e8111e22aad 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -155,6 +155,13 @@ public: bool legalizeImplicitArgPtr(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; + + bool getLDSKernelId(Register DstReg, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const; + + bool legalizeLDSKernelId(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const; + bool legalizeIsAddrSpace(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, unsigned AddrSpace) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp index 78e092b2e872..7e49a6117ebd 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp @@ -376,15 +376,7 @@ static bool HasNative(AMDGPULibFunc::EFuncId id) { return false; } -struct TableRef { - size_t size; - const TableEntry *table; // variable size: from 0 to (size - 1) - - TableRef() : size(0), table(nullptr) {} - - template <size_t N> - TableRef(const TableEntry (&tbl)[N]) : size(N), table(&tbl[0]) {} -}; +using TableRef = ArrayRef<TableEntry>; static TableRef getOptTable(AMDGPULibFunc::EFuncId id) { switch(id) { @@ -698,11 +690,10 @@ bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) { bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) { // Table-Driven optimization const TableRef tr = getOptTable(FInfo.getId()); - if (tr.size==0) + if (tr.empty()) return false; - int const sz = (int)tr.size; - const TableEntry * const ftbl = tr.table; + int const sz = (int)tr.size(); Value *opr0 = CI->getArgOperand(0); if (getVecSize(FInfo) > 1) { @@ -714,8 +705,8 @@ bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) { assert(eltval && "Non-FP arguments in math function!"); bool found = false; for (int i=0; i < sz; ++i) { - if (eltval->isExactlyValue(ftbl[i].input)) { - DVal.push_back(ftbl[i].result); + if (eltval->isExactlyValue(tr[i].input)) { + DVal.push_back(tr[i].result); found = true; break; } @@ -746,8 +737,8 @@ bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) { // Scalar version if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) { for (int i = 0; i < sz; ++i) { - if (CF->isExactlyValue(ftbl[i].input)) { - Value *nval = ConstantFP::get(CF->getType(), ftbl[i].result); + if (CF->isExactlyValue(tr[i].input)) { + Value *nval = ConstantFP::get(CF->getType(), tr[i].result); LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n"); replaceCall(nval); return true; diff --git 
a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp index 35922341de26..b4a8766d682e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp @@ -55,21 +55,6 @@ static cl::opt<bool> SuperAlignLDSGlobals( cl::init(true), cl::Hidden); namespace { - -SmallPtrSet<GlobalValue *, 32> getUsedList(Module &M) { - SmallPtrSet<GlobalValue *, 32> UsedList; - - SmallVector<GlobalValue *, 32> TmpVec; - collectUsedGlobalVariables(M, TmpVec, true); - UsedList.insert(TmpVec.begin(), TmpVec.end()); - - TmpVec.clear(); - collectUsedGlobalVariables(M, TmpVec, false); - UsedList.insert(TmpVec.begin(), TmpVec.end()); - - return UsedList; -} - class AMDGPULowerModuleLDS : public ModulePass { static void removeFromUsedList(Module &M, StringRef Name, @@ -153,9 +138,6 @@ class AMDGPULowerModuleLDS : public ModulePass { ""); } -private: - SmallPtrSet<GlobalValue *, 32> UsedList; - public: static char ID; @@ -165,9 +147,10 @@ public: bool runOnModule(Module &M) override { CallGraph CG = CallGraph(M); - UsedList = getUsedList(M); bool Changed = superAlignLDSGlobals(M); - Changed |= processUsedLDS(CG, M); + std::vector<GlobalVariable *> ModuleScopeVariables = + AMDGPU::findVariablesToLower(M, nullptr); + Changed |= processUsedLDS(CG, M, ModuleScopeVariables); for (Function &F : M.functions()) { if (F.isDeclaration()) @@ -176,10 +159,11 @@ public: // Only lower compute kernels' LDS. if (!AMDGPU::isKernel(F.getCallingConv())) continue; - Changed |= processUsedLDS(CG, M, &F); + std::vector<GlobalVariable *> KernelUsedVariables = + AMDGPU::findVariablesToLower(M, &F); + Changed |= processUsedLDS(CG, M, KernelUsedVariables, &F); } - UsedList.clear(); return Changed; } @@ -228,22 +212,20 @@ private: return Changed; } - bool processUsedLDS(CallGraph const &CG, Module &M, Function *F = nullptr) { + bool processUsedLDS(CallGraph const &CG, Module &M, + std::vector<GlobalVariable *> const &LDSVarsToTransform, + Function *F = nullptr) { LLVMContext &Ctx = M.getContext(); const DataLayout &DL = M.getDataLayout(); - // Find variables to move into new struct instance - std::vector<GlobalVariable *> FoundLocalVars = - AMDGPU::findVariablesToLower(M, F); - - if (FoundLocalVars.empty()) { + if (LDSVarsToTransform.empty()) { // No variables to rewrite, no changes made. 
return false; } SmallVector<OptimizedStructLayoutField, 8> LayoutFields; - LayoutFields.reserve(FoundLocalVars.size()); - for (GlobalVariable *GV : FoundLocalVars) { + LayoutFields.reserve(LDSVarsToTransform.size()); + for (GlobalVariable *GV : LDSVarsToTransform) { OptimizedStructLayoutField F(GV, DL.getTypeAllocSize(GV->getValueType()), AMDGPU::getAlign(DL, GV)); LayoutFields.emplace_back(F); @@ -252,7 +234,7 @@ private: performOptimizedStructLayout(LayoutFields); std::vector<GlobalVariable *> LocalVars; - LocalVars.reserve(FoundLocalVars.size()); // will be at least this large + LocalVars.reserve(LDSVarsToTransform.size()); // will be at least this large { // This usually won't need to insert any padding, perhaps avoid the alloc uint64_t CurrentOffset = 0; @@ -352,7 +334,6 @@ private: GV->replaceAllUsesWith(GEP); } if (GV->use_empty()) { - UsedList.erase(GV); GV->eraseFromParent(); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp index b461c3c4bfdc..f5e12fd960d0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp @@ -11,6 +11,7 @@ #include "AMDGPUPerfHintAnalysis.h" #include "AMDGPUSubtarget.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/Constants.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -101,6 +102,21 @@ void AMDGPUMachineFunction::allocateModuleLDSGlobal(const Function &F) { } } +Optional<uint32_t> +AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) { + auto MD = F.getMetadata("llvm.amdgcn.lds.kernel.id"); + if (MD && MD->getNumOperands() == 1) { + ConstantInt *KnownSize = mdconst::extract<ConstantInt>(MD->getOperand(0)); + if (KnownSize) { + uint64_t V = KnownSize->getZExtValue(); + if (V <= UINT32_MAX) { + return V; + } + } + } + return {}; +} + void AMDGPUMachineFunction::setDynLDSAlign(const DataLayout &DL, const GlobalVariable &GV) { assert(DL.getTypeAllocSize(GV.getValueType()).isZero()); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h index df62c2314617..97db8b7eb8d6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h @@ -11,11 +11,12 @@ #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Function.h" namespace llvm { @@ -104,6 +105,8 @@ public: unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV); void allocateModuleLDSGlobal(const Function &F); + static Optional<uint32_t> getLDSKernelIdMetadata(const Function &F); + Align getDynLDSAlign() const { return DynLDSAlign; } void setDynLDSAlign(const DataLayout &DL, const GlobalVariable &GV); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp index 09dbd2150db6..a9f1e9bd0996 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp @@ -74,10 +74,10 @@ public: private: struct MemAccessInfo { - const Value *V; - const Value *Base; - int64_t Offset; - MemAccessInfo() : V(nullptr), Base(nullptr), Offset(0) {} + const Value *V = nullptr; + const Value *Base = nullptr; + int64_t Offset = 0; + MemAccessInfo() = default; bool isLargeStride(MemAccessInfo 
&Reference) const; #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) Printable print() const { @@ -116,6 +116,7 @@ private: bool isGlobalAddr(const Value *V) const; bool isLocalAddr(const Value *V) const; + bool isGlobalLoadUsedInBB(const Instruction &) const; }; static std::pair<const Value *, const Type *> getMemoryInstrPtrAndType( @@ -196,6 +197,24 @@ bool AMDGPUPerfHint::isIndirectAccess(const Instruction *Inst) const { return false; } +// Returns true if the global load `I` is used in its own basic block. +bool AMDGPUPerfHint::isGlobalLoadUsedInBB(const Instruction &I) const { + const auto *Ld = dyn_cast<LoadInst>(&I); + if (!Ld) + return false; + if (!isGlobalAddr(Ld->getPointerOperand())) + return false; + + for (const User *Usr : Ld->users()) { + if (const Instruction *UsrInst = dyn_cast<Instruction>(Usr)) { + if (UsrInst->getParent() == I.getParent()) + return true; + } + } + + return false; +} + AMDGPUPerfHintAnalysis::FuncInfo *AMDGPUPerfHint::visit(const Function &F) { AMDGPUPerfHintAnalysis::FuncInfo &FI = FIM[&F]; @@ -203,9 +222,14 @@ AMDGPUPerfHintAnalysis::FuncInfo *AMDGPUPerfHint::visit(const Function &F) { for (auto &B : F) { LastAccess = MemAccessInfo(); + unsigned UsedGlobalLoadsInBB = 0; for (auto &I : B) { if (const Type *Ty = getMemoryInstrPtrAndType(&I).second) { unsigned Size = divideCeil(Ty->getPrimitiveSizeInBits(), 32); + // TODO: Check if the global load and its user are close to each other + // instead (Or do this analysis in GCNSchedStrategy?). + if (isGlobalLoadUsedInBB(I)) + UsedGlobalLoadsInBB += Size; if (isIndirectAccess(&I)) FI.IAMInstCost += Size; if (isLargeStride(&I)) @@ -245,6 +269,16 @@ AMDGPUPerfHintAnalysis::FuncInfo *AMDGPUPerfHint::visit(const Function &F) { ++FI.InstCost; } } + + if (!FI.HasDenseGlobalMemAcc) { + unsigned GlobalMemAccPercentage = UsedGlobalLoadsInBB * 100 / B.size(); + if (GlobalMemAccPercentage > 50) { + LLVM_DEBUG(dbgs() << "[HasDenseGlobalMemAcc] Set to true since " + << B.getName() << " has " << GlobalMemAccPercentage + << "% global memory access\n"); + FI.HasDenseGlobalMemAcc = true; + } + } } return &FI; @@ -286,6 +320,11 @@ bool AMDGPUPerfHint::runOnFunction(Function &F) { } bool AMDGPUPerfHint::isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &FI) { + // Reverting optimal scheduling in favour of occupancy with basic block(s) + // having dense global memory access can potentially hurt performance. 
+ if (FI.HasDenseGlobalMemAcc) + return true; + return FI.MemInstCost * 100 / FI.InstCost > MemBoundThresh; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h index 31ff80f5f431..2db8db6957ce 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h @@ -41,7 +41,11 @@ public: unsigned InstCost; unsigned IAMInstCost; // Indirect access memory instruction count unsigned LSMInstCost; // Large stride memory instruction count - FuncInfo() : MemInstCost(0), InstCost(0), IAMInstCost(0), LSMInstCost(0) {} + bool HasDenseGlobalMemAcc; // Set if at least 1 basic block has relatively + // high global memory access + FuncInfo() + : MemInstCost(0), InstCost(0), IAMInstCost(0), LSMInstCost(0), + HasDenseGlobalMemAcc(false) {} }; typedef ValueMap<const Function*, FuncInfo> FuncInfoMap; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp index 0df6f4d45b06..bd8e568213b7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp @@ -153,7 +153,10 @@ bool AMDGPURegBankCombinerHelper::matchIntMinMaxToMed3( if (!isVgprRegBank(Dst)) return false; - if (MRI.getType(Dst).isVector()) + // med3 for i16 is only available on gfx9+, and not available for v2i16. + LLT Ty = MRI.getType(Dst); + if ((Ty != LLT::scalar(16) || !Subtarget.hasMed3_16()) && + Ty != LLT::scalar(32)) return false; MinMaxMedOpc OpcodeTriple = getMinMaxPair(MI.getOpcode()); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 0830cbd919a0..887341e67454 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -4426,7 +4426,15 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_mfma_i32_16x16x32_i8: case Intrinsic::amdgcn_mfma_i32_32x32x16_i8: case Intrinsic::amdgcn_mfma_f32_16x16x8_xf32: - case Intrinsic::amdgcn_mfma_f32_32x32x4_xf32: { + case Intrinsic::amdgcn_mfma_f32_32x32x4_xf32: + case Intrinsic::amdgcn_mfma_f32_16x16x32_bf8_bf8: + case Intrinsic::amdgcn_mfma_f32_16x16x32_bf8_fp8: + case Intrinsic::amdgcn_mfma_f32_16x16x32_fp8_bf8: + case Intrinsic::amdgcn_mfma_f32_16x16x32_fp8_fp8: + case Intrinsic::amdgcn_mfma_f32_32x32x16_bf8_bf8: + case Intrinsic::amdgcn_mfma_f32_32x32x16_bf8_fp8: + case Intrinsic::amdgcn_mfma_f32_32x32x16_fp8_bf8: + case Intrinsic::amdgcn_mfma_f32_32x32x16_fp8_fp8: { // Default for MAI intrinsics. // srcC can also be an immediate which can be folded later. 
// FIXME: Should we eventually add an alternative mapping with AGPR src @@ -4451,7 +4459,15 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16: case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16: case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8: - case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8: { + case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8: + case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8: + case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8: + case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8: + case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8: + case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8: + case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8: + case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8: + case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8: { // vdst, srcA, srcB, srcC, idx OpdsMapping[0] = getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI); OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp index 4d7a3f4028e8..aa51c5d20bdc 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp @@ -141,7 +141,7 @@ class ReplaceLDSUseImpl { std::vector<GlobalVariable *> collectLDSRequiringPointerReplace() { // Collect LDS which requires module lowering. std::vector<GlobalVariable *> LDSGlobals = - llvm::AMDGPU::findVariablesToLower(M); + llvm::AMDGPU::findVariablesToLower(M, nullptr); // Remove LDS which don't qualify for replacement. llvm::erase_if(LDSGlobals, [&](GlobalVariable *GV) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td index 8297635d7bb2..5d7bade00a3e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td @@ -340,12 +340,28 @@ def : SourceOfDivergence<int_amdgcn_mfma_i32_16x16x32_i8>; def : SourceOfDivergence<int_amdgcn_mfma_i32_32x32x16_i8>; def : SourceOfDivergence<int_amdgcn_mfma_f32_16x16x8_xf32>; def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x4_xf32>; +def : SourceOfDivergence<int_amdgcn_mfma_f32_16x16x32_bf8_bf8>; +def : SourceOfDivergence<int_amdgcn_mfma_f32_16x16x32_bf8_fp8>; +def : SourceOfDivergence<int_amdgcn_mfma_f32_16x16x32_fp8_bf8>; +def : SourceOfDivergence<int_amdgcn_mfma_f32_16x16x32_fp8_fp8>; +def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x16_bf8_bf8>; +def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x16_bf8_fp8>; +def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x16_fp8_bf8>; +def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x16_fp8_fp8>; def : SourceOfDivergence<int_amdgcn_smfmac_f32_16x16x32_f16>; def : SourceOfDivergence<int_amdgcn_smfmac_f32_32x32x16_f16>; def : SourceOfDivergence<int_amdgcn_smfmac_f32_16x16x32_bf16>; def : SourceOfDivergence<int_amdgcn_smfmac_f32_32x32x16_bf16>; def : SourceOfDivergence<int_amdgcn_smfmac_i32_16x16x64_i8>; def : SourceOfDivergence<int_amdgcn_smfmac_i32_32x32x32_i8>; +def : SourceOfDivergence<int_amdgcn_smfmac_f32_16x16x64_bf8_bf8>; +def : SourceOfDivergence<int_amdgcn_smfmac_f32_16x16x64_bf8_fp8>; +def : SourceOfDivergence<int_amdgcn_smfmac_f32_16x16x64_fp8_bf8>; +def : SourceOfDivergence<int_amdgcn_smfmac_f32_16x16x64_fp8_fp8>; +def : SourceOfDivergence<int_amdgcn_smfmac_f32_32x32x32_bf8_bf8>; +def : SourceOfDivergence<int_amdgcn_smfmac_f32_32x32x32_bf8_fp8>; +def : 
SourceOfDivergence<int_amdgcn_smfmac_f32_32x32x32_fp8_bf8>; +def : SourceOfDivergence<int_amdgcn_smfmac_f32_32x32x32_fp8_fp8>; // The dummy boolean output is divergent from the IR's perspective, // but the mask results are uniform. These produce a divergent and diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 6bd906439ee8..cf4826d81b4b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -736,13 +736,18 @@ static unsigned getMaxNumPreloadedSGPRs() { 2 + // dispatch ID 2 + // flat scratch init 2; // Implicit buffer ptr + // Max number of system SGPRs unsigned MaxSystemSGPRs = 1 + // WorkGroupIDX 1 + // WorkGroupIDY 1 + // WorkGroupIDZ 1 + // WorkGroupInfo 1; // private segment wave byte offset - return MaxUserSGPRs + MaxSystemSGPRs; + + // Max number of synthetic SGPRs + unsigned SyntheticSGPRs = 1; // LDSKernelId + + return MaxUserSGPRs + MaxSystemSGPRs + SyntheticSGPRs; } unsigned GCNSubtarget::getMaxNumSGPRs(const Function &F) const { @@ -852,34 +857,6 @@ struct FillMFMAShadowMutation : ScheduleDAGMutation { return MI && TII->isVALU(*MI); } - bool canAddEdge(const SUnit *Succ, const SUnit *Pred) const { - if (Pred->NodeNum < Succ->NodeNum) - return true; - - SmallVector<const SUnit*, 64> Succs({Succ}), Preds({Pred}); - - for (unsigned I = 0; I < Succs.size(); ++I) { - for (const SDep &SI : Succs[I]->Succs) { - const SUnit *SU = SI.getSUnit(); - if (SU != Succs[I] && !llvm::is_contained(Succs, SU)) - Succs.push_back(SU); - } - } - - SmallPtrSet<const SUnit*, 32> Visited; - while (!Preds.empty()) { - const SUnit *SU = Preds.pop_back_val(); - if (llvm::is_contained(Succs, SU)) - return false; - Visited.insert(SU); - for (const SDep &SI : SU->Preds) - if (SI.getSUnit() != SU && !Visited.count(SI.getSUnit())) - Preds.push_back(SI.getSUnit()); - } - - return true; - } - // Link as many SALU instructions in chain as possible. Return the size // of the chain. Links up to MaxChain instructions. 
unsigned linkSALUChain(SUnit *From, SUnit *To, unsigned MaxChain, @@ -895,18 +872,20 @@ struct FillMFMAShadowMutation : ScheduleDAGMutation { LLVM_DEBUG(dbgs() << "Inserting edge from\n" ; DAG->dumpNode(*From); dbgs() << "to\n"; DAG->dumpNode(*SU); dbgs() << '\n'); - if (SU->addPred(SDep(From, SDep::Artificial), false)) - ++Linked; + if (SU != From && From != &DAG->ExitSU && DAG->canAddEdge(SU, From)) + if (DAG->addEdge(SU, SDep(From, SDep::Artificial))) + ++Linked; for (SDep &SI : From->Succs) { SUnit *SUv = SI.getSUnit(); - if (SUv != From && isVALU(SUv) && canAddEdge(SUv, SU)) - SUv->addPred(SDep(SU, SDep::Artificial), false); + if (SUv != From && SU != &DAG->ExitSU && isVALU(SUv) && + DAG->canAddEdge(SUv, SU)) + DAG->addEdge(SUv, SDep(SU, SDep::Artificial)); } for (SDep &SI : SU->Succs) { SUnit *Succ = SI.getSUnit(); - if (Succ != SU && isSALU(Succ) && canAddEdge(From, Succ)) + if (Succ != SU && isSALU(Succ)) Worklist.push_back(Succ); } } @@ -949,7 +928,8 @@ struct FillMFMAShadowMutation : ScheduleDAGMutation { if (Visited.count(&*LastSALU)) continue; - if (!isSALU(&*LastSALU) || !canAddEdge(&*LastSALU, &SU)) + if (&SU == &DAG->ExitSU || &SU == &*LastSALU || !isSALU(&*LastSALU) || + !DAG->canAddEdge(&*LastSALU, &SU)) continue; Lat -= linkSALUChain(&SU, &*LastSALU, Lat, Visited); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 971e44723758..dca926867300 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -1584,6 +1584,9 @@ bool GCNTargetMachine::parseMachineFunctionInfo( parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentSize, AMDGPU::SGPR_32RegClass, MFI->ArgInfo.PrivateSegmentSize, 0, 0) || + parseAndCheckArgument(YamlMFI.ArgInfo->LDSKernelId, + AMDGPU::SGPR_32RegClass, + MFI->ArgInfo.LDSKernelId, 0, 1) || parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDX, AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDX, 0, 1) || diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index e12d0ffef35c..2a9393fc1595 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1739,6 +1739,8 @@ public: void cvtVOP3(MCInst &Inst, const OperandVector &Operands); void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); void cvtVOPD(MCInst &Inst, const OperandVector &Operands); + void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands, + OptionalImmIndexMap &OptionalIdx); void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, OptionalImmIndexMap &OptionalIdx); @@ -1767,21 +1769,11 @@ public: void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } - void cvtVOPCNoDstDPP(MCInst &Inst, const OperandVector &Operands, - bool IsDPP8 = false); - void cvtVOPCNoDstDPP8(MCInst &Inst, const OperandVector &Operands) { - cvtVOPCNoDstDPP(Inst, Operands, true); - } void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) { cvtVOP3DPP(Inst, Operands, true); } - void cvtVOPC64NoDstDPP(MCInst &Inst, const OperandVector &Operands, - bool IsDPP8 = false); - void cvtVOPC64NoDstDPP8(MCInst &Inst, const OperandVector &Operands) { - cvtVOPC64NoDstDPP(Inst, Operands, true); - } OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, AMDGPUOperand::ImmTy Type); @@ -4177,7 +4169,9 @@ bool 
AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { return false; } - if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) { + uint64_t TSFlags = MII.get(Opc).TSFlags; + + if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) { int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); if (OpSelIdx != -1) { if (Inst.getOperand(OpSelIdx).getImm() != 0) @@ -4190,6 +4184,15 @@ bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { } } + // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot). + if ((TSFlags & SIInstrFlags::IsDOT) && (TSFlags & SIInstrFlags::VOP3) && + !(TSFlags & SIInstrFlags::VOP3P)) { + int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); + unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); + if (OpSel & 3) + return false; + } + return true; } @@ -4636,9 +4639,6 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, Error(IDLoc, "ABS not allowed in VOP3B instructions"); return false; } - if (!validateCoherencyBits(Inst, Operands, IDLoc)) { - return false; - } if (!validateExeczVcczOperands(Operands)) { return false; } @@ -5004,6 +5004,9 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { PARSE_BITS_ENTRY(KD.kernel_code_properties, KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, Val, ValRange); + } else if (ID == ".amdhsa_uses_dynamic_stack") { + PARSE_BITS_ENTRY(KD.kernel_code_properties, + KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange); } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { if (hasArchitectedFlatScratch()) return Error(IDRange.Start, @@ -8024,10 +8027,13 @@ OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) return MatchOperand_NoMatch; } -void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { - cvtVOP3P(Inst, Operands); - +// Determines which bit DST_OP_SEL occupies in the op_sel operand according to +// the number of src operands present, then copies that bit into src0_modifiers. +void cvtVOP3DstOpSelOnly(MCInst &Inst) { int Opc = Inst.getOpcode(); + int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); + if (OpSelIdx == -1) + return; int SrcNum; const int Ops[] = { AMDGPU::OpName::src0, @@ -8038,7 +8044,6 @@ void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) ++SrcNum); assert(SrcNum > 0); - int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); if ((OpSel & (1 << SrcNum)) != 0) { @@ -8048,6 +8053,18 @@ void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) } } +void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, + const OperandVector &Operands) { + cvtVOP3P(Inst, Operands); + cvtVOP3DstOpSelOnly(Inst); +} + +void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands, + OptionalImmIndexMap &OptionalIdx) { + cvtVOP3P(Inst, Operands, OptionalIdx); + cvtVOP3DstOpSelOnly(Inst); +} + static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { // 1. 
This operand is input modifiers return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS @@ -8241,6 +8258,12 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; + if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi || + Opc == AMDGPU::V_CVT_SR_FP8_F32_vi) { + Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods + Inst.addOperand(Inst.getOperand(0)); + } + if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { assert(!IsPacked); Inst.addOperand(Inst.getOperand(0)); @@ -8747,14 +8770,6 @@ AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); } -// Add dummy $old operand -void AMDGPUAsmParser::cvtVOPC64NoDstDPP(MCInst &Inst, - const OperandVector &Operands, - bool IsDPP8) { - Inst.addOperand(MCOperand::createReg(0)); - cvtVOP3DPP(Inst, Operands, IsDPP8); -} - void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { OptionalImmIndexMap OptionalIdx; unsigned Opc = Inst.getOpcode(); @@ -8802,6 +8817,8 @@ void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bo } if (Desc.TSFlags & SIInstrFlags::VOP3P) cvtVOP3P(Inst, Operands, OptionalIdx); + else if (Desc.TSFlags & SIInstrFlags::VOP3) + cvtVOP3OpSel(Inst, Operands, OptionalIdx); else if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) { addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); } @@ -8821,14 +8838,6 @@ void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bo } } -// Add dummy $old operand -void AMDGPUAsmParser::cvtVOPCNoDstDPP(MCInst &Inst, - const OperandVector &Operands, - bool IsDPP8) { - Inst.addOperand(MCOperand::createReg(0)); - cvtDPP(Inst, Operands, IsDPP8); -} - void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { OptionalImmIndexMap OptionalIdx; @@ -9043,12 +9052,27 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments switch (BasicInstType) { case SIInstrFlags::VOP1: - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); - if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); + if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), + AMDGPU::OpName::clamp) != -1) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, + AMDGPUOperand::ImmTyClampSI, 0); + } + if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), + AMDGPU::OpName::omod) != -1) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, + AMDGPUOperand::ImmTyOModSI, 0); + } + if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), + AMDGPU::OpName::dst_sel) != -1) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, + AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); + } + if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), + AMDGPU::OpName::dst_unused) != -1) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, + AMDGPUOperand::ImmTySdwaDstUnused, + DstUnused::UNUSED_PRESERVE); } - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); break; 
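The assembler-parser changes above centralize destination op_sel handling in the new cvtVOP3DstOpSelOnly(): it counts how many source operands are present, and if the op_sel bit just above the last source is set, mirrors it into src0_modifiers. The sketch below illustrates only that bit manipulation; the DST_OP_SEL value is assumed for illustration and the real encoding lives in the AMDGPU backend headers, not here.

  #include <cstdint>

  // Assumed for illustration; the actual SISrcMods::DST_OP_SEL encoding is
  // defined by the backend.
  constexpr uint32_t DST_OP_SEL = 1u << 3;

  // Sketch of the idea behind cvtVOP3DstOpSelOnly(): in the packed op_sel
  // immediate the destination bit sits at position SrcNum (just above the
  // per-source bits); when set, it is copied into src0_modifiers.
  uint32_t mirrorDstOpSel(uint32_t OpSelImm, int SrcNum, uint32_t Src0Mods) {
    if (OpSelImm & (1u << SrcNum))
      Src0Mods |= DST_OP_SEL;
    return Src0Mods;
  }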
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index ccaf646008b1..98ee720200b4 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -451,7 +451,11 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P) convertVOP3PDPPInst(MI); else if (AMDGPU::isVOPC64DPP(MI.getOpcode())) - convertVOPCDPPInst(MI); + convertVOPCDPPInst(MI); // Special VOP3 case + else { + assert(MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3); + convertVOP3DPPInst(MI); // Regular VOP3 case + } break; } Res = tryDecodeInst(DecoderTableGFX1196, MI, DecW, Address); @@ -745,6 +749,43 @@ DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const { return MCDisassembler::Success; } +struct VOPModifiers { + unsigned OpSel = 0; + unsigned OpSelHi = 0; + unsigned NegLo = 0; + unsigned NegHi = 0; +}; + +// Reconstruct values of VOP3/VOP3P operands such as op_sel. +// Note that these values do not affect disassembler output, +// so this is only necessary for consistency with src_modifiers. +static VOPModifiers collectVOPModifiers(const MCInst &MI, + bool IsVOP3P = false) { + VOPModifiers Modifiers; + unsigned Opc = MI.getOpcode(); + const int ModOps[] = {AMDGPU::OpName::src0_modifiers, + AMDGPU::OpName::src1_modifiers, + AMDGPU::OpName::src2_modifiers}; + for (int J = 0; J < 3; ++J) { + int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); + if (OpIdx == -1) + continue; + + unsigned Val = MI.getOperand(OpIdx).getImm(); + + Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J; + if (IsVOP3P) { + Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J; + Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J; + Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J; + } else if (J == 0) { + Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3; + } + } + + return Modifiers; +} + // We must check FI == literal to reject not genuine dpp8 insts, and we must // first add optional MI operands to check FI DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const { @@ -755,6 +796,11 @@ DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const { } else if ((MCII->get(Opc).TSFlags & SIInstrFlags::VOPC) || AMDGPU::isVOPC64DPP(Opc)) { convertVOPCDPPInst(MI); + } else if (MI.getNumOperands() < DescNumOps && + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) { + auto Mods = collectVOPModifiers(MI); + insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel), + AMDGPU::OpName::op_sel); } else { // Insert dummy unused src modifiers. if (MI.getNumOperands() < DescNumOps && @@ -770,6 +816,18 @@ DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const { return isValidDPP8(MI) ? MCDisassembler::Success : MCDisassembler::SoftFail; } +DecodeStatus AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const { + unsigned Opc = MI.getOpcode(); + unsigned DescNumOps = MCII->get(Opc).getNumOperands(); + if (MI.getNumOperands() < DescNumOps && + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) { + auto Mods = collectVOPModifiers(MI); + insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel), + AMDGPU::OpName::op_sel); + } + return MCDisassembler::Success; +} + // Note that before gfx10, the MIMG encoding provided no information about // VADDR size. 
Consequently, decoded instructions always show address as if it // has 1 dword, which could be not really so. @@ -914,45 +972,27 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { DecodeStatus AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const { unsigned Opc = MI.getOpcode(); unsigned DescNumOps = MCII->get(Opc).getNumOperands(); + auto Mods = collectVOPModifiers(MI, true); if (MI.getNumOperands() < DescNumOps && AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in); - const int ModOps[] = {AMDGPU::OpName::src0_modifiers, - AMDGPU::OpName::src1_modifiers, - AMDGPU::OpName::src2_modifiers}; - unsigned OpSel = 0; - unsigned OpSelHi = 0; - unsigned NegLo = 0; - unsigned NegHi = 0; - for (int J = 0; J < 3; ++J) { - int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); - if (OpIdx == -1) - break; - unsigned Val = MI.getOperand(OpIdx).getImm(); - - OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J; - OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J; - NegLo |= !!(Val & SISrcMods::NEG) << J; - NegHi |= !!(Val & SISrcMods::NEG_HI) << J; - } - if (MI.getNumOperands() < DescNumOps && AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) - insertNamedMCOperand(MI, MCOperand::createImm(OpSel), + insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel), AMDGPU::OpName::op_sel); if (MI.getNumOperands() < DescNumOps && AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi) != -1) - insertNamedMCOperand(MI, MCOperand::createImm(OpSelHi), + insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSelHi), AMDGPU::OpName::op_sel_hi); if (MI.getNumOperands() < DescNumOps && AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo) != -1) - insertNamedMCOperand(MI, MCOperand::createImm(NegLo), + insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegLo), AMDGPU::OpName::neg_lo); if (MI.getNumOperands() < DescNumOps && AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi) != -1) - insertNamedMCOperand(MI, MCOperand::createImm(NegHi), + insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegHi), AMDGPU::OpName::neg_hi); return MCDisassembler::Success; @@ -2000,6 +2040,9 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective( KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32); } + PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack", + KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK); + if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) return MCDisassembler::Fail; diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h index 31869f0917ae..d17e2d8d5082 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -162,6 +162,7 @@ public: DecodeStatus convertSDWAInst(MCInst &MI) const; DecodeStatus convertDPP8Inst(MCInst &MI) const; DecodeStatus convertMIMGInst(MCInst &MI) const; + DecodeStatus convertVOP3DPPInst(MCInst &MI) const; DecodeStatus convertVOP3PDPPInst(MCInst &MI) const; DecodeStatus convertVOPCDPPInst(MCInst &MI) const; diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp index 5d254518c67a..4558ddf6dbfe 100644 --- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp +++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp @@ -202,6 +202,19 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, LLVM_DEBUG(dbgs() << " failed: no DPP opcode\n"); return nullptr; } + int OrigOpE32 = AMDGPU::getVOPe32(OrigOp); + // Prior checks cover 
Mask with VOPC condition, but not on purpose + auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask); + assert(RowMaskOpnd && RowMaskOpnd->isImm()); + auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask); + assert(BankMaskOpnd && BankMaskOpnd->isImm()); + const bool MaskAllLanes = + RowMaskOpnd->getImm() == 0xF && BankMaskOpnd->getImm() == 0xF; + (void)MaskAllLanes; + assert(MaskAllLanes || + !(TII->isVOPC(DPPOp) || + (TII->isVOP3(DPPOp) && OrigOpE32 != -1 && TII->isVOPC(OrigOpE32))) && + "VOPC cannot form DPP unless mask is full"); auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI, OrigMI.getDebugLoc(), TII->get(DPPOp)) @@ -234,6 +247,10 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, DPPInst.addReg(CombOldVGPR.Reg, Def ? 0 : RegState::Undef, CombOldVGPR.SubReg); ++NumOperands; + } else if (TII->isVOPC(DPPOp) || (TII->isVOP3(DPPOp) && OrigOpE32 != -1 && + TII->isVOPC(OrigOpE32))) { + // VOPC DPP and VOPC promoted to VOP3 DPP do not have an old operand + // because they write to SGPRs not VGPRs } else { // TODO: this discards MAC/FMA instructions for now, let's add it later LLVM_DEBUG(dbgs() << " failed: no old operand in DPP instruction," diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td index 281474994bca..6ff349e31f22 100644 --- a/llvm/lib/Target/AMDGPU/GCNProcessors.td +++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td @@ -249,11 +249,11 @@ def : ProcessorModel<"gfx1036", GFX10SpeedModel, //===----------------------------------------------------------------------===// def : ProcessorModel<"gfx1100", GFX11SpeedModel, - FeatureISAVersion11_0.Features + FeatureISAVersion11_0_0.Features >; def : ProcessorModel<"gfx1101", GFX11SpeedModel, - FeatureISAVersion11_0.Features + FeatureISAVersion11_0_1.Features >; def : ProcessorModel<"gfx1102", GFX11SpeedModel, diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index 100410bb7644..04da14cc4916 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -733,7 +733,7 @@ void GCNScheduleDAGMILive::collectRematerializableInstructions() { MachineOperand *Op = MRI.getOneDef(Reg); MachineInstr *Def = Op->getParent(); - if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def, AA)) + if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def)) continue; MachineInstr *UseI = &*MRI.use_instr_nodbg_begin(Reg); @@ -943,9 +943,8 @@ bool GCNScheduleDAGMILive::sinkTriviallyRematInsts(const GCNSubtarget &ST, } // Copied from MachineLICM -bool GCNScheduleDAGMILive::isTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA) { - if (!TII->isTriviallyReMaterializable(MI, AA)) +bool GCNScheduleDAGMILive::isTriviallyReMaterializable(const MachineInstr &MI) { + if (!TII->isTriviallyReMaterializable(MI)) return false; for (const MachineOperand &MO : MI.operands()) diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h index 97f94f69b70e..c3db849cf81a 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h @@ -142,7 +142,7 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive { // and single use outside the defining block into RematerializableInsts. 
void collectRematerializableInstructions(); - bool isTriviallyReMaterializable(const MachineInstr &MI, AAResults *AA); + bool isTriviallyReMaterializable(const MachineInstr &MI); // TODO: Should also attempt to reduce RP of SGPRs and AGPRs // Attempt to reduce RP of VGPR by sinking trivially rematerializable diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index d269d0945f3b..d71f80c5f458 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -145,6 +145,7 @@ protected: bool HasDot7Insts = false; bool HasDot8Insts = false; bool HasMAIInsts = false; + bool HasFP8Insts = false; bool HasPkFmacF16Inst = false; bool HasAtomicFaddRtnInsts = false; bool HasAtomicFaddNoRtnInsts = false; @@ -721,6 +722,10 @@ public: return HasMAIInsts; } + bool hasFP8Insts() const { + return HasFP8Insts; + } + bool hasPkFmacF16Inst() const { return HasPkFmacF16Inst; } @@ -930,7 +935,7 @@ public: } bool hasUserSGPRInit16Bug() const { - return UserSGPRInit16Bug; + return UserSGPRInit16Bug && isWave32(); } bool hasNegativeScratchOffsetBug() const { return NegativeScratchOffsetBug; } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index bd938d829953..21ff2744e5b4 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -627,7 +627,7 @@ void AMDGPUInstPrinter::printWaitEXP(const MCInst *MI, unsigned OpNo, bool AMDGPUInstPrinter::needsImpliedVcc(const MCInstrDesc &Desc, unsigned OpNo) const { - return OpNo == 1 && (Desc.TSFlags & SIInstrFlags::DPP) && + return OpNo == 0 && (Desc.TSFlags & SIInstrFlags::DPP) && (Desc.TSFlags & SIInstrFlags::VOPC) && (Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC) || Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC_LO)); @@ -644,8 +644,7 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, // If there are printed modifiers, printOperandAndFPInputMods or // printOperandAndIntInputMods will be called instead if ((OpNo == 0 || - (OpNo == 1 && (Desc.TSFlags & SIInstrFlags::DPP)) || - (OpNo == 2 && (Desc.TSFlags & SIInstrFlags::DPP) && ModIdx != -1)) && + (OpNo == 1 && (Desc.TSFlags & SIInstrFlags::DPP) && ModIdx != -1)) && (Desc.TSFlags & SIInstrFlags::VOPC) && (Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC) || Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC_LO))) diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 078133469549..0e71509cf2bd 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -367,6 +367,8 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( PRINT_FIELD(OS, ".amdhsa_wavefront_size32", KD, kernel_code_properties, amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32); + PRINT_FIELD(OS, ".amdhsa_uses_dynamic_stack", KD, kernel_code_properties, + amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK); PRINT_FIELD(OS, (hasArchitectedFlatScratch(STI) ? 
".amdhsa_enable_private_segment" diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index f54778535b7c..3e95c55df57e 100644 --- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -67,6 +67,7 @@ #include "AMDGPU.h" #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "llvm/ADT/SetOperations.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/InitializePasses.h" #include "llvm/Target/TargetMachine.h" @@ -81,9 +82,9 @@ static cl::opt<bool> EnableM0Merge( cl::init(true)); namespace { - class SIFixSGPRCopies : public MachineFunctionPass { MachineDominatorTree *MDT; + unsigned NextVGPRToSGPRCopyID; public: static char ID; @@ -92,9 +93,16 @@ public: const SIRegisterInfo *TRI; const SIInstrInfo *TII; - SIFixSGPRCopies() : MachineFunctionPass(ID) {} + SIFixSGPRCopies() : MachineFunctionPass(ID), NextVGPRToSGPRCopyID(0) {} bool runOnMachineFunction(MachineFunction &MF) override; + unsigned getNextVGPRToSGPRCopyId() { return ++NextVGPRToSGPRCopyID; } + void lowerVGPR2SGPRCopies(MachineFunction &MF); + // Handles copies which source register is: + // 1. Physical register + // 2. AGPR + // 3. Defined by the instruction the merely moves the immediate + bool lowerSpecialCase(MachineInstr &MI); MachineBasicBlock *processPHINode(MachineInstr &MI); @@ -569,6 +577,14 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { TII = ST.getInstrInfo(); MDT = &getAnalysis<MachineDominatorTree>(); + // We have to lower VGPR to SGPR copies before the main loop + // because the REG_SEQUENCE and PHI lowering in main loop + // convert the def-use chains to VALU and close the opportunities + // for keeping them scalar. + // TODO: REG_SEQENCE and PHIs are semantically copies. The next patch + // addresses their lowering and unify the processing in one main loop. + lowerVGPR2SGPRCopies(MF); + for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) { MachineBasicBlock *MBB = &*BI; @@ -640,42 +656,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { continue; } - if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) { - Register SrcReg = MI.getOperand(1).getReg(); - if (!SrcReg.isVirtual()) { - MachineBasicBlock *NewBB = TII->moveToVALU(MI, MDT); - if (NewBB && NewBB != MBB) { - MBB = NewBB; - E = MBB->end(); - BI = MachineFunction::iterator(MBB); - BE = MF.end(); - } - assert((!NewBB || NewBB == I->getParent()) && - "moveToVALU did not return the right basic block"); - break; - } - - MachineInstr *DefMI = MRI->getVRegDef(SrcReg); - unsigned SMovOp; - int64_t Imm; - // If we are just copying an immediate, we can replace the copy with - // s_mov_b32. 
- if (isSafeToFoldImmIntoCopy(&MI, DefMI, TII, SMovOp, Imm)) { - MI.getOperand(1).ChangeToImmediate(Imm); - MI.addImplicitDefUseOperands(MF); - MI.setDesc(TII->get(SMovOp)); - break; - } - MachineBasicBlock *NewBB = TII->moveToVALU(MI, MDT); - if (NewBB && NewBB != MBB) { - MBB = NewBB; - E = MBB->end(); - BI = MachineFunction::iterator(MBB); - BE = MF.end(); - } - assert((!NewBB || NewBB == I->getParent()) && - "moveToVALU did not return the right basic block"); - } else if (isSGPRToVGPRCopy(SrcRC, DstRC, *TRI)) { + if (isSGPRToVGPRCopy(SrcRC, DstRC, *TRI)) { tryChangeVGPRtoSGPRinCopy(MI, TRI, TII); } @@ -916,3 +897,269 @@ MachineBasicBlock *SIFixSGPRCopies::processPHINode(MachineInstr &MI) { } return CreatedBB; } + +bool SIFixSGPRCopies::lowerSpecialCase(MachineInstr &MI) { + MachineBasicBlock *MBB = MI.getParent(); + const TargetRegisterClass *SrcRC, *DstRC; + std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, *MRI); + + // We return true to indicate that no further processing needed + if (!isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) + return true; + + Register SrcReg = MI.getOperand(1).getReg(); + if (!SrcReg.isVirtual() || TRI->isAGPR(*MRI, SrcReg)) { + TII->moveToVALU(MI, MDT); + return true; + } + + unsigned SMovOp; + int64_t Imm; + // If we are just copying an immediate, we can replace the copy with + // s_mov_b32. + if (isSafeToFoldImmIntoCopy(&MI, MRI->getVRegDef(SrcReg), TII, SMovOp, Imm)) { + MI.getOperand(1).ChangeToImmediate(Imm); + MI.addImplicitDefUseOperands(*MBB->getParent()); + MI.setDesc(TII->get(SMovOp)); + return true; + } + return false; +} + +class V2SCopyInfo { +public: + // VGPR to SGPR copy being processed + MachineInstr *Copy; + // All SALU instructions reachable from this copy in SSA graph + DenseSet<MachineInstr *> SChain; + // Number of SGPR to VGPR copies that are used to put the SALU computation + // results back to VALU. + unsigned NumSVCopies; + + unsigned Score; + // Actual count of v_readfirstlane_b32 + // which need to be inserted to keep SChain SALU + unsigned NumReadfirstlanes; + // Current score state. To speedup selection V2SCopyInfos for processing + bool NeedToBeConvertedToVALU = false; + // Unique ID. Used as a key for mapping to keep permanent order. 
+ unsigned ID; + + // Count of another VGPR to SGPR copies that contribute to the + // current copy SChain + unsigned SiblingPenalty = 0; + SetVector<unsigned> Siblings; + V2SCopyInfo() : Copy(nullptr), ID(0){}; + V2SCopyInfo(unsigned Id, MachineInstr *C, unsigned Width) + : Copy(C), NumSVCopies(0), NumReadfirstlanes(Width / 32), ID(Id){}; +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + void dump() { + dbgs() << ID << " : " << *Copy << "\n\tS:" << SChain.size() + << "\n\tSV:" << NumSVCopies << "\n\tSP: " << SiblingPenalty + << "\nScore: " << Score << "\n"; + } +#endif +}; + +void SIFixSGPRCopies::lowerVGPR2SGPRCopies(MachineFunction &MF) { + + DenseMap<unsigned, V2SCopyInfo> Copies; + DenseMap<MachineInstr *, SetVector<unsigned>> SiblingPenalty; + + // The main function that computes the VGPR to SGPR copy score + // and determines copy further lowering way: v_readfirstlane_b32 or moveToVALU + auto needToBeConvertedToVALU = [&](V2SCopyInfo *I) -> bool { + if (I->SChain.empty()) + return true; + I->Siblings = SiblingPenalty[*std::max_element( + I->SChain.begin(), I->SChain.end(), + [&](MachineInstr *A, MachineInstr *B) -> bool { + return SiblingPenalty[A].size() < SiblingPenalty[B].size(); + })]; + I->Siblings.remove_if([&](unsigned ID) { return ID == I->ID; }); + // The loop below computes the number of another VGPR to SGPR copies + // which contribute to the current copy SALU chain. We assume that all the + // copies with the same source virtual register will be squashed to one by + // regalloc. Also we take careof the copies of the differnt subregs of the + // same register. + SmallSet<std::pair<Register, unsigned>, 4> SrcRegs; + for (auto J : I->Siblings) { + auto InfoIt = Copies.find(J); + if (InfoIt != Copies.end()) { + MachineInstr *SiblingCopy = InfoIt->getSecond().Copy; + if (SiblingCopy->isImplicitDef()) + // the COPY has already been MoveToVALUed + continue; + + SrcRegs.insert(std::make_pair(SiblingCopy->getOperand(1).getReg(), + SiblingCopy->getOperand(1).getSubReg())); + } + } + I->SiblingPenalty = SrcRegs.size(); + + unsigned Penalty = + I->NumSVCopies + I->SiblingPenalty + I->NumReadfirstlanes; + unsigned Profit = I->SChain.size(); + I->Score = Penalty > Profit ? 0 : Profit - Penalty; + I->NeedToBeConvertedToVALU = I->Score < 3; + return I->NeedToBeConvertedToVALU; + }; + + auto needProcessing = [](MachineInstr &MI) -> bool { + switch (MI.getOpcode()) { + case AMDGPU::COPY: + case AMDGPU::WQM: + case AMDGPU::STRICT_WQM: + case AMDGPU::SOFT_WQM: + case AMDGPU::STRICT_WWM: + return true; + default: + return false; + } + }; + + for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; + ++BI) { + MachineBasicBlock *MBB = &*BI; + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; + ++I) { + MachineInstr &MI = *I; + if (!needProcessing(MI)) + continue; + if (lowerSpecialCase(MI)) + continue; + + // Compute the COPY width to pass it to V2SCopyInfo Ctor + Register DstReg = MI.getOperand(0).getReg(); + + const TargetRegisterClass *DstRC = TRI->getRegClassForReg(*MRI, DstReg); + + V2SCopyInfo In(getNextVGPRToSGPRCopyId(), &MI, + TRI->getRegSizeInBits(*DstRC)); + + SmallVector<MachineInstr *, 8> AnalysisWorklist; + // Needed because the SSA is not a tree but a graph and may have + // forks and joins. We should not then go same way twice. 
+ DenseSet<MachineInstr *> Visited; + AnalysisWorklist.push_back(&MI); + while (!AnalysisWorklist.empty()) { + + MachineInstr *Inst = AnalysisWorklist.pop_back_val(); + + if (!Visited.insert(Inst).second) + continue; + + // Copies and REG_SEQUENCE do not contribute to the final assembly + // So, skip them but take care of the SGPR to VGPR copies bookkeeping. + if (Inst->isCopy() || Inst->isRegSequence()) { + if (TRI->isVGPR(*MRI, Inst->getOperand(0).getReg())) { + if (!Inst->isCopy() || + !tryChangeVGPRtoSGPRinCopy(*Inst, TRI, TII)) { + In.NumSVCopies++; + continue; + } + } + } + + SiblingPenalty[Inst].insert(In.ID); + + SmallVector<MachineInstr *, 4> Users; + if ((TII->isSALU(*Inst) && Inst->isCompare()) || + (Inst->isCopy() && Inst->getOperand(0).getReg() == AMDGPU::SCC)) { + auto I = Inst->getIterator(); + auto E = Inst->getParent()->end(); + while (++I != E && !I->findRegisterDefOperand(AMDGPU::SCC)) { + if (I->readsRegister(AMDGPU::SCC)) + Users.push_back(&*I); + } + } else if (Inst->getNumExplicitDefs() != 0) { + Register Reg = Inst->getOperand(0).getReg(); + if (TRI->isSGPRReg(*MRI, Reg)) + for (auto &U : MRI->use_instructions(Reg)) + Users.push_back(&U); + } + for (auto U : Users) { + if (TII->isSALU(*U)) + In.SChain.insert(U); + AnalysisWorklist.push_back(U); + } + } + Copies[In.ID] = In; + } + } + + SmallVector<unsigned, 8> LoweringWorklist; + for (auto &C : Copies) { + if (needToBeConvertedToVALU(&C.second)) + LoweringWorklist.push_back(C.second.ID); + } + + while (!LoweringWorklist.empty()) { + unsigned CurID = LoweringWorklist.pop_back_val(); + auto CurInfoIt = Copies.find(CurID); + if (CurInfoIt != Copies.end()) { + V2SCopyInfo C = CurInfoIt->getSecond(); + LLVM_DEBUG(dbgs() << "Processing ...\n"; C.dump()); + for (auto S : C.Siblings) { + auto SibInfoIt = Copies.find(S); + if (SibInfoIt != Copies.end()) { + V2SCopyInfo &SI = SibInfoIt->getSecond(); + LLVM_DEBUG(dbgs() << "Sibling:\n"; SI.dump()); + if (!SI.NeedToBeConvertedToVALU) { + set_subtract(SI.SChain, C.SChain); + if (needToBeConvertedToVALU(&SI)) + LoweringWorklist.push_back(SI.ID); + } + SI.Siblings.remove_if([&](unsigned ID) { return ID == C.ID; }); + } + } + LLVM_DEBUG(dbgs() << "V2S copy " << *C.Copy + << " is being turned to VALU\n"); + Copies.erase(C.ID); + TII->moveToVALU(*C.Copy, MDT); + } + } + + // Now do actual lowering + for (auto C : Copies) { + MachineInstr *MI = C.second.Copy; + MachineBasicBlock *MBB = MI->getParent(); + // We decide to turn V2S copy to v_readfirstlane_b32 + // remove it from the V2SCopies and remove it from all its siblings + LLVM_DEBUG(dbgs() << "V2S copy " << *MI + << " is being turned to v_readfirstlane_b32" + << " Score: " << C.second.Score << "\n"); + Register DstReg = MI->getOperand(0).getReg(); + Register SrcReg = MI->getOperand(1).getReg(); + unsigned SubReg = MI->getOperand(1).getSubReg(); + const TargetRegisterClass *SrcRC = TRI->getRegClassForReg(*MRI, SrcReg); + SrcRC = TRI->getSubRegClass(SrcRC, SubReg); + size_t SrcSize = TRI->getRegSizeInBits(*SrcRC); + if (SrcSize == 16) { + // HACK to handle possible 16bit VGPR source + auto MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), + TII->get(AMDGPU::V_READFIRSTLANE_B32), DstReg); + MIB.addReg(SrcReg, 0, AMDGPU::NoSubRegister); + } else if (SrcSize == 32) { + auto MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), + TII->get(AMDGPU::V_READFIRSTLANE_B32), DstReg); + MIB.addReg(SrcReg, 0, SubReg); + } else { + auto Result = BuildMI(*MBB, MI, MI->getDebugLoc(), + TII->get(AMDGPU::REG_SEQUENCE), DstReg); + int N = TRI->getRegSizeInBits(*SrcRC) 
/ 32; + for (int i = 0; i < N; i++) { + Register PartialSrc = TII->buildExtractSubReg( + Result, *MRI, MI->getOperand(1), SrcRC, + TRI->getSubRegFromChannel(i), &AMDGPU::VGPR_32RegClass); + Register PartialDst = + MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); + BuildMI(*MBB, *Result, Result->getDebugLoc(), + TII->get(AMDGPU::V_READFIRSTLANE_B32), PartialDst) + .addReg(PartialSrc); + Result.addReg(PartialDst).addImm(TRI->getSubRegFromChannel(i)); + } + } + MI->eraseFromParent(); + } +} diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index d16da2a8b86b..438e8b200ecc 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1664,6 +1664,17 @@ SDValue SITargetLowering::getImplicitArgPtr(SelectionDAG &DAG, return lowerKernArgParameterPtr(DAG, SL, DAG.getEntryNode(), Offset); } +SDValue SITargetLowering::getLDSKernelId(SelectionDAG &DAG, + const SDLoc &SL) const { + + Function &F = DAG.getMachineFunction().getFunction(); + Optional<uint32_t> KnownSize = + AMDGPUMachineFunction::getLDSKernelIdMetadata(F); + if (KnownSize.has_value()) + return DAG.getConstant(KnownSize.value(), SL, MVT::i32); + return SDValue(); +} + SDValue SITargetLowering::convertArgType(SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL, SDValue Val, bool Signed, @@ -2049,6 +2060,9 @@ void SITargetLowering::allocateSpecialInputSGPRs( if (Info.hasWorkGroupIDZ()) allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDZ); + + if (Info.hasLDSKernelId()) + allocateSGPR32Input(CCInfo, ArgInfo.LDSKernelId); } // Allocate special inputs passed in user SGPRs. @@ -2102,6 +2116,12 @@ void SITargetLowering::allocateHSAUserSGPRs(CCState &CCInfo, CCInfo.AllocateReg(FlatScratchInitReg); } + if (Info.hasLDSKernelId()) { + Register Reg = Info.addLDSKernelId(); + MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass); + CCInfo.AllocateReg(Reg); + } + // TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read // these from the dispatch pointer. } @@ -2347,8 +2367,8 @@ SDValue SITargetLowering::LowerFormalArguments( (!Info->hasFlatScratchInit() || Subtarget->enableFlatScratch()) && !Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() && !Info->hasWorkGroupIDZ() && !Info->hasWorkGroupInfo() && - !Info->hasWorkItemIDX() && !Info->hasWorkItemIDY() && - !Info->hasWorkItemIDZ()); + !Info->hasLDSKernelId() && !Info->hasWorkItemIDX() && + !Info->hasWorkItemIDY() && !Info->hasWorkItemIDZ()); } if (CallConv == CallingConv::AMDGPU_PS) { @@ -2762,7 +2782,8 @@ void SITargetLowering::passSpecialInputs( {AMDGPUFunctionArgInfo::DISPATCH_ID, "amdgpu-no-dispatch-id"}, {AMDGPUFunctionArgInfo::WORKGROUP_ID_X, "amdgpu-no-workgroup-id-x"}, {AMDGPUFunctionArgInfo::WORKGROUP_ID_Y,"amdgpu-no-workgroup-id-y"}, - {AMDGPUFunctionArgInfo::WORKGROUP_ID_Z,"amdgpu-no-workgroup-id-z"} + {AMDGPUFunctionArgInfo::WORKGROUP_ID_Z,"amdgpu-no-workgroup-id-z"}, + {AMDGPUFunctionArgInfo::LDS_KERNEL_ID,"amdgpu-no-lds-kernel-id"}, }; for (auto Attr : ImplicitAttrs) { @@ -2798,6 +2819,13 @@ void SITargetLowering::passSpecialInputs( // The implicit arg ptr is special because it doesn't have a corresponding // input for kernels, and is computed from the kernarg segment pointer. 
InputReg = getImplicitArgPtr(DAG, DL); + } else if (InputID == AMDGPUFunctionArgInfo::LDS_KERNEL_ID) { + Optional<uint32_t> Id = AMDGPUMachineFunction::getLDSKernelIdMetadata(F); + if (Id.has_value()) { + InputReg = DAG.getConstant(Id.value(), DL, ArgVT); + } else { + InputReg = DAG.getUNDEF(ArgVT); + } } else { // We may have proven the input wasn't needed, although the ABI is // requiring it. We just need to allocate the register appropriately. @@ -6887,6 +6915,12 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::amdgcn_workgroup_id_z: return getPreloadedValue(DAG, *MFI, VT, AMDGPUFunctionArgInfo::WORKGROUP_ID_Z); + case Intrinsic::amdgcn_lds_kernel_id: { + if (MFI->isEntryFunction()) + return getLDSKernelId(DAG, DL); + return getPreloadedValue(DAG, *MFI, VT, + AMDGPUFunctionArgInfo::LDS_KERNEL_ID); + } case Intrinsic::amdgcn_workitem_id_x: return lowerWorkitemID(DAG, Op, 0, MFI->getArgInfo().WorkItemIDX); case Intrinsic::amdgcn_workitem_id_y: diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index 4fbccf0c5850..d1fecc1afc7f 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -48,6 +48,7 @@ private: SDValue lowerKernArgParameterPtr(SelectionDAG &DAG, const SDLoc &SL, SDValue Chain, uint64_t Offset) const; SDValue getImplicitArgPtr(SelectionDAG &DAG, const SDLoc &SL) const; + SDValue getLDSKernelId(SelectionDAG &DAG, const SDLoc &SL) const; SDValue lowerKernargMemParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL, SDValue Chain, uint64_t Offset, Align Alignment, diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td index b398e108bf62..7c1d8d32b624 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td +++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td @@ -85,7 +85,7 @@ class InstSI <dag outs, dag ins, string asm = "", field bit VOPAsmPrefer32Bit = 0; // This bit indicates that this is a VOP3 opcode which supports op_sel - // modifier (gfx9 only). + // modifier. field bit VOP3_OPSEL = 0; // Is it possible for this instruction to be atomic? diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 799d34e32d27..8916f06598c6 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -108,8 +108,8 @@ static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) { return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx); } -bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA) const { +bool SIInstrInfo::isReallyTriviallyReMaterializable( + const MachineInstr &MI) const { if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isSDWA(MI) || isSALU(MI)) { // Normally VALU use of exec would block the rematerialization, but that // is OK in this case to have an implicit exec read as all VALU do. @@ -220,16 +220,23 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::sbase) == -1) return false; - assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1)); + unsigned NumOps = getNumOperandsNoGlue(Load0); + if (NumOps != getNumOperandsNoGlue(Load1)) + return false; // Check base reg. if (Load0->getOperand(0) != Load1->getOperand(0)) return false; + // Match register offsets, if both register and immediate offsets present. 
+ assert(NumOps == 4 || NumOps == 5); + if (NumOps == 5 && Load0->getOperand(1) != Load1->getOperand(1)) + return false; + const ConstantSDNode *Load0Offset = - dyn_cast<ConstantSDNode>(Load0->getOperand(1)); + dyn_cast<ConstantSDNode>(Load0->getOperand(NumOps - 3)); const ConstantSDNode *Load1Offset = - dyn_cast<ConstantSDNode>(Load1->getOperand(1)); + dyn_cast<ConstantSDNode>(Load1->getOperand(NumOps - 3)); if (!Load0Offset || !Load1Offset) return false; @@ -5011,10 +5018,8 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx, } if (MO->isReg()) { - if (!DefinedRC) { - // This operand allows any register. - return true; - } + if (!DefinedRC) + return OpInfo.OperandType == MCOI::OPERAND_UNKNOWN; if (!isLegalRegOperand(MRI, OpInfo, *MO)) return false; bool IsAGPR = RI.isAGPR(MRI, MO->getReg()); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 1b411eb83eb3..5840f45bdc5a 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -184,8 +184,7 @@ public: return ST; } - bool isReallyTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA) const override; + bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override; bool isIgnorableUse(const MachineOperand &MO) const override; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 23afd6556bc9..81f8dcc482da 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -324,7 +324,8 @@ class isFloatType<ValueType SrcVT> { // XXX - do v2i16 instructions? class isIntType<ValueType SrcVT> { - bit ret = !or(!eq(SrcVT.Value, i16.Value), + bit ret = !or(!eq(SrcVT.Value, i8.Value), + !eq(SrcVT.Value, i16.Value), !eq(SrcVT.Value, i32.Value), !eq(SrcVT.Value, i64.Value), !eq(SrcVT.Value, v4i16.Value), @@ -1411,6 +1412,10 @@ class IntSDWAInputModsMatchClass <int opSize> : AsmOperandClass { def Int16SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<16>; def Int32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32>; +def Bin32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32> { + let Name = "SDWAWithBin32InputMods"; + let ParserMethod = "parseRegOrImm"; +} class IntSDWAInputMods <IntSDWAInputModsMatchClass matchClass> : InputMods <matchClass> { @@ -1419,6 +1424,7 @@ class IntSDWAInputMods <IntSDWAInputModsMatchClass matchClass> : def Int16SDWAInputMods : IntSDWAInputMods<Int16SDWAInputModsMatchClass>; def Int32SDWAInputMods : IntSDWAInputMods<Int32SDWAInputModsMatchClass>; +def Bin32SDWAInputMods : IntSDWAInputMods<Bin32SDWAInputModsMatchClass>; def IntVRegInputModsMatchClass : AsmOperandClass { let Name = "VRegWithIntInputMods"; @@ -1897,94 +1903,94 @@ class getInsVOP3OpSel <RegisterOperand Src0RC, RegisterOperand Src1RC, class getInsDPPBase <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC, RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers, - Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> { + Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld> { - dag ret = !if (!eq(NumSrcArgs, 0), + dag ret = !if(!eq(NumSrcArgs, 0), // VOP1 without input operands (V_NOP) (ins ), - !if (!eq(NumSrcArgs, 1), - !if (HasModifiers, - // VOP1_DPP with modifiers - (ins OldRC:$old, Src0Mod:$src0_modifiers, - Src0RC:$src0) - /* else */, - // VOP1_DPP without modifiers - (ins OldRC:$old, Src0RC:$src0) - /* endif */), - !if (!eq(NumSrcArgs, 2), - !if (HasModifiers, - // VOP2_DPP with modifiers - (ins OldRC:$old, - 
Src0Mod:$src0_modifiers, Src0RC:$src0, - Src1Mod:$src1_modifiers, Src1RC:$src1) - /* else */, - // VOP2_DPP without modifiers - (ins OldRC:$old, - Src0RC:$src0, Src1RC:$src1) - ) - /* NumSrcArgs == 3, VOP3 */, - !if (HasModifiers, - // VOP3_DPP with modifiers - (ins OldRC:$old, - Src0Mod:$src0_modifiers, Src0RC:$src0, - Src1Mod:$src1_modifiers, Src1RC:$src1, - Src2Mod:$src2_modifiers, Src2RC:$src2) - /* else */, - // VOP3_DPP without modifiers - (ins OldRC:$old, - Src0RC:$src0, Src1RC:$src1, - Src2RC:$src2) + !con( + !if(HasOld ,(ins OldRC:$old), (ins)), + !if (!eq(NumSrcArgs, 1), + !if (HasModifiers, + // VOP1_DPP with modifiers + (ins Src0Mod:$src0_modifiers, Src0RC:$src0) + /* else */, + // VOP1_DPP without modifiers + (ins Src0RC:$src0) + /* endif */), + !if (!eq(NumSrcArgs, 2), + !if (HasModifiers, + // VOP2_DPP with modifiers + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, + Src1Mod:$src1_modifiers, Src1RC:$src1) + /* else */, + // VOP2_DPP without modifiers + (ins Src0RC:$src0, Src1RC:$src1) + ) + /* NumSrcArgs == 3, VOP3 */, + !if (HasModifiers, + // VOP3_DPP with modifiers + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, + Src1Mod:$src1_modifiers, Src1RC:$src1, + Src2Mod:$src2_modifiers, Src2RC:$src2) + /* else */, + // VOP3_DPP without modifiers + (ins Src0RC:$src0, Src1RC:$src1, + Src2RC:$src2) + ) + ) + ) ) - /* endif */))); + ); } class getInsDPP <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC, RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers, - Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> { + Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> { dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs, - HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret, + HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret, (ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)); } class getInsDPP16 <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC, RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers, - Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> { + Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> { dag ret = !con(getInsDPP<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs, - HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret, + HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret, (ins FI:$fi)); } class getInsDPP8 <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC, RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers, - Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> { + Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> { dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs, - HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret, + HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret, (ins dpp8:$dpp8, FI:$fi)); } -class getInsVOP3DPPBase<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs> { +class getInsVOP3DPPBase<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld> { dag old = ( ins OldRC:$old ); dag base = VOP3Base; dag ret = !con( - !if(!ne(NumSrcArgs, 0), old, (ins)), + !if(!and(HasOld,!ne(NumSrcArgs, 0)), old, (ins)), base ); } -class getInsVOP3DPP<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs> { - dag ret = !con(getInsVOP3DPPBase<VOP3Base,OldRC,NumSrcArgs>.ret, +class getInsVOP3DPP<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> { + dag ret = !con(getInsVOP3DPPBase<VOP3Base,OldRC,NumSrcArgs,HasOld>.ret, (ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, 
bound_ctrl:$bound_ctrl)); } -class getInsVOP3DPP16<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs> { - dag ret = !con(getInsVOP3DPP<VOP3Base,OldRC,NumSrcArgs>.ret, +class getInsVOP3DPP16<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> { + dag ret = !con(getInsVOP3DPP<VOP3Base,OldRC,NumSrcArgs,HasOld>.ret, (ins FI:$fi)); } -class getInsVOP3DPP8<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs> { - dag ret = !con(getInsVOP3DPPBase<VOP3Base,OldRC,NumSrcArgs>.ret, +class getInsVOP3DPP8<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> { + dag ret = !con(getInsVOP3DPPBase<VOP3Base,OldRC,NumSrcArgs,HasOld>.ret, (ins dpp8:$dpp8, FI:$fi)); } @@ -2665,6 +2671,8 @@ def VOP_V4I32_I64_I64_V4I32 : VOPProfile <[v4i32, i64, i64, v4i32]>; def VOP_V16I32_I64_I64_V16I32 : VOPProfile <[v16i32, i64, i64, v16i32]>; def VOP_V4F32_V2F32_V2F32_V4F32 : VOPProfile <[v4f32, v2f32, v2f32, v4f32]>; def VOP_V16F32_V2F32_V2F32_V16F32 : VOPProfile <[v16f32, v2f32, v2f32, v16f32]>; +def VOP_V4F32_I64_I64_V4F32 : VOPProfile <[v4f32, i64, i64, v4f32]>; +def VOP_V16F32_I64_I64_V16F32 : VOPProfile <[v16f32, i64, i64, v16f32]>; def VOP_V4F32_V4F16_V8F16_I32 : VOPProfile <[v4f32, v4f16, v8f16, i32]>; def VOP_V16F32_V4F16_V8F16_I32 : VOPProfile <[v16f32, v4f16, v8f16, i32]>; @@ -2672,6 +2680,8 @@ def VOP_V4F32_V4I16_V8I16_I32 : VOPProfile <[v4f32, v4i16, v8i16, i32]>; def VOP_V16F32_V4I16_V8I16_I32 : VOPProfile <[v16f32, v4i16, v8i16, i32]>; def VOP_V4I32_V2I32_V4I32_I32 : VOPProfile <[v4i32, v2i32, v4i32, i32]>; def VOP_V16I32_V2I32_V4I32_I32 : VOPProfile <[v16i32, v2i32, v4i32, i32]>; +def VOP_V4F32_V2I32_V4I32_I32 : VOPProfile <[v4f32, v2i32, v4i32, i32]>; +def VOP_V16F32_V2I32_V4I32_I32 : VOPProfile <[v16f32, v2i32, v4i32, i32]>; class Commutable_REV <string revOp, bit isOrig> { string RevOp = revOp; diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 0504c59ebd9e..9176e85568ee 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -44,6 +44,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) WorkGroupIDY(false), WorkGroupIDZ(false), WorkGroupInfo(false), + LDSKernelId(false), PrivateSegmentWaveByteOffset(false), WorkItemIDX(false), WorkItemIDY(false), @@ -143,6 +144,9 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) if (!F.hasFnAttribute("amdgpu-no-dispatch-id")) DispatchID = true; + + if (!IsKernel && !F.hasFnAttribute("amdgpu-no-lds-kernel-id")) + LDSKernelId = true; } // FIXME: This attribute is a hack, we just need an analysis on the function @@ -261,6 +265,12 @@ Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) return ArgInfo.ImplicitBufferPtr.getRegister(); } +Register SIMachineFunctionInfo::addLDSKernelId() { + ArgInfo.LDSKernelId = ArgDescriptor::createRegister(getNextUserSGPR()); + NumUserSGPRs += 1; + return ArgInfo.LDSKernelId.getRegister(); +} + bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) { for (unsigned I = 0; CSRegs[I]; ++I) { @@ -561,6 +571,7 @@ convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo, Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr); Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID); Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit); + Any |= convertArg(AI.LDSKernelId, ArgInfo.LDSKernelId); Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize); 
Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX); Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY); diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index bebb13cbf09f..5105587617fd 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -191,6 +191,7 @@ struct SIArgumentInfo { Optional<SIArgument> WorkGroupIDY; Optional<SIArgument> WorkGroupIDZ; Optional<SIArgument> WorkGroupInfo; + Optional<SIArgument> LDSKernelId; Optional<SIArgument> PrivateSegmentWaveByteOffset; Optional<SIArgument> ImplicitArgPtr; @@ -215,6 +216,7 @@ template <> struct MappingTraits<SIArgumentInfo> { YamlIO.mapOptional("workGroupIDY", AI.WorkGroupIDY); YamlIO.mapOptional("workGroupIDZ", AI.WorkGroupIDZ); YamlIO.mapOptional("workGroupInfo", AI.WorkGroupInfo); + YamlIO.mapOptional("LDSKernelId", AI.LDSKernelId); YamlIO.mapOptional("privateSegmentWaveByteOffset", AI.PrivateSegmentWaveByteOffset); @@ -418,6 +420,7 @@ private: bool WorkGroupIDY : 1; bool WorkGroupIDZ : 1; bool WorkGroupInfo : 1; + bool LDSKernelId : 1; bool PrivateSegmentWaveByteOffset : 1; bool WorkItemIDX : 1; // Always initialized. @@ -608,6 +611,7 @@ public: Register addDispatchID(const SIRegisterInfo &TRI); Register addFlatScratchInit(const SIRegisterInfo &TRI); Register addImplicitBufferPtr(const SIRegisterInfo &TRI); + Register addLDSKernelId(); /// Increment user SGPRs used for padding the argument list only. Register addReservedUserSGPR() { @@ -705,6 +709,8 @@ public: return WorkGroupInfo; } + bool hasLDSKernelId() const { return LDSKernelId; } + bool hasPrivateSegmentWaveByteOffset() const { return PrivateSegmentWaveByteOffset; } diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp index 66bc46aaefea..19a83ad53e2e 100644 --- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp +++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp @@ -12,6 +12,8 @@ #include "SIRegisterInfo.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/InitializePasses.h" using namespace llvm; @@ -26,6 +28,10 @@ class SIOptimizeExecMasking : public MachineFunctionPass { const SIRegisterInfo *TRI = nullptr; const SIInstrInfo *TII = nullptr; const MachineRegisterInfo *MRI = nullptr; + MCRegister Exec; + + DenseMap<MachineInstr *, MachineInstr *> SaveExecVCmpMapping; + SmallVector<std::pair<MachineInstr *, MachineInstr *>, 1> OrXors; Register isCopyFromExec(const MachineInstr &MI) const; Register isCopyToExec(const MachineInstr &MI) const; @@ -44,13 +50,13 @@ class SIOptimizeExecMasking : public MachineFunctionPass { std::function<bool(MachineInstr *)> Pred, ArrayRef<MCRegister> NonModifiableRegs, unsigned MaxInstructions = 20) const; - MachineInstr *findPossibleVCMPVCMPXOptimization(MachineInstr &SaveExec, - MCRegister Exec) const; - bool optimizeExecSequence() const; - bool optimizeVCmpxAndSaveexecSequence() const; - bool optimizeSingleVCMPSaveExecSequence(MachineInstr &SaveExecInstr, - MachineInstr &VCmp, - MCRegister Exec) const; + bool optimizeExecSequence(); + void tryRecordVCmpxAndSaveexecSequence(MachineInstr &MI); + bool optimizeVCMPSaveExecSequence(MachineInstr &SaveExecInstr, + MachineInstr &VCmp, MCRegister Exec) const; + + void tryRecordOrSaveexecXorSequence(MachineInstr &MI); + bool optimizeOrSaveexecXorSequences(); public: static char 
ID; @@ -92,7 +98,7 @@ Register SIOptimizeExecMasking::isCopyFromExec(const MachineInstr &MI) const { case AMDGPU::S_MOV_B32: case AMDGPU::S_MOV_B32_term: { const MachineOperand &Src = MI.getOperand(1); - if (Src.isReg() && Src.getReg() == TRI->getExec()) + if (Src.isReg() && Src.getReg() == Exec) return MI.getOperand(0).getReg(); } } @@ -107,8 +113,7 @@ Register SIOptimizeExecMasking::isCopyToExec(const MachineInstr &MI) const { case AMDGPU::S_MOV_B64: case AMDGPU::S_MOV_B32: { const MachineOperand &Dst = MI.getOperand(0); - if (Dst.isReg() && Dst.getReg() == TRI->getExec() && - MI.getOperand(1).isReg()) + if (Dst.isReg() && Dst.getReg() == Exec && MI.getOperand(1).isReg()) return MI.getOperand(1).getReg(); break; } @@ -394,9 +399,7 @@ bool SIOptimizeExecMasking::isRegisterInUseAfter(MachineInstr &Stop, // => // x = s_<op>_saveexec_b64 y // -bool SIOptimizeExecMasking::optimizeExecSequence() const { - MCRegister Exec = TRI->getExec(); - +bool SIOptimizeExecMasking::optimizeExecSequence() { bool Changed = false; for (MachineBasicBlock &MBB : *MF) { MachineBasicBlock::reverse_iterator I = fixTerminators(MBB); @@ -551,88 +554,9 @@ bool SIOptimizeExecMasking::optimizeExecSequence() const { return Changed; } -// Tries to find a possibility to optimize a v_cmp ..., s_and_saveexec sequence -// by looking at an instance of a s_and_saveexec instruction. Returns a pointer -// to the v_cmp instruction if it is safe to replace the sequence (see the -// conditions in the function body). This is after register allocation, so some -// checks on operand dependencies need to be considered. -MachineInstr *SIOptimizeExecMasking::findPossibleVCMPVCMPXOptimization( - MachineInstr &SaveExec, MCRegister Exec) const { - - MachineInstr *VCmp = nullptr; - - Register SaveExecDest = SaveExec.getOperand(0).getReg(); - if (!TRI->isSGPRReg(*MRI, SaveExecDest)) - return nullptr; - - MachineOperand *SaveExecSrc0 = - TII->getNamedOperand(SaveExec, AMDGPU::OpName::src0); - if (!SaveExecSrc0->isReg()) - return nullptr; - - // Try to find the last v_cmp instruction that defs the saveexec input - // operand without any write to Exec or the saveexec input operand inbetween. - VCmp = findInstrBackwards( - SaveExec, - [&](MachineInstr *Check) { - return AMDGPU::getVCMPXOpFromVCMP(Check->getOpcode()) != -1 && - Check->modifiesRegister(SaveExecSrc0->getReg(), TRI); - }, - {Exec, SaveExecSrc0->getReg()}); - - if (!VCmp) - return nullptr; - - MachineOperand *VCmpDest = TII->getNamedOperand(*VCmp, AMDGPU::OpName::sdst); - assert(VCmpDest && "Should have an sdst operand!"); - - // Check if any of the v_cmp source operands is written by the saveexec. - MachineOperand *Src0 = TII->getNamedOperand(*VCmp, AMDGPU::OpName::src0); - if (Src0->isReg() && TRI->isSGPRReg(*MRI, Src0->getReg()) && - SaveExec.modifiesRegister(Src0->getReg(), TRI)) - return nullptr; - - MachineOperand *Src1 = TII->getNamedOperand(*VCmp, AMDGPU::OpName::src1); - if (Src1->isReg() && TRI->isSGPRReg(*MRI, Src1->getReg()) && - SaveExec.modifiesRegister(Src1->getReg(), TRI)) - return nullptr; - - // Don't do the transformation if the destination operand is included in - // it's MBB Live-outs, meaning it's used in any of it's successors, leading - // to incorrect code if the v_cmp and therefore the def of - // the dest operand is removed. - if (isLiveOut(*VCmp->getParent(), VCmpDest->getReg())) - return nullptr; - - // If the v_cmp target is in use between v_cmp and s_and_saveexec or after the - // s_and_saveexec, skip the optimization. 
- if (isRegisterInUseBetween(*VCmp, SaveExec, VCmpDest->getReg(), false, - true) || - isRegisterInUseAfter(SaveExec, VCmpDest->getReg())) - return nullptr; - - // Try to determine if there is a write to any of the VCmp - // operands between the saveexec and the vcmp. - // If yes, additional VGPR spilling might need to be inserted. In this case, - // it's not worth replacing the instruction sequence. - SmallVector<MCRegister, 2> NonDefRegs; - if (Src0->isReg()) - NonDefRegs.push_back(Src0->getReg()); - - if (Src1->isReg()) - NonDefRegs.push_back(Src1->getReg()); - - if (!findInstrBackwards( - SaveExec, [&](MachineInstr *Check) { return Check == VCmp; }, - NonDefRegs)) - return nullptr; - - return VCmp; -} - // Inserts the optimized s_mov_b32 / v_cmpx sequence based on the // operands extracted from a v_cmp ..., s_and_saveexec pattern. -bool SIOptimizeExecMasking::optimizeSingleVCMPSaveExecSequence( +bool SIOptimizeExecMasking::optimizeVCMPSaveExecSequence( MachineInstr &SaveExecInstr, MachineInstr &VCmp, MCRegister Exec) const { const int NewOpcode = AMDGPU::getVCMPXOpFromVCMP(VCmp.getOpcode()); @@ -678,50 +602,164 @@ bool SIOptimizeExecMasking::optimizeSingleVCMPSaveExecSequence( if (Src1->isReg()) MRI->clearKillFlags(Src1->getReg()); + SaveExecInstr.eraseFromParent(); + VCmp.eraseFromParent(); + return true; } -// After all s_op_saveexec instructions are inserted, -// replace (on GFX10.3 and later) +// Record (on GFX10.3 and later) occurences of // v_cmp_* SGPR, IMM, VGPR // s_and_saveexec_b32 EXEC_SGPR_DEST, SGPR -// with +// to be replaced with // s_mov_b32 EXEC_SGPR_DEST, exec_lo // v_cmpx_* IMM, VGPR // to reduce pipeline stalls. -bool SIOptimizeExecMasking::optimizeVCmpxAndSaveexecSequence() const { +void SIOptimizeExecMasking::tryRecordVCmpxAndSaveexecSequence( + MachineInstr &MI) { if (!ST->hasGFX10_3Insts()) - return false; + return; - bool Changed = false; - - DenseMap<MachineInstr *, MachineInstr *> SaveExecVCmpMapping; - MCRegister Exec = TRI->getExec(); const unsigned AndSaveExecOpcode = ST->isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64; - for (MachineBasicBlock &MBB : *MF) { - for (MachineInstr &MI : MBB) { - // Record relevant v_cmp / s_and_saveexec instruction pairs for - // replacement. - if (MI.getOpcode() != AndSaveExecOpcode) - continue; + if (MI.getOpcode() != AndSaveExecOpcode) + return; + + Register SaveExecDest = MI.getOperand(0).getReg(); + if (!TRI->isSGPRReg(*MRI, SaveExecDest)) + return; - if (MachineInstr *VCmp = findPossibleVCMPVCMPXOptimization(MI, Exec)) - SaveExecVCmpMapping[&MI] = VCmp; + MachineOperand *SaveExecSrc0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); + if (!SaveExecSrc0->isReg()) + return; + + // Tries to find a possibility to optimize a v_cmp ..., s_and_saveexec + // sequence by looking at an instance of a s_and_saveexec instruction. Returns + // a pointer to the v_cmp instruction if it is safe to replace the sequence + // (see the conditions in the function body). This is after register + // allocation, so some checks on operand dependencies need to be considered. + MachineInstr *VCmp = nullptr; + + // Try to find the last v_cmp instruction that defs the saveexec input + // operand without any write to Exec or the saveexec input operand inbetween. 
+ VCmp = findInstrBackwards( + MI, + [&](MachineInstr *Check) { + return AMDGPU::getVCMPXOpFromVCMP(Check->getOpcode()) != -1 && + Check->modifiesRegister(SaveExecSrc0->getReg(), TRI); + }, + {Exec, SaveExecSrc0->getReg()}); + + if (!VCmp) + return; + + MachineOperand *VCmpDest = TII->getNamedOperand(*VCmp, AMDGPU::OpName::sdst); + assert(VCmpDest && "Should have an sdst operand!"); + + // Check if any of the v_cmp source operands is written by the saveexec. + MachineOperand *Src0 = TII->getNamedOperand(*VCmp, AMDGPU::OpName::src0); + if (Src0->isReg() && TRI->isSGPRReg(*MRI, Src0->getReg()) && + MI.modifiesRegister(Src0->getReg(), TRI)) + return; + + MachineOperand *Src1 = TII->getNamedOperand(*VCmp, AMDGPU::OpName::src1); + if (Src1->isReg() && TRI->isSGPRReg(*MRI, Src1->getReg()) && + MI.modifiesRegister(Src1->getReg(), TRI)) + return; + + // Don't do the transformation if the destination operand is included in + // it's MBB Live-outs, meaning it's used in any of it's successors, leading + // to incorrect code if the v_cmp and therefore the def of + // the dest operand is removed. + if (isLiveOut(*VCmp->getParent(), VCmpDest->getReg())) + return; + + // If the v_cmp target is in use between v_cmp and s_and_saveexec or after the + // s_and_saveexec, skip the optimization. + if (isRegisterInUseBetween(*VCmp, MI, VCmpDest->getReg(), false, true) || + isRegisterInUseAfter(MI, VCmpDest->getReg())) + return; + + // Try to determine if there is a write to any of the VCmp + // operands between the saveexec and the vcmp. + // If yes, additional VGPR spilling might need to be inserted. In this case, + // it's not worth replacing the instruction sequence. + SmallVector<MCRegister, 2> NonDefRegs; + if (Src0->isReg()) + NonDefRegs.push_back(Src0->getReg()); + + if (Src1->isReg()) + NonDefRegs.push_back(Src1->getReg()); + + if (!findInstrBackwards( + MI, [&](MachineInstr *Check) { return Check == VCmp; }, NonDefRegs)) + return; + + if (VCmp) + SaveExecVCmpMapping[&MI] = VCmp; +} + +// Record occurences of +// s_or_saveexec s_o, s_i +// s_xor exec, exec, s_o +// to be replaced with +// s_andn2_saveexec s_o, s_i. +void SIOptimizeExecMasking::tryRecordOrSaveexecXorSequence(MachineInstr &MI) { + const unsigned XorOpcode = + ST->isWave32() ? AMDGPU::S_XOR_B32 : AMDGPU::S_XOR_B64; + + if (MI.getOpcode() == XorOpcode && &MI != &MI.getParent()->front()) { + const MachineOperand &XorDst = MI.getOperand(0); + const MachineOperand &XorSrc0 = MI.getOperand(1); + const MachineOperand &XorSrc1 = MI.getOperand(2); + + if (XorDst.isReg() && XorDst.getReg() == Exec && XorSrc0.isReg() && + XorSrc1.isReg() && + (XorSrc0.getReg() == Exec || XorSrc1.getReg() == Exec)) { + const unsigned OrSaveexecOpcode = ST->isWave32() + ? AMDGPU::S_OR_SAVEEXEC_B32 + : AMDGPU::S_OR_SAVEEXEC_B64; + + // Peek at the previous instruction and check if this is a relevant + // s_or_saveexec instruction. 
+ MachineInstr &PossibleOrSaveexec = *MI.getPrevNode(); + if (PossibleOrSaveexec.getOpcode() != OrSaveexecOpcode) + return; + + const MachineOperand &OrDst = PossibleOrSaveexec.getOperand(0); + const MachineOperand &OrSrc0 = PossibleOrSaveexec.getOperand(1); + if (OrDst.isReg() && OrSrc0.isReg()) { + if ((XorSrc0.getReg() == Exec && XorSrc1.getReg() == OrDst.getReg()) || + (XorSrc0.getReg() == OrDst.getReg() && XorSrc1.getReg() == Exec)) { + OrXors.emplace_back(&PossibleOrSaveexec, &MI); + } + } } } +} - for (const auto &Entry : SaveExecVCmpMapping) { - MachineInstr *SaveExecInstr = Entry.getFirst(); - MachineInstr *VCmpInstr = Entry.getSecond(); +bool SIOptimizeExecMasking::optimizeOrSaveexecXorSequences() { + if (OrXors.empty()) { + return false; + } - if (optimizeSingleVCMPSaveExecSequence(*SaveExecInstr, *VCmpInstr, Exec)) { - SaveExecInstr->eraseFromParent(); - VCmpInstr->eraseFromParent(); + bool Changed = false; + const unsigned Andn2Opcode = ST->isWave32() ? AMDGPU::S_ANDN2_SAVEEXEC_B32 + : AMDGPU::S_ANDN2_SAVEEXEC_B64; - Changed = true; - } + for (const auto &Pair : OrXors) { + MachineInstr *Or = nullptr; + MachineInstr *Xor = nullptr; + std::tie(Or, Xor) = Pair; + BuildMI(*Or->getParent(), Or->getIterator(), Or->getDebugLoc(), + TII->get(Andn2Opcode), Or->getOperand(0).getReg()) + .addReg(Or->getOperand(1).getReg()); + + Or->eraseFromParent(); + Xor->eraseFromParent(); + + Changed = true; } return Changed; @@ -736,9 +774,42 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) { TRI = ST->getRegisterInfo(); TII = ST->getInstrInfo(); MRI = &MF.getRegInfo(); + Exec = TRI->getExec(); bool Changed = optimizeExecSequence(); - Changed |= optimizeVCmpxAndSaveexecSequence(); + + OrXors.clear(); + SaveExecVCmpMapping.clear(); + static unsigned SearchWindow = 10; + for (MachineBasicBlock &MBB : MF) { + unsigned SearchCount = 0; + + for (auto &MI : llvm::reverse(MBB)) { + if (MI.isDebugInstr()) + continue; + + if (SearchCount >= SearchWindow) { + break; + } + + tryRecordOrSaveexecXorSequence(MI); + tryRecordVCmpxAndSaveexecSequence(MI); + + if (MI.modifiesRegister(Exec, TRI)) { + break; + } + + ++SearchCount; + } + } + + Changed |= optimizeOrSaveexecXorSequences(); + for (const auto &Entry : SaveExecVCmpMapping) { + MachineInstr *SaveExecInstr = Entry.getFirst(); + MachineInstr *VCmpInstr = Entry.getSecond(); + + Changed |= optimizeVCMPSaveExecSequence(*SaveExecInstr, *VCmpInstr, Exec); + } return Changed; } diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp index 57dbad468de8..aed84437b890 100644 --- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp +++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp @@ -184,6 +184,16 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) { if (isDefBetween(*TRI, LIS, CCReg, *Sel, *And)) return false; + // Cannot safely mirror live intervals with PHI nodes, so check for these + // before optimization. + SlotIndex SelIdx = LIS->getInstructionIndex(*Sel); + LiveInterval *SelLI = &LIS->getInterval(SelReg); + if (llvm::any_of(SelLI->vnis(), + [](const VNInfo *VNI) { + return VNI->isPHIDef(); + })) + return false; + // TODO: Guard against implicit def operands? 
LLVM_DEBUG(dbgs() << "Folding sequence:\n\t" << *Sel << '\t' << *Cmp << '\t' << *And); @@ -204,31 +214,34 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) { LLVM_DEBUG(dbgs() << "=>\n\t" << *Andn2 << '\n'); - SlotIndex CmpIdx = LIS->getInstructionIndex(*Cmp); - SlotIndex SelIdx = LIS->getInstructionIndex(*Sel); - - LiveInterval *CmpLI = - CmpReg.isVirtual() ? &LIS->getInterval(CmpReg) : nullptr; - LiveInterval *SelLI = - SelReg.isVirtual() ? &LIS->getInterval(SelReg) : nullptr; - // Update live intervals for CCReg before potentially removing CmpReg/SelReg, // and their associated liveness information. + SlotIndex CmpIdx = LIS->getInstructionIndex(*Cmp); if (CCReg.isVirtual()) { - // Note: this ignores that SelLI might have multiple internal values - // or splits and simply extends the live range to cover all cases - // where the result of the v_cndmask_b32 was live (e.g. loops). - // This could yield worse register allocation in rare edge cases. - SlotIndex EndIdx = AndIdx.getRegSlot(); - if (SelLI && SelLI->endIndex() > EndIdx && SelLI->endIndex().isBlock()) - EndIdx = SelLI->endIndex(); + // Apply live ranges from SelLI to CCReg potentially matching splits + // and extending to loop boundaries. + + auto applyLiveRanges = [&](LiveRange &Dst, VNInfo *VNI) { + // Copy live ranges from SelLI, adjusting start and end as required + auto DefSegment = SelLI->FindSegmentContaining(SelIdx.getRegSlot()); + assert(DefSegment != SelLI->end() && + "No live interval segment covering definition?"); + for (auto I = DefSegment; I != SelLI->end(); ++I) { + SlotIndex Start = I->start < SelIdx.getRegSlot() ? + SelIdx.getRegSlot() : I->start; + SlotIndex End = I->end < AndIdx.getRegSlot() || I->end.isBlock() ? + I->end : AndIdx.getRegSlot(); + Dst.addSegment(LiveRange::Segment(Start, End, VNI)); + } + // If SelLI does not cover AndIdx (because Cmp killed Sel) then extend. + if (!SelLI->getSegmentContaining(AndIdx.getRegSlot())) + Dst.addSegment(LiveRange::Segment(CmpIdx.getRegSlot(), AndIdx.getRegSlot(), VNI)); + }; LiveInterval &CCLI = LIS->getInterval(CCReg); auto CCQ = CCLI.Query(SelIdx.getRegSlot()); - if (CCQ.valueIn()) { - CCLI.addSegment(LiveRange::Segment(SelIdx.getRegSlot(), - EndIdx, CCQ.valueIn())); - } + if (CCQ.valueIn()) + applyLiveRanges(CCLI, CCQ.valueIn()); if (CC->getSubReg()) { LaneBitmask Mask = TRI->getSubRegIndexLaneMask(CC->getSubReg()); @@ -237,10 +250,8 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) { Allocator, Mask, [=](LiveInterval::SubRange &SR) { auto CCQS = SR.Query(SelIdx.getRegSlot()); - if (CCQS.valueIn()) { - SR.addSegment(LiveRange::Segment( - SelIdx.getRegSlot(), EndIdx, CCQS.valueIn())); - } + if (CCQS.valueIn()) + applyLiveRanges(SR, CCQS.valueIn()); }, *LIS->getSlotIndexes(), *TRI); CCLI.removeEmptySubRanges(); @@ -253,7 +264,8 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) { // Try to remove compare. Cmp value should not used in between of cmp // and s_and_b64 if VCC or just unused if any other register. - if ((CmpReg.isVirtual() && CmpLI && CmpLI->Query(AndIdx.getRegSlot()).isKill()) || + LiveInterval *CmpLI = CmpReg.isVirtual() ? 
&LIS->getInterval(CmpReg) : nullptr; + if ((CmpLI && CmpLI->Query(AndIdx.getRegSlot()).isKill()) || (CmpReg == Register(CondReg) && std::none_of(std::next(Cmp->getIterator()), Andn2->getIterator(), [&](const MachineInstr &MI) { @@ -266,18 +278,16 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) { Cmp->eraseFromParent(); // Try to remove v_cndmask_b32. - if (SelLI) { - // Kill status must be checked before shrinking the live range. - bool IsKill = SelLI->Query(CmpIdx.getRegSlot()).isKill(); - LIS->shrinkToUses(SelLI); - bool IsDead = SelLI->Query(SelIdx.getRegSlot()).isDeadDef(); - if (MRI->use_nodbg_empty(SelReg) && (IsKill || IsDead)) { - LLVM_DEBUG(dbgs() << "Erasing: " << *Sel << '\n'); - - LIS->removeVRegDefAt(*SelLI, SelIdx.getRegSlot()); - LIS->RemoveMachineInstrFromMaps(*Sel); - Sel->eraseFromParent(); - } + // Kill status must be checked before shrinking the live range. + bool IsKill = SelLI->Query(CmpIdx.getRegSlot()).isKill(); + LIS->shrinkToUses(SelLI); + bool IsDead = SelLI->Query(SelIdx.getRegSlot()).isDeadDef(); + if (MRI->use_nodbg_empty(SelReg) && (IsKill || IsDead)) { + LLVM_DEBUG(dbgs() << "Erasing: " << *Sel << '\n'); + + LIS->removeVRegDefAt(*SelLI, SelIdx.getRegSlot()); + LIS->RemoveMachineInstrFromMaps(*Sel); + Sel->eraseFromParent(); } } diff --git a/llvm/lib/Target/AMDGPU/SIProgramInfo.h b/llvm/lib/Target/AMDGPU/SIProgramInfo.h index b13afceba20e..553fb4cf496c 100644 --- a/llvm/lib/Target/AMDGPU/SIProgramInfo.h +++ b/llvm/lib/Target/AMDGPU/SIProgramInfo.h @@ -49,6 +49,8 @@ struct SIProgramInfo { uint32_t AccumOffset = 0; uint32_t TgSplit = 0; uint32_t NumSGPR = 0; + unsigned SGPRSpill = 0; + unsigned VGPRSpill = 0; uint32_t LDSSize = 0; bool FlatUsed = false; diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td index 882d13402a19..b7e8eadfe71d 100644 --- a/llvm/lib/Target/AMDGPU/SMInstructions.td +++ b/llvm/lib/Target/AMDGPU/SMInstructions.td @@ -119,13 +119,19 @@ class SM_Probe_Pseudo <string opName, string variant, RegisterClass baseClass, let PseudoInstr = opName # variant; } -class SM_Load_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> pattern=[]> - : SM_Pseudo<opName, outs, ins, asmOps, pattern> { - RegisterClass BaseClass; +class SM_Load_Pseudo <string opName, RegisterClass baseClass, + RegisterClass dstClass, OffsetMode offsets> + : SM_Pseudo<opName, (outs dstClass:$sdst), + !con((ins baseClass:$sbase), offsets.Ins, (ins CPol:$cpol)), + " $sdst, $sbase, " # offsets.Asm # "$cpol", []> { + RegisterClass BaseClass = baseClass; let mayLoad = 1; let mayStore = 0; let has_glc = 1; let has_dlc = 1; + let has_offset = offsets.HasOffset; + let has_soffset = offsets.HasSOffset; + let PseudoInstr = opName # offsets.Variant; } class SM_Store_Pseudo <string opName, RegisterClass baseClass, @@ -158,40 +164,9 @@ class SM_Discard_Pseudo <string opName, string variant, dag offsets, multiclass SM_Pseudo_Loads<string opName, RegisterClass baseClass, RegisterClass dstClass> { - def _IMM : SM_Load_Pseudo <opName, - (outs dstClass:$sdst), - (ins baseClass:$sbase, i32imm:$offset, CPol:$cpol), - " $sdst, $sbase, $offset$cpol", []> { - let has_offset = 1; - let BaseClass = baseClass; - let PseudoInstr = opName # "_IMM"; - let has_glc = 1; - let has_dlc = 1; - } - - def _SGPR : SM_Load_Pseudo <opName, - (outs dstClass:$sdst), - (ins baseClass:$sbase, SReg_32:$soffset, CPol:$cpol), - " $sdst, $sbase, $soffset$cpol", []> { - let has_soffset = 1; - let BaseClass = baseClass; - let PseudoInstr 
= opName # "_SGPR"; - let has_glc = 1; - let has_dlc = 1; - } - - def _SGPR_IMM : SM_Load_Pseudo <opName, - (outs dstClass:$sdst), - (ins baseClass:$sbase, SReg_32:$soffset, - i32imm:$offset, CPol:$cpol), - " $sdst, $sbase, $soffset$offset$cpol", []> { - let has_offset = 1; - let has_soffset = 1; - let BaseClass = baseClass; - let PseudoInstr = opName # "_SGPR_IMM"; - let has_glc = 1; - let has_dlc = 1; - } + def _IMM : SM_Load_Pseudo <opName, baseClass, dstClass, IMM_Offset>; + def _SGPR : SM_Load_Pseudo <opName, baseClass, dstClass, SGPR_Offset>; + def _SGPR_IMM : SM_Load_Pseudo <opName, baseClass, dstClass, SGPR_IMM_Offset>; } multiclass SM_Pseudo_Stores<string opName, @@ -596,10 +571,10 @@ class SMEM_Real_vi <bits<8> op, SM_Pseudo ps> soffset{6-0}, ?); } -class SMEM_Real_Load_vi<bits<8> op, string ps, dag offsets> - : SMEM_Real_vi<op, !cast<SM_Pseudo>(ps)> { - RegisterClass BaseClass = !cast<SM_Load_Pseudo>(ps).BaseClass; - let InOperandList = !con((ins BaseClass:$sbase), offsets, (ins CPol:$cpol)); +class SMEM_Real_Load_vi<bits<8> op, string ps, OffsetMode offsets> + : SMEM_Real_vi<op, !cast<SM_Pseudo>(ps # offsets.Variant)> { + RegisterClass BaseClass = !cast<SM_Load_Pseudo>(ps # offsets.Variant).BaseClass; + let InOperandList = !con((ins BaseClass:$sbase), offsets.Ins, (ins CPol:$cpol)); } // The alternative GFX9 SGPR encoding using soffset to encode the @@ -614,14 +589,12 @@ class SMEM_Real_SGPR_alt_gfx9 { } multiclass SM_Real_Loads_vi<bits<8> op, string ps> { - def _IMM_vi : SMEM_Real_Load_vi <op, ps#"_IMM", (ins smem_offset:$offset)>; - def _SGPR_vi : SMEM_Real_Load_vi <op, ps#"_SGPR", (ins SReg_32:$soffset)>; - def _SGPR_alt_gfx9 : SMEM_Real_Load_vi <op, ps#"_SGPR", - (ins SReg_32:$soffset)>, + def _IMM_vi : SMEM_Real_Load_vi <op, ps, IMM_Offset>; + def _SGPR_vi : SMEM_Real_Load_vi <op, ps, SGPR_Offset>; + def _SGPR_alt_gfx9 : SMEM_Real_Load_vi <op, ps, SGPR_Offset>, SMEM_Real_SGPR_alt_gfx9; let IsGFX9SpecificEncoding = true in - def _SGPR_IMM_gfx9 : SMEM_Real_Load_vi < - op, ps#"_SGPR_IMM", (ins SReg_32:$soffset, smem_offset_mod:$offset)>; + def _SGPR_IMM_gfx9 : SMEM_Real_Load_vi <op, ps, SGPR_IMM_Offset>; } class SMEM_Real_Store_Base_vi <bits<8> op, SM_Pseudo ps> : SMEM_Real_vi <op, ps> { @@ -883,6 +856,7 @@ def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ return isUniformL def SMRDImm : ComplexPattern<iPTR, 2, "SelectSMRDImm">; def SMRDImm32 : ComplexPattern<iPTR, 2, "SelectSMRDImm32">; def SMRDSgpr : ComplexPattern<iPTR, 2, "SelectSMRDSgpr">; +def SMRDSgprImm : ComplexPattern<iPTR, 3, "SelectSMRDSgprImm">; def SMRDBufferImm : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm">; def SMRDBufferImm32 : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm32">; @@ -903,11 +877,18 @@ multiclass SMRD_Pattern <string Instr, ValueType vt> { // 3. SGPR offset def : GCNPat < - (smrd_load (SMRDSgpr i64:$sbase, i32:$offset)), - (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, 0)) + (smrd_load (SMRDSgpr i64:$sbase, i32:$soffset)), + (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $soffset, 0)) >; - // 4. No offset + // 4. SGPR+IMM offset + def : GCNPat < + (smrd_load (SMRDSgprImm i64:$sbase, i32:$soffset, i32:$offset)), + (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") $sbase, $soffset, $offset, 0))> { + let OtherPredicates = [isGFX9Plus]; + } + + // 5. 
No offset def : GCNPat < (vt (smrd_load (i64 SReg_64:$sbase))), (vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0)) @@ -1021,19 +1002,16 @@ class SMEM_Real_gfx10<bits<8> op, SM_Pseudo ps> let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ?); } -multiclass SM_Real_Loads_gfx10<bits<8> op, string ps, - SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM), - SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> { - def _IMM_gfx10 : SMEM_Real_gfx10<op, immPs> { - let InOperandList = (ins immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol); - } - def _SGPR_gfx10 : SMEM_Real_gfx10<op, sgprPs> { - let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$soffset, CPol:$cpol); - } - def _SGPR_IMM_gfx10 : SMEM_Real_gfx10<op, !cast<SM_Load_Pseudo>(ps#_SGPR_IMM)> { - let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$soffset, - smem_offset_mod:$offset, CPol:$cpol); - } +class SMEM_Real_Load_gfx10<bits<8> op, string ps, OffsetMode offsets> + : SMEM_Real_gfx10<op, !cast<SM_Pseudo>(ps # offsets.Variant)> { + RegisterClass BaseClass = !cast<SM_Load_Pseudo>(ps # offsets.Variant).BaseClass; + let InOperandList = !con((ins BaseClass:$sbase), offsets.Ins, (ins CPol:$cpol)); +} + +multiclass SM_Real_Loads_gfx10<bits<8> op, string ps> { + def _IMM_gfx10 : SMEM_Real_Load_gfx10<op, ps, IMM_Offset>; + def _SGPR_gfx10 : SMEM_Real_Load_gfx10<op, ps, SGPR_Offset>; + def _SGPR_IMM_gfx10 : SMEM_Real_Load_gfx10<op, ps, SGPR_IMM_Offset>; } class SMEM_Real_Store_gfx10<bits<8> op, SM_Pseudo ps> : SMEM_Real_gfx10<op, ps> { @@ -1227,17 +1205,16 @@ class SMEM_Real_gfx11<bits<8> op, SM_Pseudo ps, string opName = ps.Mnemonic> : let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, 0); } -class SMEM_Real_Load_gfx11<bits<8> op, string ps, string opName, dag offsets> : - SMEM_Real_gfx11<op, !cast<SM_Pseudo>(ps), opName> { - RegisterClass BaseClass = !cast<SM_Load_Pseudo>(ps).BaseClass; - let InOperandList = !con((ins BaseClass:$sbase), offsets, (ins CPol:$cpol)); +class SMEM_Real_Load_gfx11<bits<8> op, string ps, string opName, OffsetMode offsets> : + SMEM_Real_gfx11<op, !cast<SM_Pseudo>(ps # offsets.Variant), opName> { + RegisterClass BaseClass = !cast<SM_Load_Pseudo>(ps # offsets.Variant).BaseClass; + let InOperandList = !con((ins BaseClass:$sbase), offsets.Ins, (ins CPol:$cpol)); } multiclass SM_Real_Loads_gfx11<bits<8> op, string ps, string opName> { - def _IMM_gfx11 : SMEM_Real_Load_gfx11<op, ps#"_IMM", opName, (ins smem_offset:$offset)>; - def _SGPR_gfx11 : SMEM_Real_Load_gfx11<op, ps#"_SGPR", opName, (ins SReg_32:$soffset)>; - def _SGPR_IMM_gfx11 : SMEM_Real_Load_gfx11< - op, ps#"_SGPR_IMM", opName, (ins SReg_32:$soffset, smem_offset_mod:$offset)>; + def _IMM_gfx11 : SMEM_Real_Load_gfx11<op, ps, opName, IMM_Offset>; + def _SGPR_gfx11 : SMEM_Real_Load_gfx11<op, ps, opName, SGPR_Offset>; + def _SGPR_IMM_gfx11 : SMEM_Real_Load_gfx11<op, ps, opName, SGPR_IMM_Offset>; def : MnemonicAlias<!cast<SM_Pseudo>(ps#"_IMM").Mnemonic, opName>, Requires<[isGFX11Plus]>; } diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 2f334e211181..b5fb390c08e1 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -417,9 +417,9 @@ bool getMAIIsGFX940XDL(unsigned Opc) { CanBeVOPD getCanBeVOPD(unsigned Opc) { const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc); if (Info) - return {Info->CanBeVOPDX, 1}; + return {Info->CanBeVOPDX, true}; else - return {0, 0}; + return {false, false}; } unsigned 
getVOPDOpcode(unsigned Opc) { diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h index 65ed02ca62de..a2d59abd3abb 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h @@ -30,7 +30,7 @@ namespace AMDGPU { Align getAlign(DataLayout const &DL, const GlobalVariable *GV); std::vector<GlobalVariable *> findVariablesToLower(Module &M, - const Function *F = nullptr); + const Function *F); /// Replace all uses of constant \p C with instructions in \p F. void replaceConstantUsesInFunction(ConstantExpr *C, const Function *F); diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 1d374a9f90ba..73e4eb8cdc24 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -499,6 +499,59 @@ let SubtargetPredicate = isGFX9Only in { defm V_SCREEN_PARTITION_4SE_B32 : VOP1Inst <"v_screen_partition_4se_b32", VOP_I32_I32>; } // End SubtargetPredicate = isGFX9Only +class VOPProfile_Base_CVT_F32_F8<ValueType vt> : VOPProfileI2F <vt, i32> { + let HasExtSDWA = 1; + let HasExtSDWA9 = 1; + let HasExt = 1; + let DstRCSDWA = getVALUDstForVT<vt>.ret; + let InsSDWA = (ins Bin32SDWAInputMods:$src0_modifiers, Src0SDWA:$src0, + clampmod:$clamp, omod:$omod, src0_sel:$src0_sel); + let AsmSDWA = "$vdst, $src0_modifiers$clamp$omod $src0_sel"; // No dst_sel + let AsmSDWA9 = AsmSDWA; + let EmitDstSel = 0; +} + +def VOPProfileCVT_F32_F8 : VOPProfile_Base_CVT_F32_F8 <f32>; +def VOPProfileCVT_PK_F32_F8 : VOPProfile_Base_CVT_F32_F8 <v2f32>; + +let SubtargetPredicate = HasFP8Insts, mayRaiseFPException = 0, + SchedRW = [WriteFloatCvt] in { + defm V_CVT_F32_FP8 : VOP1Inst<"v_cvt_f32_fp8", VOPProfileCVT_F32_F8>; + defm V_CVT_F32_BF8 : VOP1Inst<"v_cvt_f32_bf8", VOPProfileCVT_F32_F8>; + defm V_CVT_PK_F32_FP8 : VOP1Inst<"v_cvt_pk_f32_fp8", VOPProfileCVT_PK_F32_F8>; + defm V_CVT_PK_F32_BF8 : VOP1Inst<"v_cvt_pk_f32_bf8", VOPProfileCVT_PK_F32_F8>; +} + +class Cvt_F32_F8_Pat<SDPatternOperator node, int index, + VOP1_Pseudo inst_e32, VOP1_SDWA_Pseudo inst_sdwa> : GCNPat< + (f32 (node i32:$src, index)), + !if (index, + (inst_sdwa 0, $src, 0, 0, index), + (inst_e32 $src)) +>; + +foreach Index = [0, 1, 2, 3] in { + def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_fp8, Index, + V_CVT_F32_FP8_e32, V_CVT_F32_FP8_sdwa>; + def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_bf8, Index, + V_CVT_F32_BF8_e32, V_CVT_F32_BF8_sdwa>; +} + +class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index, + VOP1_Pseudo inst_e32, VOP1_SDWA_Pseudo inst_sdwa> : GCNPat< + (v2f32 (node i32:$src, index)), + !if (index, + (inst_sdwa 0, $src, 0, 0, SDWA.WORD_1), + (inst_e32 $src)) +>; + +foreach Index = [0, -1] in { + def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_fp8, Index, + V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_sdwa>; + def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_bf8, Index, + V_CVT_PK_F32_BF8_e32, V_CVT_PK_F32_BF8_sdwa>; +} + let SubtargetPredicate = isGFX10Plus in { defm V_PIPEFLUSH : VOP1Inst<"v_pipeflush", VOP_NO_EXT<VOP_NONE>>; @@ -1106,11 +1159,36 @@ multiclass VOP1_Real_gfx9 <bits<10> op> { } +multiclass VOP1_Real_NoDstSel_SDWA_gfx9 <bits<10> op> { + let AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9" in { + defm NAME : VOP1_Real_e32e64_vi <op>; + } + + foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in + def _sdwa_gfx9 : + VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>, + VOP1_SDWA9Ae <op{7-0}, 
!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl> { + let Inst{42-40} = 6; + } + + foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in + def _dpp_gfx9 : + VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>, + VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>; +} + defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>; let AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX9" in defm V_MOV_B64 : VOP1_Real_gfx9 <0x38>; +let OtherPredicates = [HasFP8Insts] in { +defm V_CVT_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x54>; +defm V_CVT_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x55>; +defm V_CVT_PK_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x56>; +defm V_CVT_PK_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x57>; +} + //===----------------------------------------------------------------------===// // GFX10 //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index dddd0aacc140..a911483cade5 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -481,6 +481,30 @@ def shl_0_to_4 : PatFrag< }]; } +def VOP3_CVT_PK_F8_F32_Profile : VOP3_Profile<VOP_I32_F32_F32, VOP3_OPSEL> { + let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0, + FP32InputMods:$src1_modifiers, Src1RC64:$src1, + VGPR_32:$vdst_in, op_sel0:$op_sel); + let HasClamp = 0; + let HasExtVOP3DPP = 0; +} + +def VOP3_CVT_SR_F8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, i32, f32]>, + VOP3_OPSEL> { + let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0, + FP32InputMods:$src1_modifiers, Src1RC64:$src1, + FP32InputMods:$src2_modifiers, VGPR_32:$src2, + op_sel0:$op_sel); + let HasClamp = 0; + let HasSrc2 = 0; + let HasSrc2Mods = 1; + let AsmVOP3OpSel = !subst(", $src2_modifiers", "", + getAsmVOP3OpSel<3, HasClamp, + HasSrc0FloatMods, HasSrc1FloatMods, + HasSrc2FloatMods>.ret); + let HasExtVOP3DPP = 0; +} + let SubtargetPredicate = isGFX9Plus in { let isCommutable = 1, isReMaterializable = 1 in { defm V_ADD3_U32 : VOP3Inst <"v_add3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>; @@ -526,6 +550,43 @@ defm V_LSHL_OR_B32 : VOP3Inst <"v_lshl_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32 let SubtargetPredicate = isGFX940Plus in defm V_LSHL_ADD_U64 : VOP3Inst <"v_lshl_add_u64", VOP3_Profile<VOP_I64_I64_I32_I64>>; +let SubtargetPredicate = HasFP8Insts, mayRaiseFPException = 0, + SchedRW = [WriteFloatCvt] in { + let Constraints = "$vdst = $vdst_in", DisableEncoding = "$vdst_in" in { + defm V_CVT_PK_FP8_F32 : VOP3Inst<"v_cvt_pk_fp8_f32", VOP3_CVT_PK_F8_F32_Profile>; + defm V_CVT_PK_BF8_F32 : VOP3Inst<"v_cvt_pk_bf8_f32", VOP3_CVT_PK_F8_F32_Profile>; + } + + // These instructions have non-standard use of op_sel. In particular they are + // using op_sel bits 2 and 3 while only having two sources. Therefore dummy + // src2 is used to hold the op_sel value. 
+ let Constraints = "$vdst = $src2", DisableEncoding = "$src2" in { + defm V_CVT_SR_FP8_F32 : VOP3Inst<"v_cvt_sr_fp8_f32", VOP3_CVT_SR_F8_F32_Profile>; + defm V_CVT_SR_BF8_F32 : VOP3Inst<"v_cvt_sr_bf8_f32", VOP3_CVT_SR_F8_F32_Profile>; + } +} + +class Cvt_PK_F8_F32_Pat<SDPatternOperator node, int index, VOP3_Pseudo inst> : GCNPat< + (i32 (node f32:$src0, f32:$src1, i32:$old, index)), + (inst !if(index, SRCMODS.DST_OP_SEL, 0), $src0, 0, $src1, $old, !if(index, SRCMODS.OP_SEL_0, 0)) +>; + +class Cvt_SR_F8_F32_Pat<SDPatternOperator node, bits<2> index, VOP3_Pseudo inst> : GCNPat< + (i32 (node f32:$src0, i32:$src1, i32:$old, index)), + (inst !if(index{1}, SRCMODS.DST_OP_SEL, 0), $src0, 0, $src1, + !if(index{0}, SRCMODS.OP_SEL_0, 0), $old, !if(index{1}, SRCMODS.OP_SEL_0, 0)) +>; + +foreach Index = [0, -1] in { + def : Cvt_PK_F8_F32_Pat<int_amdgcn_cvt_pk_fp8_f32, Index, V_CVT_PK_FP8_F32_e64>; + def : Cvt_PK_F8_F32_Pat<int_amdgcn_cvt_pk_bf8_f32, Index, V_CVT_PK_BF8_F32_e64>; +} + +foreach Index = [0, 1, 2, 3] in { + def : Cvt_SR_F8_F32_Pat<int_amdgcn_cvt_sr_fp8_f32, Index, V_CVT_SR_FP8_F32_e64>; + def : Cvt_SR_F8_F32_Pat<int_amdgcn_cvt_sr_bf8_f32, Index, V_CVT_SR_BF8_F32_e64>; +} + class ThreeOp_i32_Pats <SDPatternOperator op1, SDPatternOperator op2, Instruction inst> : GCNPat < // This matches (op2 (op1 i32:$src0, i32:$src1), i32:$src2) with conditions. (ThreeOpFrag<op1, op2> i32:$src0, i32:$src1, i32:$src2), @@ -699,15 +760,19 @@ def : DivFmasPat<f64, V_DIV_FMAS_F64_e64, VCC_LO>; } class VOP3_DOT_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOP3_Profile<P, Features> { - // FIXME VOP3 DPP versions are unsupported - let HasExtVOP3DPP = 0; let HasClamp = 0; let HasOMod = 0; - let InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64, - NumSrcArgs, HasClamp, HasOMod, - !if(isFloatType<Src0VT>.ret, FPVRegInputMods, IntOpSelMods), - !if(isFloatType<Src1VT>.ret, FPVRegInputMods, IntOpSelMods), - !if(isFloatType<Src2VT>.ret, FPVRegInputMods, IntOpSelMods)>.ret; + // Override modifiers for bf16(i16) (same as float modifiers). 
+ let HasSrc0Mods = 1; + let HasSrc1Mods = 1; + let HasSrc2Mods = 1; + let Src0ModDPP = FPVRegInputMods; + let Src1ModDPP = FPVRegInputMods; + let Src2ModVOP3DPP = FPVRegInputMods; + let InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs, + HasClamp, HasOMod, FPVRegInputMods, + FPVRegInputMods, FPVRegInputMods>.ret; + let AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs, HasClamp, 1, 1, 1>.ret; } let SubtargetPredicate = isGFX11Plus in { @@ -723,7 +788,7 @@ let SubtargetPredicate = isGFX11Plus in { defm V_CVT_PK_U16_F32 : VOP3Inst<"v_cvt_pk_u16_f32", VOP3_Profile<VOP_V2I16_F32_F32>>; } // End SubtargetPredicate = isGFX11Plus -let SubtargetPredicate = HasDot8Insts in { +let SubtargetPredicate = HasDot8Insts, IsDOT=1 in { defm V_DOT2_F16_F16 : VOP3Inst<"v_dot2_f16_f16", VOP3_DOT_Profile<VOP_F16_V2F16_V2F16_F16>, int_amdgcn_fdot2_f16_f16>; defm V_DOT2_BF16_BF16 : VOP3Inst<"v_dot2_bf16_bf16", VOP3_DOT_Profile<VOP_I16_V2I16_V2I16_I16>, int_amdgcn_fdot2_bf16_bf16>; } @@ -848,9 +913,8 @@ defm V_MAXMIN_U32 : VOP3_Realtriple_gfx11<0x262>; defm V_MINMAX_U32 : VOP3_Realtriple_gfx11<0x263>; defm V_MAXMIN_I32 : VOP3_Realtriple_gfx11<0x264>; defm V_MINMAX_I32 : VOP3_Realtriple_gfx11<0x265>; -// FIXME VOP3 DPP Dot instructions are unsupported -defm V_DOT2_F16_F16 : VOP3_Real_Base_gfx11<0x266>; -defm V_DOT2_BF16_BF16 : VOP3_Real_Base_gfx11<0x267>; +defm V_DOT2_F16_F16 : VOP3Dot_Realtriple_gfx11<0x266>; +defm V_DOT2_BF16_BF16 : VOP3Dot_Realtriple_gfx11<0x267>; defm V_DIV_SCALE_F32 : VOP3be_Real_gfx11<0x2fc, "V_DIV_SCALE_F32", "v_div_scale_f32">; defm V_DIV_SCALE_F64 : VOP3be_Real_gfx11<0x2fd, "V_DIV_SCALE_F64", "v_div_scale_f64">; defm V_MAD_U64_U32_gfx11 : VOP3be_Real_gfx11<0x2fe, "V_MAD_U64_U32_gfx11", "v_mad_u64_u32">; @@ -1161,6 +1225,13 @@ multiclass VOP3OpSel_Real_gfx9<bits<10> op> { VOP3OpSel_gfx9 <op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl>; } +multiclass VOP3OpSel_Real_gfx9_forced_opsel2<bits<10> op> { + def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>, + VOP3OpSel_gfx9 <op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl> { + let Inst{13} = src2_modifiers{2}; // op_sel(2) + } +} + multiclass VOP3Interp_Real_vi<bits<10> op> { def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.VI>, VOP3Interp_vi <op, !cast<VOP_Pseudo>(NAME).Pfl>; @@ -1352,3 +1423,10 @@ defm V_CVT_PKNORM_I16_F16 : VOP3OpSel_Real_gfx9 <0x299>; defm V_CVT_PKNORM_U16_F16 : VOP3OpSel_Real_gfx9 <0x29a>; defm V_LSHL_ADD_U64 : VOP3_Real_vi <0x208>; + +let OtherPredicates = [HasFP8Insts] in { +defm V_CVT_PK_FP8_F32 : VOP3OpSel_Real_gfx9 <0x2a2>; +defm V_CVT_PK_BF8_F32 : VOP3OpSel_Real_gfx9 <0x2a3>; +defm V_CVT_SR_FP8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a4>; +defm V_CVT_SR_BF8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a5>; +} diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index 59ce532af59b..f1ce613d613b 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -493,6 +493,8 @@ def VOPProfileMAI_I32_I64_X16 : VOPProfileMAI<VOP_V4I32_I64_I64_V4I32, A def VOPProfileMAI_I32_I64_X32 : VOPProfileMAI<VOP_V16I32_I64_I64_V16I32, AISrc_512_b32, ADst_512, AVSrc_64>; def VOPProfileMAI_F32_V2F32_X16 : VOPProfileMAI<VOP_V4F32_V2F32_V2F32_V4F32, AISrc_128_b32, ADst_128, AVSrc_64>; def VOPProfileMAI_F32_V2F32_X32 : VOPProfileMAI<VOP_V16F32_V2F32_V2F32_V16F32, AISrc_512_b32, ADst_512, AVSrc_64>; +def VOPProfileMAI_F32_I64_X32 : VOPProfileMAI<VOP_V4F32_I64_I64_V4F32, AISrc_128_b32, ADst_128, AVSrc_64>; +def 
VOPProfileMAI_F32_I64_X16 : VOPProfileMAI<VOP_V16F32_I64_I64_V16F32, AISrc_512_b32, ADst_512, AVSrc_64>; def VOPProfileMAI_F32_F32_X4_VCD : VOPProfileMAI<VOP_V4F32_F32_F32_V4F32, VISrc_128_f32, VDst_128>; def VOPProfileMAI_F32_F32_X16_VCD : VOPProfileMAI<VOP_V16F32_F32_F32_V16F32, VISrc_512_f32, VDst_512>; @@ -515,6 +517,8 @@ def VOPProfileMAI_I32_I64_X16_VCD : VOPProfileMAI<VOP_V4I32_I64_I64_V4I32, def VOPProfileMAI_I32_I64_X32_VCD : VOPProfileMAI<VOP_V16I32_I64_I64_V16I32, VISrc_512_b32, VDst_512, AVSrc_64>; def VOPProfileMAI_F32_V2F32_X16_VCD : VOPProfileMAI<VOP_V4F32_V2F32_V2F32_V4F32, VISrc_128_b32, VDst_128, AVSrc_64>; def VOPProfileMAI_F32_V2F32_X32_VCD : VOPProfileMAI<VOP_V16F32_V2F32_V2F32_V16F32, VISrc_512_b32, VDst_512, AVSrc_64>; +def VOPProfileMAI_F32_I64_X32_VCD : VOPProfileMAI<VOP_V4F32_I64_I64_V4F32, VISrc_128_b32, VDst_128, AVSrc_64>; +def VOPProfileMAI_F32_I64_X16_VCD : VOPProfileMAI<VOP_V16F32_I64_I64_V16F32, VISrc_512_b32, VDst_512, AVSrc_64>; def VOPProfileSMFMAC_F32_16X16X32_F16 : VOPProfileSMFMAC<VOP_V4F32_V4F16_V8F16_I32, AVDst_128, AVSrc_64, AVSrc_128>; def VOPProfileSMFMAC_F32_32X32X16_F16 : VOPProfileSMFMAC<VOP_V16F32_V4F16_V8F16_I32, AVDst_512, AVSrc_64, AVSrc_128>; @@ -522,6 +526,8 @@ def VOPProfileSMFMAC_F32_16X16X32_I16 : VOPProfileSMFMAC<VOP_V4F32_V4I16_V8I16_I def VOPProfileSMFMAC_F32_32X32X16_I16 : VOPProfileSMFMAC<VOP_V16F32_V4I16_V8I16_I32, AVDst_512, AVSrc_64, AVSrc_128>; def VOPProfileSMFMAC_I32_16X16X64_I8 : VOPProfileSMFMAC<VOP_V4I32_V2I32_V4I32_I32, AVDst_128, AVSrc_64, AVSrc_128>; def VOPProfileSMFMAC_I32_32X32X32_I8 : VOPProfileSMFMAC<VOP_V16I32_V2I32_V4I32_I32, AVDst_512, AVSrc_64, AVSrc_128>; +def VOPProfileSMFMAC_F32_16X16X64_F8 : VOPProfileSMFMAC<VOP_V4F32_V2I32_V4I32_I32, AVDst_128, AVSrc_64, AVSrc_128>; +def VOPProfileSMFMAC_F32_32X32X32_F8 : VOPProfileSMFMAC<VOP_V16F32_V2I32_V4I32_I32, AVDst_512, AVSrc_64, AVSrc_128>; class MFMATable <bit is_mac, string Name> { bit IsMac = is_mac; @@ -638,6 +644,14 @@ let Predicates = [isGFX940Plus], is_gfx940_xdl = 1 in { defm V_MFMA_I32_16X16X32I8 : MAIInst<"v_mfma_i32_16x16x32i8", "I32_I64_X16", int_amdgcn_mfma_i32_16x16x32_i8>; defm V_MFMA_F32_16X16X8XF32 : MAIInst<"v_mfma_f32_16x16x8xf32", "F32_V2F32_X16", int_amdgcn_mfma_f32_16x16x8_xf32>; defm V_MFMA_F32_32X32X4XF32 : MAIInst<"v_mfma_f32_32x32x4xf32", "F32_V2F32_X32", int_amdgcn_mfma_f32_32x32x4_xf32>; + defm V_MFMA_F32_16X16X32_BF8_BF8 : MAIInst<"v_mfma_f32_16x16x32_bf8_bf8", "F32_I64_X32", int_amdgcn_mfma_f32_16x16x32_bf8_bf8>; + defm V_MFMA_F32_16X16X32_BF8_FP8 : MAIInst<"v_mfma_f32_16x16x32_bf8_fp8", "F32_I64_X32", int_amdgcn_mfma_f32_16x16x32_bf8_fp8>; + defm V_MFMA_F32_16X16X32_FP8_BF8 : MAIInst<"v_mfma_f32_16x16x32_fp8_bf8", "F32_I64_X32", int_amdgcn_mfma_f32_16x16x32_fp8_bf8>; + defm V_MFMA_F32_16X16X32_FP8_FP8 : MAIInst<"v_mfma_f32_16x16x32_fp8_fp8", "F32_I64_X32", int_amdgcn_mfma_f32_16x16x32_fp8_fp8>; + defm V_MFMA_F32_32X32X16_BF8_BF8 : MAIInst<"v_mfma_f32_32x32x16_bf8_bf8", "F32_I64_X16", int_amdgcn_mfma_f32_32x32x16_bf8_bf8>; + defm V_MFMA_F32_32X32X16_BF8_FP8 : MAIInst<"v_mfma_f32_32x32x16_bf8_fp8", "F32_I64_X16", int_amdgcn_mfma_f32_32x32x16_bf8_fp8>; + defm V_MFMA_F32_32X32X16_FP8_BF8 : MAIInst<"v_mfma_f32_32x32x16_fp8_bf8", "F32_I64_X16", int_amdgcn_mfma_f32_32x32x16_fp8_bf8>; + defm V_MFMA_F32_32X32X16_FP8_FP8 : MAIInst<"v_mfma_f32_32x32x16_fp8_fp8", "F32_I64_X16", int_amdgcn_mfma_f32_32x32x16_fp8_fp8>; } // End Predicates = [isGFX940Plus], is_gfx940_xdl = 1 multiclass SMFMACInst<string OpName, string P, SDPatternOperator node> { 
@@ -654,6 +668,14 @@ defm V_SMFMAC_F32_16X16X32_BF16 : SMFMACInst<"v_smfmac_f32_16x16x32_bf16", defm V_SMFMAC_F32_32X32X16_BF16 : SMFMACInst<"v_smfmac_f32_32x32x16_bf16", "F32_32X32X16_I16", int_amdgcn_smfmac_f32_32x32x16_bf16>; defm V_SMFMAC_I32_16X16X64_I8 : SMFMACInst<"v_smfmac_i32_16x16x64_i8", "I32_16X16X64_I8", int_amdgcn_smfmac_i32_16x16x64_i8>; defm V_SMFMAC_I32_32X32X32_I8 : SMFMACInst<"v_smfmac_i32_32x32x32_i8", "I32_32X32X32_I8", int_amdgcn_smfmac_i32_32x32x32_i8>; +defm V_SMFMAC_F32_16X16X64_BF8_BF8 : SMFMACInst<"v_smfmac_f32_16x16x64_bf8_bf8", "F32_16X16X64_F8", int_amdgcn_smfmac_f32_16x16x64_bf8_bf8>; +defm V_SMFMAC_F32_16X16X64_BF8_FP8 : SMFMACInst<"v_smfmac_f32_16x16x64_bf8_fp8", "F32_16X16X64_F8", int_amdgcn_smfmac_f32_16x16x64_bf8_fp8>; +defm V_SMFMAC_F32_16X16X64_FP8_BF8 : SMFMACInst<"v_smfmac_f32_16x16x64_fp8_bf8", "F32_16X16X64_F8", int_amdgcn_smfmac_f32_16x16x64_fp8_bf8>; +defm V_SMFMAC_F32_16X16X64_FP8_FP8 : SMFMACInst<"v_smfmac_f32_16x16x64_fp8_fp8", "F32_16X16X64_F8", int_amdgcn_smfmac_f32_16x16x64_fp8_fp8>; +defm V_SMFMAC_F32_32X32X32_BF8_BF8 : SMFMACInst<"v_smfmac_f32_32x32x32_bf8_bf8", "F32_32X32X32_F8", int_amdgcn_smfmac_f32_32x32x32_bf8_bf8>; +defm V_SMFMAC_F32_32X32X32_BF8_FP8 : SMFMACInst<"v_smfmac_f32_32x32x32_bf8_fp8", "F32_32X32X32_F8", int_amdgcn_smfmac_f32_32x32x32_bf8_fp8>; +defm V_SMFMAC_F32_32X32X32_FP8_BF8 : SMFMACInst<"v_smfmac_f32_32x32x32_fp8_bf8", "F32_32X32X32_F8", int_amdgcn_smfmac_f32_32x32x32_fp8_bf8>; +defm V_SMFMAC_F32_32X32X32_FP8_FP8 : SMFMACInst<"v_smfmac_f32_32x32x32_fp8_fp8", "F32_32X32X32_F8", int_amdgcn_smfmac_f32_32x32x32_fp8_fp8>; } def MAIInstInfoTable : GenericTable { @@ -1121,6 +1143,14 @@ defm V_MFMA_I32_32X32X16I8 : VOP3P_Real_MFMA_gfx940 <0x56, "v_mfma_i32_32x defm V_MFMA_I32_16X16X32I8 : VOP3P_Real_MFMA_gfx940 <0x57, "v_mfma_i32_16x16x32_i8">; defm V_MFMA_F32_16X16X8XF32 : VOP3P_Real_MFMA_gfx940 <0x3e, "v_mfma_f32_16x16x8_xf32">; defm V_MFMA_F32_32X32X4XF32 : VOP3P_Real_MFMA_gfx940 <0x3f, "v_mfma_f32_32x32x4_xf32">; +defm V_MFMA_F32_16X16X32_BF8_BF8 : VOP3P_Real_MFMA_gfx940 <0x70>; +defm V_MFMA_F32_16X16X32_BF8_FP8 : VOP3P_Real_MFMA_gfx940 <0x71>; +defm V_MFMA_F32_16X16X32_FP8_BF8 : VOP3P_Real_MFMA_gfx940 <0x72>; +defm V_MFMA_F32_16X16X32_FP8_FP8 : VOP3P_Real_MFMA_gfx940 <0x73>; +defm V_MFMA_F32_32X32X16_BF8_BF8 : VOP3P_Real_MFMA_gfx940 <0x74>; +defm V_MFMA_F32_32X32X16_BF8_FP8 : VOP3P_Real_MFMA_gfx940 <0x75>; +defm V_MFMA_F32_32X32X16_FP8_BF8 : VOP3P_Real_MFMA_gfx940 <0x76>; +defm V_MFMA_F32_32X32X16_FP8_FP8 : VOP3P_Real_MFMA_gfx940 <0x77>; defm V_MFMA_F32_32X32X4BF16_1K : VOP3P_Real_MFMA_gfx940 <0x5d, "v_mfma_f32_32x32x4_2b_bf16">; defm V_MFMA_F32_16X16X4BF16_1K : VOP3P_Real_MFMA_gfx940 <0x5e, "v_mfma_f32_16x16x4_4b_bf16">; @@ -1137,6 +1167,14 @@ defm V_SMFMAC_F32_16X16X32_BF16 : VOP3P_Real_SMFMAC <0x66, "v_smfmac_f32_16x1 defm V_SMFMAC_F32_32X32X16_BF16 : VOP3P_Real_SMFMAC <0x68, "v_smfmac_f32_32x32x16bf16">; defm V_SMFMAC_I32_16X16X64_I8 : VOP3P_Real_SMFMAC <0x6a, "v_smfmac_i32_16x16x64i8">; defm V_SMFMAC_I32_32X32X32_I8 : VOP3P_Real_SMFMAC <0x6c, "v_smfmac_i32_32x32x32i8">; +defm V_SMFMAC_F32_16X16X64_BF8_BF8 : VOP3P_Real_SMFMAC <0x78, "v_smfmac_f32_16x16x64bf8bf8">; +defm V_SMFMAC_F32_16X16X64_BF8_FP8 : VOP3P_Real_SMFMAC <0x79, "v_smfmac_f32_16x16x64bf8fp8">; +defm V_SMFMAC_F32_16X16X64_FP8_BF8 : VOP3P_Real_SMFMAC <0x7a, "v_smfmac_f32_16x16x64fp8bf8">; +defm V_SMFMAC_F32_16X16X64_FP8_FP8 : VOP3P_Real_SMFMAC <0x7b, "v_smfmac_f32_16x16x64fp8fp8">; +defm V_SMFMAC_F32_32X32X32_BF8_BF8 : VOP3P_Real_SMFMAC <0x7c, 
"v_smfmac_f32_32x32x32bf8bf8">; +defm V_SMFMAC_F32_32X32X32_BF8_FP8 : VOP3P_Real_SMFMAC <0x7d, "v_smfmac_f32_32x32x32bf8fp8">; +defm V_SMFMAC_F32_32X32X32_FP8_BF8 : VOP3P_Real_SMFMAC <0x7e, "v_smfmac_f32_32x32x32fp8bf8">; +defm V_SMFMAC_F32_32X32X32_FP8_FP8 : VOP3P_Real_SMFMAC <0x7f, "v_smfmac_f32_32x32x32fp8fp8">; let SubtargetPredicate = HasPackedFP32Ops in { defm V_PK_FMA_F32 : VOP3P_Real_vi <0x30>; diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td index 33d3441e94c2..d489a089ac78 100644 --- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td @@ -59,15 +59,17 @@ class VOPC_Profile<list<SchedReadWrite> sched, ValueType vt0, ValueType vt1 = vt "$src0, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl"); let AsmDPP8 = "$src0, $src1 $dpp8$fi"; let AsmDPP16 = AsmDPP#"$fi"; + // VOPC DPP Instructions do not need an old operand + let TieRegDPP = ""; let InsDPP = getInsDPP<VOPDstOperand<Src0DPP>, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs, HasModifiers, Src0ModDPP, Src1ModDPP, - Src2ModDPP>.ret; + Src2ModDPP, 0/*HasOld*/>.ret; let InsDPP16 = getInsDPP16<VOPDstOperand<Src0DPP>, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs, HasModifiers, Src0ModDPP, Src1ModDPP, - Src2ModDPP>.ret; + Src2ModDPP, 0/*HasOld*/>.ret; let InsDPP8 = getInsDPP8<VOPDstOperand<Src0DPP>, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs, HasModifiers, Src0ModDPP, Src1ModDPP, - Src2ModDPP>.ret; + Src2ModDPP, 0/*HasOld*/>.ret; // The destination for 32-bit encoding is implicit. let HasDst32 = 0; @@ -76,9 +78,9 @@ class VOPC_Profile<list<SchedReadWrite> sched, ValueType vt0, ValueType vt1 = vt let Outs64 = (outs VOPDstS64orS32:$sdst); let OutsVOP3DPP = Outs64; let OutsVOP3DPP8 = Outs64; - let InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, Src0VOP3DPP, NumSrcArgs>.ret; - let InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, Src0VOP3DPP, NumSrcArgs>.ret; - let InsVOP3DPP8 = getInsVOP3DPP8<InsVOP3Base, Src0VOP3DPP, NumSrcArgs>.ret; + let InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, Src0VOP3DPP, NumSrcArgs, 0/*HasOld*/>.ret; + let InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, Src0VOP3DPP, NumSrcArgs, 0/*HasOld*/>.ret; + let InsVOP3DPP8 = getInsVOP3DPP8<InsVOP3Base, Src0VOP3DPP, NumSrcArgs, 0/*HasOld*/>.ret; list<SchedReadWrite> Schedule = sched; } @@ -293,7 +295,7 @@ multiclass VOPC_Pseudos <string opName, let Defs = !if(DefExec, [EXEC], []); let SchedRW = P.Schedule; let isCompare = 1; - let Constraints = !if(P.NumSrcArgs, P.TieRegDPP # " = $sdst", ""); + let Constraints = ""; } } // end SubtargetPredicate = isGFX11Plus @@ -711,7 +713,7 @@ class VOPC_Class_Profile<list<SchedReadWrite> sched, ValueType vt> : VOPC_Profile<sched, vt, i32> { let AsmDPP = "$src0_modifiers, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; let AsmDPP16 = AsmDPP#"$fi"; - let InsDPP = (ins VGPR_32:$old, FPVRegInputMods:$src0_modifiers, VGPR_32:$src0, VGPR_32:$src1, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); + let InsDPP = (ins FPVRegInputMods:$src0_modifiers, VGPR_32:$src0, VGPR_32:$src1, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); let InsDPP16 = !con(InsDPP, (ins FI:$fi)); // DPP8 forbids modifiers and can inherit from VOPC_Profile @@ -793,7 +795,7 @@ multiclass VOPC_Class_Pseudos <string opName, VOPC_Profile p, bit DefExec, def _e64_dpp : VOP3_DPP_Pseudo<opName, p> { let Defs = !if(DefExec, [EXEC], []); let SchedRW = p.Schedule; - let Constraints = !if(p.NumSrcArgs, p.TieRegDPP # " = $sdst", ""); + let Constraints = ""; } } 
// end SubtargetPredicate = isGFX11Plus } @@ -1068,7 +1070,6 @@ class VOPC_DPP16<bits<8> op, VOP_DPP_Pseudo ps, string opName = ps.OpName> let Uses = ps.Uses; let OtherPredicates = ps.OtherPredicates; let Constraints = ps.Constraints; - let AsmMatchConverter = "cvtVOPCNoDstDPP"; } class VOPC_DPP16_SIMC<bits<8> op, VOP_DPP_Pseudo ps, int subtarget, @@ -1084,7 +1085,6 @@ class VOPC_DPP8<bits<8> op, VOPC_Pseudo ps, string opName = ps.OpName> let Uses = ps.Uses; let OtherPredicates = ps.OtherPredicates; let Constraints = ""; - let AsmMatchConverter = "cvtVOPCNoDstDPP8"; } // VOPC64 @@ -1133,7 +1133,6 @@ class VOPC64_DPP16_NoDst<bits<10> op, VOP_DPP_Pseudo ps, string opName = ps.OpName> : VOPC64_DPP16<op, ps, opName> { let Inst{7-0} = ? ; - let AsmMatchConverter = "cvtVOPC64NoDstDPP"; } class VOPC64_DPP8_Base<bits<10> op, string OpName, VOPProfile P> @@ -1163,13 +1162,12 @@ class VOPC64_DPP8_Dst<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName> : VOPC64_DPP8<op, ps, opName> { bits<8> sdst; let Inst{7-0} = sdst; - let Constraints = "$old = $sdst"; + let Constraints = ""; } class VOPC64_DPP8_NoDst<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName> : VOPC64_DPP8<op, ps, opName> { let Inst{7-0} = ? ; - let AsmMatchConverter = "cvtVOPC64NoDstDPP8"; let Constraints = ""; } diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index 187485ffa3ae..b65ca2d6b1b3 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -269,6 +269,10 @@ class VOP3OpSel_gfx10<bits<10> op, VOPProfile p> : VOP3e_gfx10<op, p> { class VOP3OpSel_gfx11<bits<10> op, VOPProfile p> : VOP3OpSel_gfx10<op, p>; +class VOP3DotOpSel_gfx11<bits<10> op, VOPProfile p> : VOP3OpSel_gfx11<op, p>{ + let Inst{11} = ?; + let Inst{12} = ?; +} // NB: For V_INTERP* opcodes, src0 is encoded as src1 and vice versa class VOP3Interp_vi <bits<10> op, VOPProfile P> : VOP3e_vi <op, P> { @@ -1270,6 +1274,8 @@ multiclass VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_f class Base_VOP3_DPP16<bits<10> op, VOP_DPP_Pseudo ps, string opName = ps.OpName> : VOP3_DPP<op, opName, ps.Pfl, 1> { + let VOP3_OPSEL = ps.Pfl.HasOpSel; + let IsDOT = ps.IsDOT; let hasSideEffects = ps.hasSideEffects; let Defs = ps.Defs; let SchedRW = ps.SchedRW; @@ -1285,6 +1291,8 @@ class VOP3_DPP16<bits<10> op, VOP_DPP_Pseudo ps, int subtarget, class Base_VOP3_DPP8<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName> : VOP3_DPP8<op, opName, ps.Pfl> { + let VOP3_OPSEL = ps.Pfl.HasOpSel; + let IsDOT = ps.IsDOT; let hasSideEffects = ps.hasSideEffects; let Defs = ps.Defs; let SchedRW = ps.SchedRW; @@ -1326,6 +1334,15 @@ let AssemblerPredicate = isGFX11Only, VOP3e_gfx11<op, ps.Pfl>; } } + multiclass VOP3Dot_Real_Base_gfx11<bits<10> op, string opName = NAME, + bit isSingle = 0> { + defvar ps = !cast<VOP_Pseudo>(opName#"_e64"); + let IsSingle = !or(isSingle, ps.Pfl.IsSingle) in { + def _e64_gfx11 : + VOP3_Real<ps, SIEncodingFamily.GFX11>, + VOP3DotOpSel_gfx11<op, ps.Pfl>; + } + } multiclass VOP3_Real_with_name_gfx11<bits<10> op, string opName, string asmName, bit isSingle = 0> { defvar ps = !cast<VOP_Pseudo>(opName#"_e64"); @@ -1355,6 +1372,15 @@ let AssemblerPredicate = isGFX11Only, let DecoderNamespace = "DPPGFX11"; } } + + multiclass VOP3Dot_Real_dpp_Base_gfx11<bits<10> op, string opName = NAME> { + def _e64_dpp_gfx11 : VOP3_DPP16<op, !cast<VOP_DPP_Pseudo>(opName#"_e64"#"_dpp"), SIEncodingFamily.GFX11> { + let Inst{11} = ?; + let Inst{12} = ?; + let DecoderNamespace = 
"DPPGFX11"; + } + } + multiclass VOP3_Real_dpp_with_name_gfx11<bits<10> op, string opName, string asmName> { defvar ps = !cast<VOP3_Pseudo>(opName#"_e64"); @@ -1368,6 +1394,16 @@ let AssemblerPredicate = isGFX11Only, let DecoderNamespace = "DPP8GFX11"; } } + + multiclass VOP3Dot_Real_dpp8_Base_gfx11<bits<10> op, string opName = NAME> { + defvar ps = !cast<VOP3_Pseudo>(opName#"_e64"); + def _e64_dpp8_gfx11 : Base_VOP3_DPP8<op, ps> { + let Inst{11} = ?; + let Inst{12} = ?; + let DecoderNamespace = "DPP8GFX11"; + } + } + multiclass VOP3_Real_dpp8_with_name_gfx11<bits<10> op, string opName, string asmName> { defvar ps = !cast<VOP3_Pseudo>(opName#"_e64"); @@ -1406,6 +1442,12 @@ multiclass VOP3_Realtriple_gfx11<bits<10> op, VOP3_Real_dpp_Base_gfx11<op, opName>, VOP3_Real_dpp8_Base_gfx11<op, opName>; +multiclass VOP3Dot_Realtriple_gfx11<bits<10> op, + bit isSingle = 0, string opName = NAME> : + VOP3Dot_Real_Base_gfx11<op, opName, isSingle>, + VOP3Dot_Real_dpp_Base_gfx11<op, opName>, + VOP3Dot_Real_dpp8_Base_gfx11<op, opName>; + multiclass VOP3Only_Realtriple_gfx11<bits<10> op> : VOP3_Realtriple_gfx11<op, 1>; diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 80ba7b5f0d2e..183febe756c1 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -6726,8 +6726,8 @@ bool ARMBaseInstrInfo::shouldOutlineFromFunctionByDefault( return Subtarget.isMClass() && MF.getFunction().hasMinSize(); } -bool ARMBaseInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA) const { +bool ARMBaseInstrInfo::isReallyTriviallyReMaterializable( + const MachineInstr &MI) const { // Try hard to rematerialize any VCTPs because if we spill P0, it will block // the tail predication conversion. This means that the element count // register has to be live for longer, but that has to be better than diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index 3b8f3403e3c3..453e3fa1b99b 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -480,8 +480,7 @@ private: MachineInstr *canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI, const TargetInstrInfo *TII) const; - bool isReallyTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA) const override; + bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override; private: /// Modeling special VFP / NEON fp MLA / MLS hazards. diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 613904f702f0..e5347ed8e53a 100644 --- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -1720,6 +1720,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, unsigned UxtOp, MachineBasicBlock::iterator &NextMBBI) { bool IsThumb = STI->isThumb(); + bool IsThumb1Only = STI->isThumb1Only(); MachineInstr &MI = *MBBI; DebugLoc DL = MI.getDebugLoc(); const MachineOperand &Dest = MI.getOperand(0); @@ -1794,7 +1795,8 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, MIB.addImm(0); // a 32-bit Thumb strex (only) allows an offset. MIB.add(predOps(ARMCC::AL)); - unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri; + unsigned CMPri = + IsThumb ? (IsThumb1Only ? 
ARM::tCMPi8 : ARM::t2CMPri) : ARM::CMPri; BuildMI(StoreBB, DL, TII->get(CMPri)) .addReg(TempReg, RegState::Kill) .addImm(0) @@ -1848,6 +1850,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator &NextMBBI) { bool IsThumb = STI->isThumb(); + assert(!STI->isThumb1Only() && "CMP_SWAP_64 unsupported under Thumb1!"); MachineInstr &MI = *MBBI; DebugLoc DL = MI.getDebugLoc(); MachineOperand &Dest = MI.getOperand(0); @@ -3044,6 +3047,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, assert(STI->isThumb()); return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXH, ARM::t2STREXH, ARM::tUXTH, NextMBBI); + case ARM::tCMP_SWAP_32: + assert(STI->isThumb()); + return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREX, ARM::t2STREX, 0, NextMBBI); case ARM::CMP_SWAP_8: assert(!STI->isThumb()); @@ -3054,11 +3060,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXH, ARM::STREXH, ARM::UXTH, NextMBBI); case ARM::CMP_SWAP_32: - if (STI->isThumb()) - return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREX, ARM::t2STREX, 0, - NextMBBI); - else - return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREX, ARM::STREX, 0, NextMBBI); + assert(!STI->isThumb()); + return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREX, ARM::STREX, 0, NextMBBI); case ARM::CMP_SWAP_64: return ExpandCMP_SWAP_64(MBB, MBBI, NextMBBI); diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index e0e4ffd90e0e..afe16a3cd55c 100644 --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -3131,7 +3131,7 @@ bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) { // Else v8i16 pattern of an extract and an insert, with a optional vmovx for // extracting odd lanes. - if (VT == MVT::v8i16) { + if (VT == MVT::v8i16 && Subtarget->hasFullFP16()) { SDValue Inp1 = CurDAG->getTargetExtractSubreg( ARM::ssub_0 + ExtractLane1 / 2, dl, MVT::f32, Val1.getOperand(0)); SDValue Inp2 = CurDAG->getTargetExtractSubreg( @@ -3151,7 +3151,7 @@ bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) { // The inserted values are not extracted - if they are f16 then insert them // directly using a VINS. - if (VT == MVT::v8f16) { + if (VT == MVT::v8f16 && Subtarget->hasFullFP16()) { SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Val2, Val1); SDValue NewIns = CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32, @@ -3512,7 +3512,7 @@ void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) { else if (MemTy == MVT::i16) Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16; else if (MemTy == MVT::i32) - Opcode = ARM::CMP_SWAP_32; + Opcode = Subtarget->isThumb() ? 
ARM::tCMP_SWAP_32 : ARM::CMP_SWAP_32; else llvm_unreachable("Unknown AtomicCmpSwap type"); diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index e6be93e6480a..743cca9ff71f 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -13572,6 +13572,10 @@ static SDValue PerformADDVecReduce(SDNode *N, SelectionDAG &DAG, bool ARMTargetLowering::isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const { + assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA || + N->getOpcode() == ISD::SRL) && + "Expected shift op"); + if (Level == BeforeLegalizeTypes) return true; @@ -13605,8 +13609,38 @@ ARMTargetLowering::isDesirableToCommuteWithShift(const SDNode *N, return false; } +bool ARMTargetLowering::isDesirableToCommuteXorWithShift( + const SDNode *N) const { + assert(N->getOpcode() == ISD::XOR && + (N->getOperand(0).getOpcode() == ISD::SHL || + N->getOperand(0).getOpcode() == ISD::SRL) && + "Expected XOR(SHIFT) pattern"); + + // Only commute if the entire NOT mask is a hidden shifted mask. + auto *XorC = dyn_cast<ConstantSDNode>(N->getOperand(1)); + auto *ShiftC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1)); + if (XorC && ShiftC) { + unsigned MaskIdx, MaskLen; + if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) { + unsigned ShiftAmt = ShiftC->getZExtValue(); + unsigned BitWidth = N->getValueType(0).getScalarSizeInBits(); + if (N->getOperand(0).getOpcode() == ISD::SHL) + return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt); + return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt); + } + } + + return false; +} + bool ARMTargetLowering::shouldFoldConstantShiftPairToMask( const SDNode *N, CombineLevel Level) const { + assert(((N->getOpcode() == ISD::SHL && + N->getOperand(0).getOpcode() == ISD::SRL) || + (N->getOpcode() == ISD::SRL && + N->getOperand(0).getOpcode() == ISD::SHL)) && + "Expected shift-shift mask"); + if (!Subtarget->isThumb1Only()) return true; @@ -19962,6 +19996,14 @@ bool ARMTargetLowering::SimplifyDemandedBitsForTargetNode( } break; } + case ARMISD::VBICIMM: { + SDValue Op0 = Op.getOperand(0); + unsigned ModImm = Op.getConstantOperandVal(1); + unsigned EltBits = 0; + uint64_t Mask = ARM_AM::decodeVMOVModImm(ModImm, EltBits); + if ((OriginalDemandedBits & Mask) == 0) + return TLO.CombineTo(Op, Op0); + } } return TargetLowering::SimplifyDemandedBitsForTargetNode( diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 10f60ab93ae3..fae279ea7569 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -733,6 +733,8 @@ class VectorType; bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override; + bool isDesirableToCommuteXorWithShift(const SDNode *N) const override; + bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const override; diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index 15c33014e988..9c03f72fe6ae 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -1882,6 +1882,7 @@ let Predicates = [HasMVEInt] in { def : Pat<(ARMvgetlaneu (v8f16 MQPR:$src), imm:$lane), (MVE_VMOV_from_lane_u16 MQPR:$src, imm:$lane)>; // For i16's inserts being extracted from low lanes, then may use VINS. 
+ let Predicates = [HasFullFP16] in { def : Pat<(ARMinsertelt (v8i16 MQPR:$src1), (ARMvgetlaneu (v8i16 MQPR:$src2), imm_even:$extlane), imm_odd:$inslane), @@ -1889,6 +1890,7 @@ let Predicates = [HasMVEInt] in { (VINSH (EXTRACT_SUBREG MQPR:$src1, (SSubReg_f16_reg imm_odd:$inslane)), (EXTRACT_SUBREG MQPR:$src2, (SSubReg_f16_reg imm_even:$extlane))), (SSubReg_f16_reg imm_odd:$inslane)), MQPR)>; + } def : Pat<(v16i8 (scalar_to_vector GPR:$src)), (MVE_VMOV_to_lane_8 (v16i8 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>; @@ -1905,17 +1907,21 @@ let Predicates = [HasMVEInt] in { def : Pat<(insertelt (v8f16 MQPR:$src1), (f16 HPR:$src2), imm_even:$lane), (MVE_VMOV_to_lane_16 MQPR:$src1, (COPY_TO_REGCLASS (f16 HPR:$src2), rGPR), imm:$lane)>; + let Predicates = [HasFullFP16] in { def : Pat<(insertelt (v8f16 MQPR:$src1), (f16 HPR:$src2), imm_odd:$lane), (COPY_TO_REGCLASS (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS MQPR:$src1, MQPR)), (VINSH (EXTRACT_SUBREG MQPR:$src1, (SSubReg_f16_reg imm_odd:$lane)), (COPY_TO_REGCLASS HPR:$src2, SPR)), (SSubReg_f16_reg imm_odd:$lane)), MQPR)>; + } def : Pat<(extractelt (v8f16 MQPR:$src), imm_even:$lane), (EXTRACT_SUBREG MQPR:$src, (SSubReg_f16_reg imm_even:$lane))>; + let Predicates = [HasFullFP16] in { def : Pat<(extractelt (v8f16 MQPR:$src), imm_odd:$lane), (COPY_TO_REGCLASS (VMOVH (EXTRACT_SUBREG MQPR:$src, (SSubReg_f16_reg imm_odd:$lane))), HPR)>; + } def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))), (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; diff --git a/llvm/lib/Target/ARM/ARMInstrThumb.td b/llvm/lib/Target/ARM/ARMInstrThumb.td index 71527ae1ab11..8f7039a327b3 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb.td @@ -1782,11 +1782,15 @@ def tLDRConstPool let Constraints = "@earlyclobber $Rd,@earlyclobber $temp", mayLoad = 1, mayStore = 1 in { -def tCMP_SWAP_8 : PseudoInst<(outs GPR:$Rd, GPR:$temp), +def tCMP_SWAP_8 : PseudoInst<(outs GPR:$Rd, tGPR:$temp), (ins GPR:$addr, tGPR:$desired, GPR:$new), NoItinerary, []>, Sched<[]>; -def tCMP_SWAP_16 : PseudoInst<(outs GPR:$Rd, GPR:$temp), +def tCMP_SWAP_16 : PseudoInst<(outs GPR:$Rd, tGPR:$temp), (ins GPR:$addr, tGPR:$desired, GPR:$new), NoItinerary, []>, Sched<[]>; + +def tCMP_SWAP_32 : PseudoInst<(outs GPR:$Rd, tGPR:$temp), + (ins GPR:$addr, GPR:$desired, GPR:$new), + NoItinerary, []>, Sched<[]>; } diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index ba1d806c8d81..3c102463ba08 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -20,8 +20,8 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsARM.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" @@ -33,6 +33,7 @@ #include "llvm/Transforms/InstCombine/InstCombiner.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -2197,12 +2198,9 @@ static bool canTailPredicateLoop(Loop *L, LoopInfo *LI, ScalarEvolution &SE, return true; } -bool ARMTTIImpl::preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, - ScalarEvolution &SE, - AssumptionCache &AC, - TargetLibraryInfo *TLI, - DominatorTree *DT, - const LoopAccessInfo *LAI) { +bool 
ARMTTIImpl::preferPredicateOverEpilogue( + Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, + TargetLibraryInfo *TLI, DominatorTree *DT, LoopVectorizationLegality *LVL) { if (!EnableTailPredication) { LLVM_DEBUG(dbgs() << "Tail-predication not enabled.\n"); return false; @@ -2244,7 +2242,7 @@ bool ARMTTIImpl::preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, return false; } - return canTailPredicateLoop(L, LI, SE, DL, LAI); + return canTailPredicateLoop(L, LI, SE, DL, LVL->getLAI()); } PredicationStyle ARMTTIImpl::emitGetActiveLaneMask() const { diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index dcf82e703a7f..9c3980d79e60 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -288,12 +288,10 @@ public: AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo); - bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, - ScalarEvolution &SE, - AssumptionCache &AC, - TargetLibraryInfo *TLI, + bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, + AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, - const LoopAccessInfo *LAI); + LoopVectorizationLegality *LVL); void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE); diff --git a/llvm/lib/Target/AVR/AVRSubtarget.h b/llvm/lib/Target/AVR/AVRSubtarget.h index 2325193bac0a..3dd71243387b 100644 --- a/llvm/lib/Target/AVR/AVRSubtarget.h +++ b/llvm/lib/Target/AVR/AVRSubtarget.h @@ -92,15 +92,15 @@ public: } /// Get I/O register addresses. - int getIORegRAMPZ(void) const { return hasELPM() ? 0x3b : -1; } - int getIORegEIND(void) const { return hasEIJMPCALL() ? 0x3c : -1; } - int getIORegSPL(void) const { return 0x3d; } - int getIORegSPH(void) const { return hasSmallStack() ? -1 : 0x3e; } - int getIORegSREG(void) const { return 0x3f; } + int getIORegRAMPZ() const { return hasELPM() ? 0x3b : -1; } + int getIORegEIND() const { return hasEIJMPCALL() ? 0x3c : -1; } + int getIORegSPL() const { return 0x3d; } + int getIORegSPH() const { return hasSmallStack() ? -1 : 0x3e; } + int getIORegSREG() const { return 0x3f; } /// Get GPR aliases. - int getRegTmpIndex(void) const { return hasTinyEncoding() ? 16 : 0; } - int getRegZeroIndex(void) const { return hasTinyEncoding() ? 17 : 1; } + int getRegTmpIndex() const { return hasTinyEncoding() ? 16 : 0; } + int getRegZeroIndex() const { return hasTinyEncoding() ? 17 : 1; } private: /// The ELF e_flags architecture. diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp b/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp index d490b385ac16..0bf739452fd2 100644 --- a/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp +++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp @@ -518,7 +518,7 @@ void CSKYInstrInfo::copyPhysReg(MachineBasicBlock &MBB, unsigned Opcode = 0; if (CSKY::GPRRegClass.contains(DestReg, SrcReg)) - Opcode = CSKY::MOV32; + Opcode = STI.hasE2() ? 
CSKY::MOV32 : CSKY::MOV16; else if (v2sf && CSKY::sFPR32RegClass.contains(DestReg, SrcReg)) Opcode = CSKY::FMOV_S; else if (v3sf && CSKY::FPR32RegClass.contains(DestReg, SrcReg)) diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp index 3e09270a66d0..869433613620 100644 --- a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp +++ b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp @@ -13,6 +13,7 @@ #include "DXILBitcodeWriter.h" #include "DXILValueEnumerator.h" #include "PointerTypeAnalysis.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/Bitcode/BitcodeCommon.h" #include "llvm/Bitcode/BitcodeReader.h" @@ -2580,10 +2581,9 @@ void DXILBitcodeWriter::writeFunctionLevelValueSymbolTable( SortedTable.push_back(VI.second->getValueName()); } // The keys are unique, so there shouldn't be stability issues. - std::sort(SortedTable.begin(), SortedTable.end(), - [](const ValueName *A, const ValueName *B) { - return A->first() < B->first(); - }); + llvm::sort(SortedTable, [](const ValueName *A, const ValueName *B) { + return A->first() < B->first(); + }); for (const ValueName *SI : SortedTable) { auto &Name = *SI; diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILValueEnumerator.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILValueEnumerator.cpp index 08944ee3f1fe..e2a41515de38 100644 --- a/llvm/lib/Target/DirectX/DXILWriter/DXILValueEnumerator.cpp +++ b/llvm/lib/Target/DirectX/DXILWriter/DXILValueEnumerator.cpp @@ -809,7 +809,7 @@ void ValueEnumerator::organizeMetadata() { // - by function, then // - by isa<MDString> // and then sort by the original/current ID. Since the IDs are guaranteed to - // be unique, the result of std::sort will be deterministic. There's no need + // be unique, the result of llvm::sort will be deterministic. There's no need // for std::stable_sort. llvm::sort(Order, [this](MDIndex LHS, MDIndex RHS) { return std::make_tuple(LHS.F, getMetadataTypeOrder(LHS.get(MDs)), LHS.ID) < diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp index abd84a188cfa..bd0232c71d48 100644 --- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp @@ -85,7 +85,6 @@ public: int getAllocSizeOf(const Type *Ty) const; int getTypeAlignment(Type *Ty) const; - VectorType *getByteVectorTy(int ScLen) const; Constant *getNullValue(Type *Ty) const; Constant *getFullValue(Type *Ty) const; diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 4acf90bd9788..93c8864347bb 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -217,9 +217,8 @@ SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op, const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); unsigned ADDIOp = Subtarget.is64Bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W; - // FIXME: Only support PC-relative addressing to access the symbol. - // TODO: Add target flags. - if (!isPositionIndependent()) { + // TODO: Support dso_preemptable and target flags. 
+ if (GV->isDSOLocal()) { SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty); SDValue AddrHi(DAG.getMachineNode(LoongArch::PCALAU12I, DL, Ty, GA), 0); SDValue Addr(DAG.getMachineNode(ADDIOp, DL, Ty, AddrHi, GA), 0); diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp index 468c4f43cb90..2d08d5c674bc 100644 --- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp @@ -38,9 +38,7 @@ static std::string computeDataLayout(const Triple &TT) { static Reloc::Model getEffectiveRelocModel(const Triple &TT, Optional<Reloc::Model> RM) { - if (!RM.hasValue()) - return Reloc::Static; - return *RM; + return RM.value_or(Reloc::Static); } LoongArchTargetMachine::LoongArchTargetMachine( diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp index b98be4ae4b75..4dfc16526a00 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -1192,6 +1192,12 @@ bool MipsTargetLowering::hasBitTest(SDValue X, SDValue Y) const { bool MipsTargetLowering::shouldFoldConstantShiftPairToMask( const SDNode *N, CombineLevel Level) const { + assert(((N->getOpcode() == ISD::SHL && + N->getOperand(0).getOpcode() == ISD::SRL) || + (N->getOpcode() == ISD::SRL && + N->getOperand(0).getOpcode() == ISD::SHL)) && + "Expected shift-shift mask"); + if (N->getOperand(0).getValueType().isVector()) return false; return true; diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 9977d8ba0300..45e82e935772 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -73,8 +73,10 @@ #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MachineValueType.h" +#include "llvm/Support/NativeFormatting.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -354,8 +356,7 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { // PTX ABI requires all scalar return values to be at least 32 // bits in size. fp16 normally uses .b16 as its storage type in // PTX, so its size must be adjusted here, too. 
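The clamp described in the comment above is replaced in the following hunks by promoteScalarArgumentSize, whose definition lands in NVPTXUtilities.h later in this patch. A minimal sketch that restates that rule locally so the sample sizes can be checked (illustrative only, not part of the patch):

#include <cassert>
// PTX scalar parameters/returns are widened to a power-of-two size of at
// least 32 bits; this mirrors promoteScalarArgumentSize from the patch.
static unsigned promote(unsigned SizeInBits) {
  if (SizeInBits <= 32) return 32;
  if (SizeInBits <= 64) return 64;
  return SizeInBits;            // larger types are left untouched
}
int main() {
  assert(promote(1) == 32 && promote(8) == 32 && promote(16) == 32);
  assert(promote(40) == 64 && promote(64) == 64);
  assert(promote(128) == 128);
  return 0;
}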
- if (size < 32) - size = 32; + size = promoteScalarArgumentSize(size); O << ".param .b" << size << " func_retval0"; } else if (isa<PointerType>(Ty)) { @@ -384,8 +385,8 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { for (unsigned j = 0, je = elems; j != je; ++j) { unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 32)) - sz = 32; + if (elemtype.isInteger()) + sz = promoteScalarArgumentSize(sz); O << ".reg .b" << sz << " func_retval" << idx; if (j < je - 1) O << ", "; @@ -1168,31 +1169,37 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, GVar->hasInitializer()) { const Constant *Initializer = GVar->getInitializer(); if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) { - AggBuffer aggBuffer(ElementSize, O, *this); + AggBuffer aggBuffer(ElementSize, *this); bufferAggregateConstant(Initializer, &aggBuffer); - if (aggBuffer.numSymbols) { - if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) { - O << " .u64 "; + if (aggBuffer.numSymbols()) { + unsigned int ptrSize = MAI->getCodePointerSize(); + if (ElementSize % ptrSize || + !aggBuffer.allSymbolsAligned(ptrSize)) { + // Print in bytes and use the mask() operator for pointers. + if (!STI.hasMaskOperator()) + report_fatal_error( + "initialized packed aggregate with pointers '" + + GVar->getName() + + "' requires at least PTX ISA version 7.1"); + O << " .u8 "; getSymbol(GVar)->print(O, MAI); - O << "["; - O << ElementSize / 8; + O << "[" << ElementSize << "] = {"; + aggBuffer.printBytes(O); + O << "}"; } else { - O << " .u32 "; + O << " .u" << ptrSize * 8 << " "; getSymbol(GVar)->print(O, MAI); - O << "["; - O << ElementSize / 4; + O << "[" << ElementSize / ptrSize << "] = {"; + aggBuffer.printWords(O); + O << "}"; } - O << "]"; } else { O << " .b8 "; getSymbol(GVar)->print(O, MAI); - O << "["; - O << ElementSize; - O << "]"; + O << "[" << ElementSize << "] = {"; + aggBuffer.printBytes(O); + O << "}"; } - O << " = {"; - aggBuffer.print(); - O << "}"; } else { O << " .b8 "; getSymbol(GVar)->print(O, MAI); @@ -1219,6 +1226,80 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, O << ";\n"; } +void NVPTXAsmPrinter::AggBuffer::printSymbol(unsigned nSym, raw_ostream &os) { + const Value *v = Symbols[nSym]; + const Value *v0 = SymbolsBeforeStripping[nSym]; + if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { + MCSymbol *Name = AP.getSymbol(GVar); + PointerType *PTy = dyn_cast<PointerType>(v0->getType()); + // Is v0 a generic pointer? 
+ bool isGenericPointer = PTy && PTy->getAddressSpace() == 0; + if (EmitGeneric && isGenericPointer && !isa<Function>(v)) { + os << "generic("; + Name->print(os, AP.MAI); + os << ")"; + } else { + Name->print(os, AP.MAI); + } + } else if (const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(v0)) { + const MCExpr *Expr = AP.lowerConstantForGV(cast<Constant>(CExpr), false); + AP.printMCExpr(*Expr, os); + } else + llvm_unreachable("symbol type unknown"); +} + +void NVPTXAsmPrinter::AggBuffer::printBytes(raw_ostream &os) { + unsigned int ptrSize = AP.MAI->getCodePointerSize(); + symbolPosInBuffer.push_back(size); + unsigned int nSym = 0; + unsigned int nextSymbolPos = symbolPosInBuffer[nSym]; + for (unsigned int pos = 0; pos < size;) { + if (pos) + os << ", "; + if (pos != nextSymbolPos) { + os << (unsigned int)buffer[pos]; + ++pos; + continue; + } + // Generate a per-byte mask() operator for the symbol, which looks like: + // .global .u8 addr[] = {0xFF(foo), 0xFF00(foo), 0xFF0000(foo), ...}; + // See https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#initializers + std::string symText; + llvm::raw_string_ostream oss(symText); + printSymbol(nSym, oss); + for (unsigned i = 0; i < ptrSize; ++i) { + if (i) + os << ", "; + llvm::write_hex(os, 0xFFULL << i * 8, HexPrintStyle::PrefixUpper); + os << "(" << symText << ")"; + } + pos += ptrSize; + nextSymbolPos = symbolPosInBuffer[++nSym]; + assert(nextSymbolPos >= pos); + } +} + +void NVPTXAsmPrinter::AggBuffer::printWords(raw_ostream &os) { + unsigned int ptrSize = AP.MAI->getCodePointerSize(); + symbolPosInBuffer.push_back(size); + unsigned int nSym = 0; + unsigned int nextSymbolPos = symbolPosInBuffer[nSym]; + assert(nextSymbolPos % ptrSize == 0); + for (unsigned int pos = 0; pos < size; pos += ptrSize) { + if (pos) + os << ", "; + if (pos == nextSymbolPos) { + printSymbol(nSym, os); + nextSymbolPos = symbolPosInBuffer[++nSym]; + assert(nextSymbolPos % ptrSize == 0); + assert(nextSymbolPos >= pos + ptrSize); + } else if (ptrSize == 4) + os << support::endian::read32le(&buffer[pos]); + else + os << support::endian::read64le(&buffer[pos]); + } +} + void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) { if (localDecls.find(f) == localDecls.end()) return; @@ -1494,8 +1575,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { unsigned sz = 0; if (isa<IntegerType>(Ty)) { sz = cast<IntegerType>(Ty)->getBitWidth(); - if (sz < 32) - sz = 32; + sz = promoteScalarArgumentSize(sz); } else if (isa<PointerType>(Ty)) sz = thePointerTy.getSizeInBits(); else if (Ty->isHalfTy()) @@ -1559,8 +1639,8 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { for (unsigned j = 0, je = elems; j != je; ++j) { unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 32)) - sz = 32; + if (elemtype.isInteger()) + sz = promoteScalarArgumentSize(sz); O << "\t.reg .b" << sz << " "; printParamName(I, paramIndex, O); if (j < je - 1) diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h index cd61e99a103a..710c089e3325 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h +++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h @@ -61,24 +61,30 @@ class MCOperand; class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { class AggBuffer { - // Used to buffer the emitted string for initializing global - // aggregates. + // Used to buffer the emitted string for initializing global aggregates. 
// - // Normally an aggregate (array, vector or structure) is emitted - // as a u8[]. However, if one element/field of the aggregate - // is a non-NULL address, then the aggregate is emitted as u32[] - // or u64[]. + // Normally an aggregate (array, vector, or structure) is emitted as a u8[]. + // However, if either element/field of the aggregate is a non-NULL address, + // and all such addresses are properly aligned, then the aggregate is + // emitted as u32[] or u64[]. In the case of unaligned addresses, the + // aggregate is emitted as u8[], and the mask() operator is used for all + // pointers. // - // We first layout the aggregate in 'buffer' in bytes, except for - // those symbol addresses. For the i-th symbol address in the - //aggregate, its corresponding 4-byte or 8-byte elements in 'buffer' - // are filled with 0s. symbolPosInBuffer[i-1] records its position - // in 'buffer', and Symbols[i-1] records the Value*. + // We first layout the aggregate in 'buffer' in bytes, except for those + // symbol addresses. For the i-th symbol address in the aggregate, its + // corresponding 4-byte or 8-byte elements in 'buffer' are filled with 0s. + // symbolPosInBuffer[i-1] records its position in 'buffer', and Symbols[i-1] + // records the Value*. // - // Once we have this AggBuffer setup, we can choose how to print - // it out. + // Once we have this AggBuffer setup, we can choose how to print it out. public: - unsigned numSymbols; // number of symbol addresses + // number of symbol addresses + unsigned numSymbols() const { return Symbols.size(); } + + bool allSymbolsAligned(unsigned ptrSize) const { + return llvm::all_of(symbolPosInBuffer, + [=](unsigned pos) { return pos % ptrSize == 0; }); + } private: const unsigned size; // size of the buffer in bytes @@ -94,15 +100,13 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { // SymbolsBeforeStripping[i]. SmallVector<const Value *, 4> SymbolsBeforeStripping; unsigned curpos; - raw_ostream &O; NVPTXAsmPrinter &AP; bool EmitGeneric; public: - AggBuffer(unsigned size, raw_ostream &O, NVPTXAsmPrinter &AP) - : size(size), buffer(size), O(O), AP(AP) { + AggBuffer(unsigned size, NVPTXAsmPrinter &AP) + : size(size), buffer(size), AP(AP) { curpos = 0; - numSymbols = 0; EmitGeneric = AP.EmitGeneric; } @@ -135,63 +139,13 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { symbolPosInBuffer.push_back(curpos); Symbols.push_back(GVar); SymbolsBeforeStripping.push_back(GVarBeforeStripping); - numSymbols++; } - void print() { - if (numSymbols == 0) { - // print out in bytes - for (unsigned i = 0; i < size; i++) { - if (i) - O << ", "; - O << (unsigned int) buffer[i]; - } - } else { - // print out in 4-bytes or 8-bytes - unsigned int pos = 0; - unsigned int nSym = 0; - unsigned int nextSymbolPos = symbolPosInBuffer[nSym]; - unsigned int nBytes = 4; - if (static_cast<const NVPTXTargetMachine &>(AP.TM).is64Bit()) - nBytes = 8; - for (pos = 0; pos < size; pos += nBytes) { - if (pos) - O << ", "; - if (pos == nextSymbolPos) { - const Value *v = Symbols[nSym]; - const Value *v0 = SymbolsBeforeStripping[nSym]; - if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { - MCSymbol *Name = AP.getSymbol(GVar); - PointerType *PTy = dyn_cast<PointerType>(v0->getType()); - bool IsNonGenericPointer = false; // Is v0 a non-generic pointer? 
- if (PTy && PTy->getAddressSpace() != 0) { - IsNonGenericPointer = true; - } - if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) { - O << "generic("; - Name->print(O, AP.MAI); - O << ")"; - } else { - Name->print(O, AP.MAI); - } - } else if (const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(v0)) { - const MCExpr *Expr = - AP.lowerConstantForGV(cast<Constant>(CExpr), false); - AP.printMCExpr(*Expr, O); - } else - llvm_unreachable("symbol type unknown"); - nSym++; - if (nSym >= numSymbols) - nextSymbolPos = size + 1; - else - nextSymbolPos = symbolPosInBuffer[nSym]; - } else if (nBytes == 4) - O << *(unsigned int *)(&buffer[pos]); - else - O << *(unsigned long long *)(&buffer[pos]); - } - } - } + void printBytes(raw_ostream &os); + void printWords(raw_ostream &os); + + private: + void printSymbol(unsigned nSym, raw_ostream &os); }; friend class AggBuffer; diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 6ad016dfa0a7..8264032b765a 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -206,6 +206,40 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL, } } +/// PromoteScalarIntegerPTX +/// Used to make sure the arguments/returns are suitable for passing +/// and promote them to a larger size if they're not. +/// +/// The promoted type is placed in \p PromoteVT if the function returns true. +static bool PromoteScalarIntegerPTX(const EVT &VT, MVT *PromotedVT) { + if (VT.isScalarInteger()) { + switch (PowerOf2Ceil(VT.getFixedSizeInBits())) { + default: + llvm_unreachable( + "Promotion is not suitable for scalars of size larger than 64-bits"); + case 1: + *PromotedVT = MVT::i1; + break; + case 2: + case 4: + case 8: + *PromotedVT = MVT::i8; + break; + case 16: + *PromotedVT = MVT::i16; + break; + case 32: + *PromotedVT = MVT::i32; + break; + case 64: + *PromotedVT = MVT::i64; + break; + } + return EVT(*PromotedVT) != VT; + } + return false; +} + // Check whether we can merge loads/stores of some of the pieces of a // flattened function parameter or return value into a single vector // load/store. @@ -1291,8 +1325,7 @@ std::string NVPTXTargetLowering::getPrototype( // PTX ABI requires all scalar return values to be at least 32 // bits in size. fp16 normally uses .b16 as its storage type in // PTX, so its size must be adjusted here, too. - if (size < 32) - size = 32; + size = promoteScalarArgumentSize(size); O << ".param .b" << size << " _"; } else if (isa<PointerType>(retTy)) { @@ -1343,8 +1376,7 @@ std::string NVPTXTargetLowering::getPrototype( unsigned sz = 0; if (isa<IntegerType>(Ty)) { sz = cast<IntegerType>(Ty)->getBitWidth(); - if (sz < 32) - sz = 32; + sz = promoteScalarArgumentSize(sz); } else if (isa<PointerType>(Ty)) { sz = PtrVT.getSizeInBits(); } else if (Ty->isHalfTy()) @@ -1515,11 +1547,11 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, NeedAlign = true; } else { // declare .param .b<size> .param<n>; - if ((VT.isInteger() || VT.isFloatingPoint()) && TypeSize < 4) { + if (VT.isInteger() || VT.isFloatingPoint()) { // PTX ABI requires integral types to be at least 32 bits in // size. FP16 is loaded/stored using i16, so it's handled // here as well. 
- TypeSize = 4; + TypeSize = promoteScalarArgumentSize(TypeSize * 8) / 8; } SDValue DeclareScalarParamOps[] = { Chain, DAG.getConstant(ParamCount, dl, MVT::i32), @@ -1556,6 +1588,17 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } SDValue StVal = OutVals[OIdx]; + + MVT PromotedVT; + if (PromoteScalarIntegerPTX(EltVT, &PromotedVT)) { + EltVT = EVT(PromotedVT); + } + if (PromoteScalarIntegerPTX(StVal.getValueType(), &PromotedVT)) { + llvm::ISD::NodeType Ext = + Outs[OIdx].Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + StVal = DAG.getNode(Ext, dl, PromotedVT, StVal); + } + if (IsByVal) { auto PtrVT = getPointerTy(DL); SDValue srcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, StVal, @@ -1638,9 +1681,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Plus, this behavior is consistent with nvcc's. if (RetTy->isFloatingPointTy() || RetTy->isPointerTy() || (RetTy->isIntegerTy() && !RetTy->isIntegerTy(128))) { - // Scalar needs to be at least 32bit wide - if (resultsz < 32) - resultsz = 32; + resultsz = promoteScalarArgumentSize(resultsz); SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(resultsz, dl, MVT::i32), @@ -1778,6 +1819,14 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, EVT TheLoadType = VTs[i]; EVT EltType = Ins[i].VT; Align EltAlign = commonAlignment(RetAlign, Offsets[i]); + MVT PromotedVT; + + if (PromoteScalarIntegerPTX(TheLoadType, &PromotedVT)) { + TheLoadType = EVT(PromotedVT); + EltType = EVT(PromotedVT); + needTruncate = true; + } + if (ExtendIntegerRetVal) { TheLoadType = MVT::i32; EltType = MVT::i32; @@ -2558,6 +2607,13 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( // v2f16 was loaded as an i32. Now we must bitcast it back. else if (EltVT == MVT::v2f16) Elt = DAG.getNode(ISD::BITCAST, dl, MVT::v2f16, Elt); + + // If a promoted integer type is used, truncate down to the original + MVT PromotedVT; + if (PromoteScalarIntegerPTX(EltVT, &PromotedVT)) { + Elt = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt); + } + // Extend the element if necessary (e.g. an i8 is loaded // into an i16 register) if (Ins[InsIdx].VT.isInteger() && @@ -2627,11 +2683,26 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, return Chain; const DataLayout &DL = DAG.getDataLayout(); + SmallVector<SDValue, 16> PromotedOutVals; SmallVector<EVT, 16> VTs; SmallVector<uint64_t, 16> Offsets; ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets); assert(VTs.size() == OutVals.size() && "Bad return value decomposition"); + for (unsigned i = 0, e = VTs.size(); i != e; ++i) { + SDValue PromotedOutVal = OutVals[i]; + MVT PromotedVT; + if (PromoteScalarIntegerPTX(VTs[i], &PromotedVT)) { + VTs[i] = EVT(PromotedVT); + } + if (PromoteScalarIntegerPTX(PromotedOutVal.getValueType(), &PromotedVT)) { + llvm::ISD::NodeType Ext = + Outs[i].Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + PromotedOutVal = DAG.getNode(Ext, dl, PromotedVT, PromotedOutVal); + } + PromotedOutVals.push_back(PromotedOutVal); + } + auto VectorInfo = VectorizePTXValueVTs( VTs, Offsets, RetTy->isSized() ? 
getFunctionParamOptimizedAlign(&F, RetTy, DL) @@ -2652,12 +2723,14 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, StoreOperands.push_back(DAG.getConstant(Offsets[i], dl, MVT::i32)); } - SDValue RetVal = OutVals[i]; + SDValue OutVal = OutVals[i]; + SDValue RetVal = PromotedOutVals[i]; + if (ExtendIntegerRetVal) { RetVal = DAG.getNode(Outs[i].Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, MVT::i32, RetVal); - } else if (RetVal.getValueSizeInBits() < 16) { + } else if (OutVal.getValueSizeInBits() < 16) { // Use 16-bit registers for small load-stores as it's the // smallest general purpose register size supported by NVPTX. RetVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, RetVal); diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h index 9a249d3da3d5..cea3dce3f1c5 100644 --- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h +++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h @@ -77,6 +77,7 @@ public: bool hasImageHandles() const; bool hasFP16Math() const { return SmVersion >= 53; } bool allowFP16Math() const; + bool hasMaskOperator() const { return PTXVersion >= 71; } unsigned int getSmVersion() const { return SmVersion; } std::string getTargetName() const { return TargetName; } diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.h b/llvm/lib/Target/NVPTX/NVPTXUtilities.h index bf1524194cfb..6fee57b4664e 100644 --- a/llvm/lib/Target/NVPTX/NVPTXUtilities.h +++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.h @@ -59,6 +59,16 @@ bool isKernelFunction(const Function &); bool getAlign(const Function &, unsigned index, unsigned &); bool getAlign(const CallInst &, unsigned index, unsigned &); +// PTX ABI requires all scalar argument/return values to have +// bit-size as a power of two of at least 32 bits. +inline unsigned promoteScalarArgumentSize(unsigned size) { + if (size <= 32) + return 32; + else if (size <= 64) + return 64; + else + return size; +} } #endif diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 4247cf557c2a..14c4fd3a9ffa 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -5473,7 +5473,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) { } case ISD::MUL: { SDValue Op1 = N->getOperand(1); - if (Op1.getOpcode() != ISD::Constant || Op1.getValueType() != MVT::i64) + if (Op1.getOpcode() != ISD::Constant || + (Op1.getValueType() != MVT::i64 && Op1.getValueType() != MVT::i32)) break; // If the multiplier fits int16, we can handle it with mulli. @@ -5486,13 +5487,27 @@ void PPCDAGToDAGISel::Select(SDNode *N) { // (mul X, c1 << c2) -> (rldicr (mulli X, c1) c2). We do this in ISEL due to // DAGCombiner prefers (shl (mul X, c1), c2) -> (mul X, c1 << c2). 
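A quick numeric check of the PPC decomposition described in the comment above: a multiplier of the form c1 << c2, with c1 a signed 16-bit immediate, is lowered as a multiply by c1 (mulli) followed by a left shift by c2 (rldicr for i64, rlwinm for the new i32 path). The instruction names come from the hunk; the sketch only verifies the arithmetic (values are made up):

#include <cassert>
#include <cstdint>
int main() {
  uint64_t X = 0x0123456789abcdefULL;
  uint64_t C1 = 5;    // fits in simm16, so mulli can encode it
  unsigned C2 = 4;    // trailing zero bits of the original multiplier (80 = 5 << 4)
  assert(X * (C1 << C2) == (X * C1) << C2);
  return 0;
}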
uint64_t ImmSh = Imm >> Shift; - if (isInt<16>(ImmSh)) { - uint64_t SextImm = SignExtend64(ImmSh & 0xFFFF, 16); + if (!isInt<16>(ImmSh)) + break; + + uint64_t SextImm = SignExtend64(ImmSh & 0xFFFF, 16); + if (Op1.getValueType() == MVT::i64) { SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64); SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI8, dl, MVT::i64, N->getOperand(0), SDImm); - CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, SDValue(MulNode, 0), - getI32Imm(Shift, dl), getI32Imm(63 - Shift, dl)); + + SDValue Ops[] = {SDValue(MulNode, 0), getI32Imm(Shift, dl), + getI32Imm(63 - Shift, dl)}; + CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops); + return; + } else { + SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i32); + SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI, dl, MVT::i32, + N->getOperand(0), SDImm); + + SDValue Ops[] = {SDValue(MulNode, 0), getI32Imm(Shift, dl), + getI32Imm(0, dl), getI32Imm(31 - Shift, dl)}; + CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); return; } break; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 59486c323567..c85f57f04c7d 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -1086,8 +1086,8 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, // For opcodes with the ReMaterializable flag set, this function is called to // verify the instruction is really rematable. -bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, - AliasAnalysis *AA) const { +bool PPCInstrInfo::isReallyTriviallyReMaterializable( + const MachineInstr &MI) const { switch (MI.getOpcode()) { default: // This function should only be called for opcodes with the ReMaterializable diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h index e22b0086bde8..980bb3107a8b 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -495,8 +495,7 @@ public: unsigned &SubIdx) const override; unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override; - bool isReallyTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA) const override; + bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override; unsigned isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override; diff --git a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp index 4689c0638ca6..23703ac54d0e 100644 --- a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp +++ b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp @@ -568,7 +568,7 @@ bool PPCLoopInstrFormPrep::rewriteLoadStoresForCommoningChains( const SCEVAddRecExpr *BasePtrSCEV = cast<SCEVAddRecExpr>(BaseSCEV); // Make sure the base is able to expand. - if (!isSafeToExpand(BasePtrSCEV->getStart(), *SE)) + if (!SCEVE.isSafeToExpand(BasePtrSCEV->getStart())) return MadeChange; assert(BasePtrSCEV->isAffine() && @@ -602,7 +602,7 @@ bool PPCLoopInstrFormPrep::rewriteLoadStoresForCommoningChains( // Make sure offset is able to expand. Only need to check one time as the // offsets are reused between different chains. 
if (!BaseElemIdx) - if (!isSafeToExpand(OffsetSCEV, *SE)) + if (!SCEVE.isSafeToExpand(OffsetSCEV)) return false; Value *OffsetValue = SCEVE.expandCodeFor( @@ -1018,14 +1018,13 @@ bool PPCLoopInstrFormPrep::rewriteLoadStores( if (!BasePtrSCEV->isAffine()) return MadeChange; - if (!isSafeToExpand(BasePtrSCEV->getStart(), *SE)) - return MadeChange; - - SmallPtrSet<Value *, 16> DeletedPtrs; - BasicBlock *Header = L->getHeader(); SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(), "loopprepare-formrewrite"); + if (!SCEVE.isSafeToExpand(BasePtrSCEV->getStart())) + return MadeChange; + + SmallPtrSet<Value *, 16> DeletedPtrs; // For some DS form load/store instructions, it can also be an update form, // if the stride is constant and is a multipler of 4. Use update form if diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h index 917837a307ad..e6140edc8403 100644 --- a/llvm/lib/Target/RISCV/RISCV.h +++ b/llvm/lib/Target/RISCV/RISCV.h @@ -30,6 +30,9 @@ class MachineInstr; class MachineOperand; class PassRegistry; +FunctionPass *createRISCVCodeGenPreparePass(); +void initializeRISCVCodeGenPreparePass(PassRegistry &); + bool lowerRISCVMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, AsmPrinter &AP); bool lowerRISCVMachineOperandToMCOperand(const MachineOperand &MO, diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td index e783ef38b448..8a6f69c7f7ca 100644 --- a/llvm/lib/Target/RISCV/RISCV.td +++ b/llvm/lib/Target/RISCV/RISCV.td @@ -19,6 +19,19 @@ def HasStdExtM : Predicate<"Subtarget->hasStdExtM()">, AssemblerPredicate<(all_of FeatureStdExtM), "'M' (Integer Multiplication and Division)">; +def FeatureStdExtZmmul + : SubtargetFeature<"zmmul", "HasStdExtZmmul", "true", + "'Zmmul' (Integer Multiplication)">; +def HasStdExtZmmul : Predicate<"Subtarget->hasStdExtZmmul()">, + AssemblerPredicate<(all_of FeatureStdExtZmmul), + "'Zmmul' (Integer Multiplication)">; + +def HasStdExtMOrZmmul + : Predicate<"Subtarget->hasStdExtM() || Subtarget->hasStdExtZmmul()">, + AssemblerPredicate<(any_of FeatureStdExtM, FeatureStdExtZmmul), + "'M' (Integer Multiplication and Division) or " + "'Zmmul' (Integer Multiplication)">; + def FeatureStdExtA : SubtargetFeature<"a", "HasStdExtA", "true", "'A' (Atomic Instructions)">; @@ -465,7 +478,8 @@ def TuneNoDefaultUnroll "Disable default unroll preference.">; def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7", - "SiFive 7-Series processors">; + "SiFive 7-Series processors", + [TuneNoDefaultUnroll]>; //===----------------------------------------------------------------------===// // Named operands for CSR instructions. 
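The RISCV.td hunk earlier in this chunk introduces the Zmmul extension (the multiplication half of M) and a combined HasStdExtMOrZmmul predicate. A hedged usage note, not taken from the patch, with the feature string taken from the SubtargetFeature definition above:

// Assumed invocation (the -mattr feature string is "zmmul"):
//   llc -mtriple=riscv64 -mattr=+zmmul input.ll
// With only Zmmul enabled, MUL/MULH* patterns become selectable, while
// division and remainder still expand, matching the RISCVISelLowering
// change later in this diff.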
@@ -499,9 +513,9 @@ def : ProcessorModel<"rocket-rv32", RocketModel, []>; def : ProcessorModel<"rocket-rv64", RocketModel, [Feature64Bit]>; def : ProcessorModel<"sifive-7-rv32", SiFive7Model, [], - [TuneSiFive7, TuneNoDefaultUnroll]>; + [TuneSiFive7]>; def : ProcessorModel<"sifive-7-rv64", SiFive7Model, [Feature64Bit], - [TuneSiFive7, TuneNoDefaultUnroll]>; + [TuneSiFive7]>; def : ProcessorModel<"sifive-e20", RocketModel, [FeatureStdExtM, FeatureStdExtC]>; @@ -528,7 +542,7 @@ def : ProcessorModel<"sifive-e76", SiFive7Model, [FeatureStdExtM, FeatureStdExtA, FeatureStdExtF, FeatureStdExtC], - [TuneSiFive7, TuneNoDefaultUnroll]>; + [TuneSiFive7]>; def : ProcessorModel<"sifive-s21", RocketModel, [Feature64Bit, FeatureStdExtM, @@ -553,7 +567,7 @@ def : ProcessorModel<"sifive-s76", SiFive7Model, [Feature64Bit, FeatureStdExtF, FeatureStdExtD, FeatureStdExtC], - [TuneSiFive7, TuneNoDefaultUnroll]>; + [TuneSiFive7]>; def : ProcessorModel<"sifive-u54", RocketModel, [Feature64Bit, FeatureStdExtM, @@ -568,7 +582,7 @@ def : ProcessorModel<"sifive-u74", SiFive7Model, [Feature64Bit, FeatureStdExtF, FeatureStdExtD, FeatureStdExtC], - [TuneSiFive7, TuneNoDefaultUnroll]>; + [TuneSiFive7]>; //===----------------------------------------------------------------------===// // Define the RISC-V target. diff --git a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp new file mode 100644 index 000000000000..b700a9ede39b --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp @@ -0,0 +1,169 @@ +//===----- RISCVCodeGenPrepare.cpp ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is a RISCV specific version of CodeGenPrepare. +// It munges the code in the input function to better prepare it for +// SelectionDAG-based code generation. This works around limitations in it's +// basic-block-at-a-time approach. +// +//===----------------------------------------------------------------------===// + +#include "RISCV.h" +#include "RISCVTargetMachine.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" + +using namespace llvm; + +#define DEBUG_TYPE "riscv-codegenprepare" +#define PASS_NAME "RISCV CodeGenPrepare" + +STATISTIC(NumZExtToSExt, "Number of SExt instructions converted to ZExt"); + +namespace { + +class RISCVCodeGenPrepare : public FunctionPass { + const DataLayout *DL; + const RISCVSubtarget *ST; + +public: + static char ID; + + RISCVCodeGenPrepare() : FunctionPass(ID) {} + + bool runOnFunction(Function &F) override; + + StringRef getPassName() const override { return PASS_NAME; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired<TargetPassConfig>(); + } + +private: + bool optimizeZExt(ZExtInst *I); + bool optimizeAndExt(BinaryOperator *BO); +}; + +} // end anonymous namespace + +bool RISCVCodeGenPrepare::optimizeZExt(ZExtInst *ZExt) { + if (!ST->is64Bit()) + return false; + + Value *Src = ZExt->getOperand(0); + + // We only care about ZExt from i32 to i64. 
+ if (!ZExt->getType()->isIntegerTy(64) || !Src->getType()->isIntegerTy(32)) + return false; + + // Look for an opportunity to replace (i64 (zext (i32 X))) with a sext if we + // can determine that the sign bit of X is zero via a dominating condition. + // This often occurs with widened induction variables. + if (isImpliedByDomCondition(ICmpInst::ICMP_SGE, Src, + Constant::getNullValue(Src->getType()), ZExt, + *DL)) { + auto *SExt = new SExtInst(Src, ZExt->getType(), "", ZExt); + SExt->takeName(ZExt); + SExt->setDebugLoc(ZExt->getDebugLoc()); + + ZExt->replaceAllUsesWith(SExt); + ZExt->eraseFromParent(); + ++NumZExtToSExt; + return true; + } + + return false; +} + +// Try to optimize (i64 (and (zext/sext (i32 X), C1))) if C1 has bit 31 set, +// but bits 63:32 are zero. If we can prove that bit 31 of X is 0, we can fill +// the upper 32 bits with ones. A separate transform will turn (zext X) into +// (sext X) for the same condition. +bool RISCVCodeGenPrepare::optimizeAndExt(BinaryOperator *BO) { + if (!ST->is64Bit()) + return false; + + if (BO->getOpcode() != Instruction::And) + return false; + + if (!BO->getType()->isIntegerTy(64)) + return false; + + // Left hand side should be sext or zext. + Instruction *LHS = dyn_cast<Instruction>(BO->getOperand(0)); + if (!LHS || (!isa<SExtInst>(LHS) && !isa<ZExtInst>(LHS))) + return false; + + Value *LHSSrc = LHS->getOperand(0); + if (!LHSSrc->getType()->isIntegerTy(32)) + return false; + + // Right hand side should be a constant. + Value *RHS = BO->getOperand(1); + + auto *CI = dyn_cast<ConstantInt>(RHS); + if (!CI) + return false; + uint64_t C = CI->getZExtValue(); + + // Look for constants that fit in 32 bits but not simm12, and can be made + // into simm12 by sign extending bit 31. This will allow use of ANDI. + // TODO: Is worth making simm32? + if (!isUInt<32>(C) || isInt<12>(C) || !isInt<12>(SignExtend64<32>(C))) + return false; + + // If we can determine the sign bit of the input is 0, we can replace the + // And mask constant. + if (!isImpliedByDomCondition(ICmpInst::ICMP_SGE, LHSSrc, + Constant::getNullValue(LHSSrc->getType()), + LHS, *DL)) + return false; + + // Sign extend the constant and replace the And operand. 
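Both rewrites in this new pass hinge on proving, through a dominating condition, that the 32-bit source is non-negative. A small self-contained check of the two equivalences they rely on (sample values only, not from the patch):

#include <cassert>
#include <cstdint>
int main() {
  int32_t x = 0x1234;                  // assume a dominating branch proved x >= 0
  // optimizeZExt: (i64 (zext i32 x)) may be rewritten as (i64 (sext i32 x)).
  assert((uint64_t)(uint32_t)x == (uint64_t)(int64_t)x);
  // optimizeAndExt: an AND mask such as 0xFFFFFFF0 (fits in u32 but is not a
  // simm12) can be replaced by its bit-31 sign extension 0xFFFFFFFFFFFFFFF0
  // (= -16, a valid ANDI immediate), because bits 63:32 of the extended
  // value are zero.
  uint64_t ext = (uint64_t)(uint32_t)x;
  assert((ext & 0xFFFFFFF0u) == (ext & 0xFFFFFFFFFFFFFFF0ULL));
  return 0;
}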
+ C = SignExtend64<32>(C); + BO->setOperand(1, ConstantInt::get(LHS->getType(), C)); + + return true; +} + +bool RISCVCodeGenPrepare::runOnFunction(Function &F) { + if (skipFunction(F)) + return false; + + auto &TPC = getAnalysis<TargetPassConfig>(); + auto &TM = TPC.getTM<RISCVTargetMachine>(); + ST = &TM.getSubtarget<RISCVSubtarget>(F); + + DL = &F.getParent()->getDataLayout(); + + bool MadeChange = false; + for (auto &BB : F) { + for (Instruction &I : llvm::make_early_inc_range(BB)) { + if (auto *ZExt = dyn_cast<ZExtInst>(&I)) + MadeChange |= optimizeZExt(ZExt); + else if (I.getOpcode() == Instruction::And) + MadeChange |= optimizeAndExt(cast<BinaryOperator>(&I)); + } + } + + return MadeChange; +} + +INITIALIZE_PASS_BEGIN(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_END(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false) + +char RISCVCodeGenPrepare::ID = 0; + +FunctionPass *llvm::createRISCVCodeGenPreparePass() { + return new RISCVCodeGenPrepare(); +} diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 5b823af1e9b8..d5826b46d738 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -690,6 +690,14 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { // 32 trailing ones should use srliw via tablegen pattern. if (TrailingOnes == 32 || ShAmt >= TrailingOnes) break; + // If C2 is (1 << ShAmt) use bexti if possible. + if (Subtarget->hasStdExtZbs() && ShAmt + 1 == TrailingOnes) { + SDNode *BEXTI = + CurDAG->getMachineNode(RISCV::BEXTI, DL, VT, N0->getOperand(0), + CurDAG->getTargetConstant(ShAmt, DL, VT)); + ReplaceNode(Node, BEXTI); + return; + } unsigned LShAmt = Subtarget->getXLen() - TrailingOnes; SDNode *SLLI = CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0), @@ -939,18 +947,17 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { if (!isMask_64(C2)) break; - // This should be the only use of the AND unless we will use - // (SRLI (SLLI X, 32), 32). We don't use a shift pair for other AND - // constants. - if (!N0.hasOneUse() && C2 != UINT64_C(0xFFFFFFFF)) - break; - - // If this can be an ANDI, ZEXT.H or ZEXT.W we don't need to do this - // optimization. - if (isInt<12>(C2) || + // If this can be an ANDI, ZEXT.H or ZEXT.W, don't do this if the ANDI/ZEXT + // has multiple users or the constant is a simm12. This prevents inserting + // a shift and still have uses of the AND/ZEXT. Shifting a simm12 will + // likely make it more costly to materialize. Otherwise, using a SLLI + // might allow it to be compressed. + bool IsANDIOrZExt = + isInt<12>(C2) || (C2 == UINT64_C(0xFFFF) && (Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp())) || - (C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba())) + (C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba()); + if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse())) break; // We need to shift left the AND input and C1 by a total of XLen bits. 
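For the BEXTI path added earlier in this RISCVISelDAGToDAG hunk (an AND mask of ShAmt+1 trailing ones followed by a right shift of ShAmt), the combine is just a single-bit extract. A small check of that identity (illustrative only):

#include <cassert>
#include <cstdint>
int main() {
  uint64_t X = 0xdeadbeefULL;
  unsigned ShAmt = 5;
  uint64_t Mask = (1ULL << (ShAmt + 1)) - 1;      // ShAmt+1 trailing ones: 0x3f
  // (srl (and X, Mask), ShAmt) extracts exactly bit ShAmt, which is what the
  // Zbs instruction bexti X, ShAmt computes.
  assert(((X & Mask) >> ShAmt) == ((X >> ShAmt) & 1));
  return 0;
}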
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 658865703079..1702546b58a6 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -215,21 +215,26 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setLibcallName(RTLIB::MULO_I64, nullptr); } - if (!Subtarget.hasStdExtM()) { - setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU, ISD::SDIV, ISD::UDIV, - ISD::SREM, ISD::UREM}, - XLenVT, Expand); + if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) { + setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU}, XLenVT, Expand); } else { if (Subtarget.is64Bit()) { setOperationAction(ISD::MUL, {MVT::i32, MVT::i128}, Custom); - - setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM}, - {MVT::i8, MVT::i16, MVT::i32}, Custom); } else { setOperationAction(ISD::MUL, MVT::i64, Custom); } } + if (!Subtarget.hasStdExtM()) { + setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, + XLenVT, Expand); + } else { + if (Subtarget.is64Bit()) { + setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM}, + {MVT::i8, MVT::i16, MVT::i32}, Custom); + } + } + setOperationAction( {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, XLenVT, Expand); @@ -294,7 +299,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SELECT, XLenVT, Custom); } - static constexpr ISD::NodeType FPLegalNodeTypes[] = { + static const unsigned FPLegalNodeTypes[] = { ISD::FMINNUM, ISD::FMAXNUM, ISD::LRINT, ISD::LLRINT, ISD::LROUND, ISD::LLROUND, ISD::STRICT_LRINT, ISD::STRICT_LLRINT, ISD::STRICT_LROUND, @@ -307,7 +312,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT, ISD::SETGE, ISD::SETNE, ISD::SETO, ISD::SETUO}; - static const ISD::NodeType FPOpToExpand[] = { + static const unsigned FPOpToExpand[] = { ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FP16_TO_FP, ISD::FP_TO_FP16}; @@ -315,8 +320,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::BITCAST, MVT::i16, Custom); if (Subtarget.hasStdExtZfh()) { - for (auto NT : FPLegalNodeTypes) - setOperationAction(NT, MVT::f16, Legal); + setOperationAction(FPLegalNodeTypes, MVT::f16, Legal); setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal); setCondCodeAction(FPCCToExpand, MVT::f16, Expand); @@ -340,14 +344,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, } if (Subtarget.hasStdExtF()) { - for (auto NT : FPLegalNodeTypes) - setOperationAction(NT, MVT::f32, Legal); + setOperationAction(FPLegalNodeTypes, MVT::f32, Legal); setCondCodeAction(FPCCToExpand, MVT::f32, Expand); setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); setOperationAction(ISD::SELECT, MVT::f32, Custom); setOperationAction(ISD::BR_CC, MVT::f32, Expand); - for (auto Op : FPOpToExpand) - setOperationAction(Op, MVT::f32, Expand); + setOperationAction(FPOpToExpand, MVT::f32, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); setTruncStoreAction(MVT::f32, MVT::f16, Expand); } @@ -356,8 +358,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::BITCAST, MVT::i32, Custom); if (Subtarget.hasStdExtD()) { - for (auto NT : FPLegalNodeTypes) - setOperationAction(NT, MVT::f64, Legal); + setOperationAction(FPLegalNodeTypes, MVT::f64, Legal); 
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal); setCondCodeAction(FPCCToExpand, MVT::f64, Expand); @@ -366,8 +367,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::BR_CC, MVT::f64, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); setTruncStoreAction(MVT::f64, MVT::f32, Expand); - for (auto Op : FPOpToExpand) - setOperationAction(Op, MVT::f64, Expand); + setOperationAction(FPOpToExpand, MVT::f64, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); setTruncStoreAction(MVT::f64, MVT::f16, Expand); } @@ -458,17 +458,22 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}; + static const unsigned IntegerVecReduceOps[] = { + ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, + ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN, + ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN}; + + static const unsigned FloatingPointVecReduceOps[] = { + ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_FMIN, + ISD::VECREDUCE_FMAX}; + if (!Subtarget.is64Bit()) { // We must custom-lower certain vXi64 operations on RV32 due to the vector // element type being illegal. setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, MVT::i64, Custom); - setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, - ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR, - ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN, - ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN}, - MVT::i64, Custom); + setOperationAction(IntegerVecReduceOps, MVT::i64, Custom); setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, @@ -581,11 +586,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, // Custom-lower reduction operations to set up the corresponding custom // nodes' operands. - setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, - ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR, - ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN, - ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN}, - VT, Custom); + setOperationAction(IntegerVecReduceOps, VT, Custom); setOperationAction(IntegerVPOps, VT, Custom); @@ -661,9 +662,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND}, VT, Custom); - setOperationAction({ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, - ISD::VECREDUCE_FMIN, ISD::VECREDUCE_FMAX}, - VT, Custom); + setOperationAction(FloatingPointVecReduceOps, VT, Custom); // Expand FP operations that need libcalls. 
setOperationAction(ISD::FREM, VT, Expand); @@ -905,17 +904,14 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND}, VT, Custom); - for (auto CC : VFPCCToExpand) - setCondCodeAction(CC, VT, Expand); + setCondCodeAction(VFPCCToExpand, VT, Expand); setOperationAction({ISD::VSELECT, ISD::SELECT}, VT, Custom); setOperationAction(ISD::SELECT_CC, VT, Expand); setOperationAction(ISD::BITCAST, VT, Custom); - setOperationAction({ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, - ISD::VECREDUCE_FMIN, ISD::VECREDUCE_FMAX}, - VT, Custom); + setOperationAction(FloatingPointVecReduceOps, VT, Custom); setOperationAction(FloatingPointVPOps, VT, Custom); } @@ -943,7 +939,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setJumpIsExpensive(); setTargetDAGCombine({ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND, - ISD::OR, ISD::XOR}); + ISD::OR, ISD::XOR, ISD::SETCC}); if (Subtarget.is64Bit()) setTargetDAGCombine(ISD::SRA); @@ -1374,6 +1370,23 @@ unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context // with 1/-1. static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG) { + // If this is a single bit test that can't be handled by ANDI, shift the + // bit to be tested to the MSB and perform a signed compare with 0. + if (isIntEqualitySetCC(CC) && isNullConstant(RHS) && + LHS.getOpcode() == ISD::AND && LHS.hasOneUse() && + isa<ConstantSDNode>(LHS.getOperand(1))) { + uint64_t Mask = LHS.getConstantOperandVal(1); + if (isPowerOf2_64(Mask) && !isInt<12>(Mask)) { + CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT; + unsigned ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask); + LHS = LHS.getOperand(0); + if (ShAmt != 0) + LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS, + DAG.getConstant(ShAmt, DL, LHS.getValueType())); + return; + } + } + // Convert X > -1 to X >= 0. if (CC == ISD::SETGT && isAllOnesConstant(RHS)) { RHS = DAG.getConstant(0, DL, RHS.getValueType()); @@ -3707,10 +3720,7 @@ SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op, SDLoc DL(Op); GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); assert(N->getOffset() == 0 && "unexpected offset in global node"); - - const GlobalValue *GV = N->getGlobal(); - bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV); - return getAddr(N, DAG, IsLocal); + return getAddr(N, DAG, N->getGlobal()->isDSOLocal()); } SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op, @@ -8130,6 +8140,50 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG) { return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false); } +// Replace (seteq (i64 (and X, 0xffffffff)), C1) with +// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from +// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg +// can become a sext.w instead of a shift pair. +static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + EVT OpVT = N0.getValueType(); + + if (OpVT != MVT::i64 || !Subtarget.is64Bit()) + return SDValue(); + + // RHS needs to be a constant. + auto *N1C = dyn_cast<ConstantSDNode>(N1); + if (!N1C) + return SDValue(); + + // LHS needs to be (and X, 0xffffffff). 
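The equality rewrite documented above, (seteq (and X, 0xffffffff), C1) to a compare of (sext_inreg X, i32) against the bit-31 sign extension of C1, is sound because comparing the low 32 bits zero-extended is the same as comparing them sign-extended once the constant is adjusted the same way. A quick check with sample values (illustrative only, assumes the usual two's-complement narrowing):

#include <cassert>
#include <cstdint>
int main() {
  uint64_t X  = 0xAAAAAAAA80000000ULL;    // arbitrary upper bits
  uint64_t C1 = 0x80000000ULL;            // constant with fewer than 33 active bits
  bool ZExtCmp = (X & 0xffffffffULL) == C1;                    // original form
  bool SExtCmp = (int64_t)(int32_t)X == (int64_t)(int32_t)C1;  // sext.w form, C1' is cheaper
  assert(ZExtCmp == SExtCmp);
  return 0;
}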
+ if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() || + !isa<ConstantSDNode>(N0.getOperand(1)) || + N0.getConstantOperandVal(1) != UINT64_C(0xffffffff)) + return SDValue(); + + // Looking for an equality compare. + ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get(); + if (!isIntEqualitySetCC(Cond)) + return SDValue(); + + const APInt &C1 = cast<ConstantSDNode>(N1)->getAPIntValue(); + + SDLoc dl(N); + // If the constant is larger than 2^32 - 1 it is impossible for both sides + // to be equal. + if (C1.getActiveBits() > 32) + return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT); + + SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT, + N0.getOperand(0), DAG.getValueType(MVT::i32)); + return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64), + dl, OpVT), Cond); +} + static SDValue performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { @@ -8658,6 +8712,75 @@ static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, DAG.getConstant(32 - ShAmt, DL, MVT::i64)); } +// Perform common combines for BR_CC and SELECT_CC condtions. +static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, + SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { + ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get(); + if (!ISD::isIntEqualitySetCC(CCVal)) + return false; + + // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt) + // Sometimes the setcc is introduced after br_cc/select_cc has been formed. + if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) && + LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) { + // If we're looking for eq 0 instead of ne 0, we need to invert the + // condition. + bool Invert = CCVal == ISD::SETEQ; + CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get(); + if (Invert) + CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); + + RHS = LHS.getOperand(1); + LHS = LHS.getOperand(0); + translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG); + + CC = DAG.getCondCode(CCVal); + return true; + } + + // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne) + if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) { + RHS = LHS.getOperand(1); + LHS = LHS.getOperand(0); + return true; + } + + // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt) + if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() && + LHS.getOperand(1).getOpcode() == ISD::Constant) { + SDValue LHS0 = LHS.getOperand(0); + if (LHS0.getOpcode() == ISD::AND && + LHS0.getOperand(1).getOpcode() == ISD::Constant) { + uint64_t Mask = LHS0.getConstantOperandVal(1); + uint64_t ShAmt = LHS.getConstantOperandVal(1); + if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) { + CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT; + CC = DAG.getCondCode(CCVal); + + ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt; + LHS = LHS0.getOperand(0); + if (ShAmt != 0) + LHS = + DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0), + DAG.getConstant(ShAmt, DL, LHS.getValueType())); + return true; + } + } + } + + // (X, 1, setne) -> // (X, 0, seteq) if we can prove X is 0/1. + // This can occur when legalizing some floating point comparisons. 
+ APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1); + if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) { + CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); + CC = DAG.getCondCode(CCVal); + RHS = DAG.getConstant(0, DL, LHS.getValueType()); + return true; + } + + return false; +} + SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -8872,6 +8995,8 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, case ISD::FMAXNUM: case ISD::FMINNUM: return combineBinOpToReduce(N, DAG); + case ISD::SETCC: + return performSETCCCombine(N, DAG, Subtarget); case ISD::SIGN_EXTEND_INREG: return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget); case ISD::ZERO_EXTEND: @@ -8900,110 +9025,32 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, // Transform SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); + SDValue CC = N->getOperand(2); SDValue TrueV = N->getOperand(3); SDValue FalseV = N->getOperand(4); + SDLoc DL(N); // If the True and False values are the same, we don't need a select_cc. if (TrueV == FalseV) return TrueV; - ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get(); - if (!ISD::isIntEqualitySetCC(CCVal)) - break; - - // Fold (select_cc (setlt X, Y), 0, ne, trueV, falseV) -> - // (select_cc X, Y, lt, trueV, falseV) - // Sometimes the setcc is introduced after select_cc has been formed. - if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) && - LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) { - // If we're looking for eq 0 instead of ne 0, we need to invert the - // condition. - bool Invert = CCVal == ISD::SETEQ; - CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get(); - if (Invert) - CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); - - SDLoc DL(N); - RHS = LHS.getOperand(1); - LHS = LHS.getOperand(0); - translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG); - - SDValue TargetCC = DAG.getCondCode(CCVal); - return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0), - {LHS, RHS, TargetCC, TrueV, FalseV}); - } - - // Fold (select_cc (xor X, Y), 0, eq/ne, trueV, falseV) -> - // (select_cc X, Y, eq/ne, trueV, falseV) - if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) - return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), N->getValueType(0), - {LHS.getOperand(0), LHS.getOperand(1), - N->getOperand(2), TrueV, FalseV}); - // (select_cc X, 1, setne, trueV, falseV) -> - // (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1. - // This can occur when legalizing some floating point comparisons. - APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1); - if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) { - SDLoc DL(N); - CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); - SDValue TargetCC = DAG.getCondCode(CCVal); - RHS = DAG.getConstant(0, DL, LHS.getValueType()); + if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget)) return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0), - {LHS, RHS, TargetCC, TrueV, FalseV}); - } + {LHS, RHS, CC, TrueV, FalseV}); - break; + return SDValue(); } case RISCVISD::BR_CC: { SDValue LHS = N->getOperand(1); SDValue RHS = N->getOperand(2); - ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(3))->get(); - if (!ISD::isIntEqualitySetCC(CCVal)) - break; - - // Fold (br_cc (setlt X, Y), 0, ne, dest) -> - // (br_cc X, Y, lt, dest) - // Sometimes the setcc is introduced after br_cc has been formed. 
- if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) && - LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) { - // If we're looking for eq 0 instead of ne 0, we need to invert the - // condition. - bool Invert = CCVal == ISD::SETEQ; - CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get(); - if (Invert) - CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); - - SDLoc DL(N); - RHS = LHS.getOperand(1); - LHS = LHS.getOperand(0); - translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG); + SDValue CC = N->getOperand(3); + SDLoc DL(N); + if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget)) return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0), - N->getOperand(0), LHS, RHS, DAG.getCondCode(CCVal), - N->getOperand(4)); - } - - // Fold (br_cc (xor X, Y), 0, eq/ne, dest) -> - // (br_cc X, Y, eq/ne, trueV, falseV) - if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) - return DAG.getNode(RISCVISD::BR_CC, SDLoc(N), N->getValueType(0), - N->getOperand(0), LHS.getOperand(0), LHS.getOperand(1), - N->getOperand(3), N->getOperand(4)); - - // (br_cc X, 1, setne, br_cc) -> - // (br_cc X, 0, seteq, br_cc) if we can prove X is 0/1. - // This can occur when legalizing some floating point comparisons. - APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1); - if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) { - SDLoc DL(N); - CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); - SDValue TargetCC = DAG.getCondCode(CCVal); - RHS = DAG.getConstant(0, DL, LHS.getValueType()); - return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0), - N->getOperand(0), LHS, RHS, TargetCC, - N->getOperand(4)); - } - break; + N->getOperand(0), LHS, RHS, CC, N->getOperand(4)); + + return SDValue(); } case ISD::BITREVERSE: return performBITREVERSECombine(N, DAG, Subtarget); @@ -9299,6 +9346,10 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, bool RISCVTargetLowering::isDesirableToCommuteWithShift( const SDNode *N, CombineLevel Level) const { + assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA || + N->getOpcode() == ISD::SRL) && + "Expected shift op"); + // The following folds are only desirable if `(OP _, c1 << c2)` can be // materialised in fewer instructions than `(OP _, c1)`: // @@ -9357,7 +9408,8 @@ bool RISCVTargetLowering::targetShrinkDemandedConstant( return false; // Only handle AND for now. - if (Op.getOpcode() != ISD::AND) + unsigned Opcode = Op.getOpcode(); + if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR) return false; ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); @@ -9376,12 +9428,13 @@ bool RISCVTargetLowering::targetShrinkDemandedConstant( auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool { return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask); }; - auto UseMask = [Mask, Op, VT, &TLO](const APInt &NewMask) -> bool { + auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool { if (NewMask == Mask) return true; SDLoc DL(Op); - SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT); - SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC); + SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType()); + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), + Op.getOperand(0), NewC); return TLO.CombineTo(Op, NewOp); }; @@ -9390,18 +9443,21 @@ bool RISCVTargetLowering::targetShrinkDemandedConstant( if (ShrunkMask.isSignedIntN(12)) return false; - // Preserve (and X, 0xffff) when zext.h is supported. 
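targetShrinkDemandedConstant, extended in this hunk to OR and XOR, widens a mask outside the demanded bits so the immediate becomes cheap (a negative simm12, or the zext.h/zext.w patterns for AND). A small numeric illustration of why setting undemanded bits is safe (values are made up):

#include <cassert>
#include <cstdint>
int main() {
  uint64_t X        = 0xabcdef12345678ffULL;
  uint64_t Demanded = 0x0000000000000F00ULL; // only bits 8..11 are used later
  uint64_t Mask     = 0x0000000000000F00ULL; // 0xF00 is not a simm12, so no ANDI
  uint64_t NewMask  = 0xFFFFFFFFFFFFFF00ULL; // = -256, a single ANDI immediate
  // The two ANDs agree on every demanded bit, so the cheaper mask can be used.
  assert(((X & Mask) & Demanded) == ((X & NewMask) & Demanded));
  return 0;
}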
- if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) { - APInt NewMask = APInt(Mask.getBitWidth(), 0xffff); - if (IsLegalMask(NewMask)) - return UseMask(NewMask); - } + // And has a few special cases for zext. + if (Opcode == ISD::AND) { + // Preserve (and X, 0xffff) when zext.h is supported. + if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) { + APInt NewMask = APInt(Mask.getBitWidth(), 0xffff); + if (IsLegalMask(NewMask)) + return UseMask(NewMask); + } - // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern. - if (VT == MVT::i64) { - APInt NewMask = APInt(64, 0xffffffff); - if (IsLegalMask(NewMask)) - return UseMask(NewMask); + // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern. + if (VT == MVT::i64) { + APInt NewMask = APInt(64, 0xffffffff); + if (IsLegalMask(NewMask)) + return UseMask(NewMask); + } } // For the remaining optimizations, we need to be able to make a negative @@ -9414,10 +9470,11 @@ bool RISCVTargetLowering::targetShrinkDemandedConstant( // Try to make a 12 bit negative immediate. If that fails try to make a 32 // bit negative immediate unless the shrunk immediate already fits in 32 bits. + // If we can't create a simm12, we shouldn't change opaque constants. APInt NewMask = ShrunkMask; if (MinSignedBits <= 12) NewMask.setBitsFrom(11); - else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32)) + else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32)) NewMask.setBitsFrom(31); else return false; @@ -10015,15 +10072,15 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI, LastSelectPseudo = &*SequenceMBBI; SequenceMBBI->collectDebugValues(SelectDebugValues); SelectDests.insert(SequenceMBBI->getOperand(0).getReg()); - } else { - if (SequenceMBBI->hasUnmodeledSideEffects() || - SequenceMBBI->mayLoadOrStore()) - break; - if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) { - return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg()); - })) - break; + continue; } + if (SequenceMBBI->hasUnmodeledSideEffects() || + SequenceMBBI->mayLoadOrStore()) + break; + if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) { + return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg()); + })) + break; } const RISCVInstrInfo &TII = *Subtarget.getInstrInfo(); @@ -12159,7 +12216,8 @@ bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const { // FIXME: This doesn't work for zve32, but that's already broken // elsewhere for the same reason. assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported"); - assert(RISCV::RVVBitsPerBlock == 64 && "RVVBitsPerBlock changed, audit needed"); + static_assert(RISCV::RVVBitsPerBlock == 64, + "RVVBitsPerBlock changed, audit needed"); return true; } @@ -12214,10 +12272,12 @@ bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const { // Check integral scalar types. + const bool HasExtMOrZmmul = + Subtarget.hasStdExtM() || Subtarget.hasStdExtZmmul(); if (VT.isScalarInteger()) { // Omit the optimization if the sub target has the M extension and the data // size exceeds XLen. - if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen()) + if (HasExtMOrZmmul && VT.getSizeInBits() > Subtarget.getXLen()) return false; if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) { // Break the MUL to a SLLI and an ADD/SUB. 
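The comment above refers to multipliers of the form 2^N plus or minus 1, which decompose into one shift and one add or sub. A quick check of the identities this lowering relies on (illustrative only):

#include <cassert>
#include <cstdint>
int main() {
  uint64_t X = 0x123456789ULL;
  assert(X * 9 == (X << 3) + X);   // 9 = 2^3 + 1  ->  slli + add
  assert(X * 7 == (X << 3) - X);   // 7 = 2^3 - 1  ->  slli + sub
  return 0;
}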
@@ -12232,7 +12292,7 @@ bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, return true; // Omit the following optimization if the sub target has the M extension // and the data size >= XLen. - if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen()) + if (HasExtMOrZmmul && VT.getSizeInBits() >= Subtarget.getXLen()) return false; // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs // a pair of LUI/ADDI. diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 685604ad9a59..75a79895330f 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -637,6 +637,64 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, } } +MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl( + MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS, + VirtRegMap *VRM) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + + // The below optimizations narrow the load so they are only valid for little + // endian. + // TODO: Support big endian by adding an offset into the frame object? + if (MF.getDataLayout().isBigEndian()) + return nullptr; + + // Fold load from stack followed by sext.w into lw. + // TODO: Fold with sext.b, sext.h, zext.b, zext.h, zext.w? + if (Ops.size() != 1 || Ops[0] != 1) + return nullptr; + + unsigned LoadOpc; + switch (MI.getOpcode()) { + default: + if (RISCV::isSEXT_W(MI)) { + LoadOpc = RISCV::LW; + break; + } + if (RISCV::isZEXT_W(MI)) { + LoadOpc = RISCV::LWU; + break; + } + if (RISCV::isZEXT_B(MI)) { + LoadOpc = RISCV::LBU; + break; + } + return nullptr; + case RISCV::SEXT_H: + LoadOpc = RISCV::LH; + break; + case RISCV::SEXT_B: + LoadOpc = RISCV::LB; + break; + case RISCV::ZEXT_H_RV32: + case RISCV::ZEXT_H_RV64: + LoadOpc = RISCV::LHU; + break; + } + + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FrameIndex), + MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex), + MFI.getObjectAlign(FrameIndex)); + + Register DstReg = MI.getOperand(0).getReg(); + return BuildMI(*MI.getParent(), InsertPt, MI.getDebugLoc(), get(LoadOpc), + DstReg) + .addFrameIndex(FrameIndex) + .addImm(0) + .addMemOperand(MMO); +} + void RISCVInstrInfo::movImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register DstReg, uint64_t Val, @@ -1799,17 +1857,30 @@ Register RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF, .addReg(VL, RegState::Kill) .addImm(ShiftAmount) .setMIFlag(Flag); - } else if ((NumOfVReg == 3 || NumOfVReg == 5 || NumOfVReg == 9) && - STI.hasStdExtZba()) { - // We can use Zba SHXADD instructions for multiply in some cases. - // TODO: Generalize to SHXADD+SLLI. + } else if (STI.hasStdExtZba() && + ((NumOfVReg % 3 == 0 && isPowerOf2_64(NumOfVReg / 3)) || + (NumOfVReg % 5 == 0 && isPowerOf2_64(NumOfVReg / 5)) || + (NumOfVReg % 9 == 0 && isPowerOf2_64(NumOfVReg / 9)))) { + // We can use Zba SHXADD+SLLI instructions for multiply in some cases. 
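  // For illustration, the factors accepted here are 3, 5 or 9 scaled by a
  // power of two. E.g. NumOfVReg == 24 (3 * 2^3) becomes (register a0 used
  // only as an example):
  //   slli   a0, a0, 3      ; a0 *= 8
  //   sh1add a0, a0, a0     ; a0 = (a0 << 1) + a0, i.e. *3, for *24 total
  // while NumOfVReg == 40 (5 * 2^3) would use slli followed by sh2add.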
unsigned Opc; - switch (NumOfVReg) { - default: llvm_unreachable("Unexpected number of vregs"); - case 3: Opc = RISCV::SH1ADD; break; - case 5: Opc = RISCV::SH2ADD; break; - case 9: Opc = RISCV::SH3ADD; break; + uint32_t ShiftAmount; + if (NumOfVReg % 9 == 0) { + Opc = RISCV::SH3ADD; + ShiftAmount = Log2_64(NumOfVReg / 9); + } else if (NumOfVReg % 5 == 0) { + Opc = RISCV::SH2ADD; + ShiftAmount = Log2_64(NumOfVReg / 5); + } else if (NumOfVReg % 3 == 0) { + Opc = RISCV::SH1ADD; + ShiftAmount = Log2_64(NumOfVReg / 3); + } else { + llvm_unreachable("Unexpected number of vregs"); } + if (ShiftAmount) + BuildMI(MBB, II, DL, get(RISCV::SLLI), VL) + .addReg(VL, RegState::Kill) + .addImm(ShiftAmount) + .setMIFlag(Flag); BuildMI(MBB, II, DL, get(Opc), VL) .addReg(VL, RegState::Kill) .addReg(VL) @@ -1839,10 +1910,11 @@ Register RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF, } else { Register N = MRI.createVirtualRegister(&RISCV::GPRRegClass); movImm(MBB, II, DL, N, NumOfVReg, Flag); - if (!STI.hasStdExtM()) + if (!STI.hasStdExtM() && !STI.hasStdExtZmmul()) MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ MF.getFunction(), - "M-extension must be enabled to calculate the vscaled size/offset."}); + "M- or Zmmul-extension must be enabled to calculate the vscaled size/" + "offset."}); BuildMI(MBB, II, DL, get(RISCV::MUL), VL) .addReg(VL, RegState::Kill) .addReg(N, RegState::Kill) @@ -1852,6 +1924,24 @@ Register RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF, return VL; } +// Returns true if this is the sext.w pattern, addiw rd, rs1, 0. +bool RISCV::isSEXT_W(const MachineInstr &MI) { + return MI.getOpcode() == RISCV::ADDIW && MI.getOperand(1).isReg() && + MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0; +} + +// Returns true if this is the zext.w pattern, adduw rd, rs1, x0. +bool RISCV::isZEXT_W(const MachineInstr &MI) { + return MI.getOpcode() == RISCV::ADD_UW && MI.getOperand(1).isReg() && + MI.getOperand(2).isReg() && MI.getOperand(2).getReg() == RISCV::X0; +} + +// Returns true if this is the zext.b pattern, andi rd, rs1, 255. +bool RISCV::isZEXT_B(const MachineInstr &MI) { + return MI.getOpcode() == RISCV::ANDI && MI.getOperand(1).isReg() && + MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 255; +} + static bool isRVVWholeLoadStore(unsigned Opcode) { switch (Opcode) { default: diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h index 5368437618bd..4aa9ded5b3a2 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -69,6 +69,14 @@ public: int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override; + using TargetInstrInfo::foldMemoryOperandImpl; + MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, + ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, + int FrameIndex, + LiveIntervals *LIS = nullptr, + VirtRegMap *VRM = nullptr) const override; + // Materializes the given integer Val into DstReg. void movImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register DstReg, uint64_t Val, @@ -183,6 +191,11 @@ protected: namespace RISCV { +// Returns true if this is the sext.w pattern, addiw rd, rs1, 0. +bool isSEXT_W(const MachineInstr &MI); +bool isZEXT_W(const MachineInstr &MI); +bool isZEXT_B(const MachineInstr &MI); + // Returns true if the given MI is an RVV instruction opcode for which we may // expect to see a FrameIndex operand. 
bool isRVVSpill(const MachineInstr &MI); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 06a90438838e..78fd09fbf387 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -1278,6 +1278,13 @@ def : Pat<(setgt GPR:$rs1, simm12_minus1_nonzero:$imm), def : Pat<(setugt GPR:$rs1, simm12_minus1_nonzero:$imm), (XORI (SLTIU GPR:$rs1, (ImmPlus1 simm12_minus1_nonzero:$imm)), 1)>; +// If negating a pattern that requires an XORI above, we can fold the XORI with +// the NEG. The XORI is equivalent to 1-X and negating gives X-1. +def : Pat<(ineg (setuge GPR:$rs1, GPR:$rs2)), (ADDI (SLTU GPR:$rs1, GPR:$rs2), -1)>; +def : Pat<(ineg (setule GPR:$rs1, GPR:$rs2)), (ADDI (SLTU GPR:$rs2, GPR:$rs1), -1)>; +def : Pat<(ineg (setge GPR:$rs1, GPR:$rs2)), (ADDI (SLT GPR:$rs1, GPR:$rs2), -1)>; +def : Pat<(ineg (setle GPR:$rs1, GPR:$rs2)), (ADDI (SLT GPR:$rs2, GPR:$rs1), -1)>; + def IntCCtoRISCVCC : SDNodeXForm<riscv_selectcc, [{ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); RISCVCC::CondCode BrCC = getRISCVCCForIntCC(CC); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td index 72ba8460116f..662604b138d2 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td @@ -24,7 +24,7 @@ def riscv_remuw : SDNode<"RISCVISD::REMUW", SDT_RISCVIntBinOpW>; // Instructions //===----------------------------------------------------------------------===// -let Predicates = [HasStdExtM] in { +let Predicates = [HasStdExtMOrZmmul] in { def MUL : ALU_rr<0b0000001, 0b000, "mul", /*Commutable*/1>, Sched<[WriteIMul, ReadIMul, ReadIMul]>; def MULH : ALU_rr<0b0000001, 0b001, "mulh", /*Commutable*/1>, @@ -33,6 +33,9 @@ def MULHSU : ALU_rr<0b0000001, 0b010, "mulhsu">, Sched<[WriteIMul, ReadIMul, ReadIMul]>; def MULHU : ALU_rr<0b0000001, 0b011, "mulhu", /*Commutable*/1>, Sched<[WriteIMul, ReadIMul, ReadIMul]>; +} // Predicates = [HasStdExtMOrZmmul] + +let Predicates = [HasStdExtM] in { def DIV : ALU_rr<0b0000001, 0b100, "div">, Sched<[WriteIDiv, ReadIDiv, ReadIDiv]>; def DIVU : ALU_rr<0b0000001, 0b101, "divu">, @@ -43,9 +46,12 @@ def REMU : ALU_rr<0b0000001, 0b111, "remu">, Sched<[WriteIDiv, ReadIDiv, ReadIDiv]>; } // Predicates = [HasStdExtM] -let Predicates = [HasStdExtM, IsRV64] in { +let Predicates = [HasStdExtMOrZmmul, IsRV64] in { def MULW : ALUW_rr<0b0000001, 0b000, "mulw", /*Commutable*/1>, Sched<[WriteIMul32, ReadIMul32, ReadIMul32]>; +} // Predicates = [HasStdExtMOrZmmul, IsRV64] + +let Predicates = [HasStdExtM, IsRV64] in { def DIVW : ALUW_rr<0b0000001, 0b100, "divw">, Sched<[WriteIDiv32, ReadIDiv32, ReadIDiv32]>; def DIVUW : ALUW_rr<0b0000001, 0b101, "divuw">, @@ -60,21 +66,25 @@ def REMUW : ALUW_rr<0b0000001, 0b111, "remuw">, // Pseudo-instructions and codegen patterns //===----------------------------------------------------------------------===// -let Predicates = [HasStdExtM] in { +let Predicates = [HasStdExtMOrZmmul] in { def : PatGprGpr<mul, MUL>; def : PatGprGpr<mulhs, MULH>; def : PatGprGpr<mulhu, MULHU>; def : PatGprGpr<riscv_mulhsu, MULHSU>; +} // Predicates = [HasStdExtMOrZmmul] + +let Predicates = [HasStdExtM] in { def : PatGprGpr<sdiv, DIV>; def : PatGprGpr<udiv, DIVU>; def : PatGprGpr<srem, REM>; def : PatGprGpr<urem, REMU>; } // Predicates = [HasStdExtM] -let Predicates = [HasStdExtM, IsRV64] in { // Select W instructions if only the lower 32-bits of the result are used. 
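// For illustration: binop_allwusers<mul> matches only when every user of the
// multiply is known to read just the low 32 bits of the result, so replacing
// the full 64-bit MUL with MULW (low 32-bit product, sign-extended) cannot
// change any value those users observe.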
+let Predicates = [HasStdExtMOrZmmul, IsRV64] in def : PatGprGpr<binop_allwusers<mul>, MULW>; +let Predicates = [HasStdExtM, IsRV64] in { def : PatGprGpr<riscv_divw, DIVW>; def : PatGprGpr<riscv_divuw, DIVUW>; def : PatGprGpr<riscv_remuw, REMUW>; @@ -96,11 +106,11 @@ def : Pat<(srem (sexti32 (i64 GPR:$rs1)), (sexti32 (i64 GPR:$rs2))), (REMW GPR:$rs1, GPR:$rs2)>; } // Predicates = [HasStdExtM, IsRV64] -let Predicates = [HasStdExtM, IsRV64, NotHasStdExtZba] in { +let Predicates = [HasStdExtMOrZmmul, IsRV64, NotHasStdExtZba] in { // Special case for calculating the full 64-bit product of a 32x32 unsigned // multiply where the inputs aren't known to be zero extended. We can shift the // inputs left by 32 and use a MULHU. This saves two SRLIs needed to finish // zeroing the upper 32 bits. def : Pat<(i64 (mul (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff))), (MULHU (SLLI GPR:$rs1, 32), (SLLI GPR:$rs2, 32))>; -} // Predicates = [HasStdExtM, IsRV64, NotHasStdExtZba] +} // Predicates = [HasStdExtMOrZmmul, IsRV64, NotHasStdExtZba] diff --git a/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp b/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp index dadf8f81a2c0..920729e9ebbf 100644 --- a/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp +++ b/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp @@ -443,8 +443,7 @@ bool RISCVSExtWRemoval::runOnMachineFunction(MachineFunction &MF) { MachineInstr *MI = &*I++; // We're looking for the sext.w pattern ADDIW rd, rs1, 0. - if (MI->getOpcode() != RISCV::ADDIW || !MI->getOperand(2).isImm() || - MI->getOperand(2).getImm() != 0 || !MI->getOperand(1).isReg()) + if (!RISCV::isSEXT_W(*MI)) continue; // Input should be a virtual register. diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp index 7589b44b81d3..0446edefa979 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp @@ -202,11 +202,9 @@ bool RISCVSubtarget::useRVVForFixedLengthVectors() const { } bool RISCVSubtarget::enableSubRegLiveness() const { - if (EnableSubRegLiveness.getNumOccurrences()) - return EnableSubRegLiveness; - // Enable subregister liveness for RVV to better handle LMUL>1 and segment - // load/store. - return hasVInstructions(); + // FIXME: Enable subregister liveness by default for RVV to better handle + // LMUL>1 and segment load/store. 
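  // (Context for the FIXME: at LMUL > 1 a single vector value occupies a
  // register group, e.g. v8-v9 at LMUL=2, and segment loads/stores define
  // register tuples; tracking the individual vN subregisters separately is
  // what subregister liveness would improve for those cases.)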
+ return EnableSubRegLiveness; } void RISCVSubtarget::getPostRAMutations( diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index 831f7fadaa62..6eb949fa551c 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -89,6 +89,7 @@ private: bool HasStdExtZicbom = false; bool HasStdExtZicboz = false; bool HasStdExtZicbop = false; + bool HasStdExtZmmul = false; bool HasRV64 = false; bool IsRV32E = false; bool EnableLinkerRelax = false; @@ -184,6 +185,7 @@ public: bool hasStdExtZicbom() const { return HasStdExtZicbom; } bool hasStdExtZicboz() const { return HasStdExtZicboz; } bool hasStdExtZicbop() const { return HasStdExtZicbop; } + bool hasStdExtZmmul() const { return HasStdExtZmmul; } bool is64Bit() const { return HasRV64; } bool isRV32E() const { return IsRV32E; } bool enableLinkerRelax() const { return EnableLinkerRelax; } diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index b2707b753e87..50fcb00e6c63 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -49,6 +49,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() { initializeGlobalISel(*PR); initializeRISCVMakeCompressibleOptPass(*PR); initializeRISCVGatherScatterLoweringPass(*PR); + initializeRISCVCodeGenPreparePass(*PR); initializeRISCVMergeBaseOffsetOptPass(*PR); initializeRISCVSExtWRemovalPass(*PR); initializeRISCVExpandPseudoPass(*PR); @@ -187,7 +188,11 @@ TargetPassConfig *RISCVTargetMachine::createPassConfig(PassManagerBase &PM) { void RISCVPassConfig::addIRPasses() { addPass(createAtomicExpandPass()); - addPass(createRISCVGatherScatterLoweringPass()); + if (getOptLevel() != CodeGenOpt::None) + addPass(createRISCVGatherScatterLoweringPass()); + + if (getOptLevel() != CodeGenOpt::None) + addPass(createRISCVCodeGenPreparePass()); TargetPassConfig::addIRPasses(); } diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 29d3c5e491de..f9cd5ffb512b 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -65,7 +65,7 @@ InstructionCost RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, if (Imm == UINT64_C(0xffff) && ST->hasStdExtZbb()) return TTI::TCC_Free; // zext.w - if (Imm == UINT64_C(0xffffffff) && ST->hasStdExtZbb()) + if (Imm == UINT64_C(0xffffffff) && ST->hasStdExtZba()) return TTI::TCC_Free; LLVM_FALLTHROUGH; case Instruction::Add: @@ -198,6 +198,9 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, // vid.v v9 // vrsub.vx v10, v9, a0 // vrgather.vv v9, v8, v10 + if (Tp->getElementType()->isIntegerTy(1)) + // Mask operation additionally required extend and truncate + return LT.first * 9; return LT.first * 6; } } diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.cpp index 1a3e35a5f901..220fd76305aa 100644 --- a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.cpp +++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.cpp @@ -1068,5 +1068,15 @@ StringRef getKernelProfilingInfoName(KernelProfilingInfo e) { } llvm_unreachable("Unexpected operand"); } + +std::string getExtInstSetName(InstructionSet e) { + switch (e) { + CASE(InstructionSet, OpenCL_std) + CASE(InstructionSet, GLSL_std_450) + CASE(InstructionSet, SPV_AMD_shader_trinary_minmax) + break; + } + llvm_unreachable("Unexpected 
operand"); +} } // namespace SPIRV } // namespace llvm diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.h b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.h index 2aa9f076c78e..9482723993a2 100644 --- a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.h +++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.h @@ -706,6 +706,19 @@ enum class KernelProfilingInfo : uint32_t { CmdExecTime = 0x1, }; StringRef getKernelProfilingInfoName(KernelProfilingInfo e); + +enum class InstructionSet : uint32_t { + OpenCL_std = 0, + GLSL_std_450 = 1, + SPV_AMD_shader_trinary_minmax = 2, +}; +std::string getExtInstSetName(InstructionSet e); + +// TODO: implement other mnemonics. +enum class Opcode : uint32_t { + InBoundsPtrAccessChain = 70, + PtrCastToGeneric = 121, +}; } // namespace SPIRV } // namespace llvm diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp index 3105baa02c90..d60e61f36270 100644 --- a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp +++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp @@ -59,7 +59,7 @@ void SPIRVInstPrinter::printOpConstantVarOps(const MCInst *MI, } void SPIRVInstPrinter::recordOpExtInstImport(const MCInst *MI) { - llvm_unreachable("Unimplemented recordOpExtInstImport"); + // TODO: insert {Reg, Set} into ExtInstSetIDs map. } void SPIRVInstPrinter::printInst(const MCInst *MI, uint64_t Address, @@ -176,7 +176,18 @@ void SPIRVInstPrinter::printInst(const MCInst *MI, uint64_t Address, } void SPIRVInstPrinter::printOpExtInst(const MCInst *MI, raw_ostream &O) { - llvm_unreachable("Unimplemented printOpExtInst"); + // The fixed operands have already been printed, so just need to decide what + // type of ExtInst operands to print based on the instruction set and number. + MCInstrDesc MCDesc = MII.get(MI->getOpcode()); + unsigned NumFixedOps = MCDesc.getNumOperands(); + const auto NumOps = MI->getNumOperands(); + if (NumOps == NumFixedOps) + return; + + O << ' '; + + // TODO: implement special printing for OpenCLExtInst::vstor*. 
+ printRemainingVariableOps(MI, NumFixedOps, O, true); } void SPIRVInstPrinter::printOpDecorate(const MCInst *MI, raw_ostream &O) { diff --git a/llvm/lib/Target/SPIRV/SPIRV.h b/llvm/lib/Target/SPIRV/SPIRV.h index 8da54a5d6e61..5a7f2e51afb8 100644 --- a/llvm/lib/Target/SPIRV/SPIRV.h +++ b/llvm/lib/Target/SPIRV/SPIRV.h @@ -19,6 +19,7 @@ class SPIRVSubtarget; class InstructionSelector; class RegisterBankInfo; +ModulePass *createSPIRVPrepareFunctionsPass(); FunctionPass *createSPIRVPreLegalizerPass(); FunctionPass *createSPIRVEmitIntrinsicsPass(SPIRVTargetMachine *TM); InstructionSelector * diff --git a/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp b/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp index 605bf949187f..6d60bd5e3c97 100644 --- a/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp @@ -21,6 +21,7 @@ #include "SPIRVUtils.h" #include "TargetInfo/SPIRVTargetInfo.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -58,9 +59,14 @@ public: void outputModuleSection(SPIRV::ModuleSectionType MSType); void outputEntryPoints(); void outputDebugSourceAndStrings(const Module &M); + void outputOpExtInstImports(const Module &M); void outputOpMemoryModel(); void outputOpFunctionEnd(); void outputExtFuncDecls(); + void outputExecutionModeFromMDNode(Register Reg, MDNode *Node, + SPIRV::ExecutionMode EM); + void outputExecutionMode(const Module &M); + void outputAnnotations(const Module &M); void outputModuleSections(); void emitInstruction(const MachineInstr *MI) override; @@ -127,6 +133,8 @@ void SPIRVAsmPrinter::emitFunctionBodyEnd() { } void SPIRVAsmPrinter::emitOpLabel(const MachineBasicBlock &MBB) { + if (MAI->MBBsToSkip.contains(&MBB)) + return; MCInst LabelInst; LabelInst.setOpcode(SPIRV::OpLabel); LabelInst.addOperand(MCOperand::createReg(MAI->getOrCreateMBBRegister(MBB))); @@ -237,6 +245,13 @@ void SPIRVAsmPrinter::outputModuleSection(SPIRV::ModuleSectionType MSType) { } void SPIRVAsmPrinter::outputDebugSourceAndStrings(const Module &M) { + // Output OpSourceExtensions. + for (auto &Str : MAI->SrcExt) { + MCInst Inst; + Inst.setOpcode(SPIRV::OpSourceExtension); + addStringImm(Str.first(), Inst); + outputMCInst(Inst); + } // Output OpSource. MCInst Inst; Inst.setOpcode(SPIRV::OpSource); @@ -246,6 +261,19 @@ void SPIRVAsmPrinter::outputDebugSourceAndStrings(const Module &M) { outputMCInst(Inst); } +void SPIRVAsmPrinter::outputOpExtInstImports(const Module &M) { + for (auto &CU : MAI->ExtInstSetMap) { + unsigned Set = CU.first; + Register Reg = CU.second; + MCInst Inst; + Inst.setOpcode(SPIRV::OpExtInstImport); + Inst.addOperand(MCOperand::createReg(Reg)); + addStringImm(getExtInstSetName(static_cast<SPIRV::InstructionSet>(Set)), + Inst); + outputMCInst(Inst); + } +} + void SPIRVAsmPrinter::outputOpMemoryModel() { MCInst Inst; Inst.setOpcode(SPIRV::OpMemoryModel); @@ -301,6 +329,135 @@ void SPIRVAsmPrinter::outputExtFuncDecls() { } } +// Encode LLVM type by SPIR-V execution mode VecTypeHint. 
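// For example, with the encoding below a scalar float yields 5 and a
// <4 x float> yields (4 << 16) | 5 == 0x40005: the element count goes in the
// upper 16 bits and the scalar type code in the lower 16 bits.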
+static unsigned encodeVecTypeHint(Type *Ty) { + if (Ty->isHalfTy()) + return 4; + if (Ty->isFloatTy()) + return 5; + if (Ty->isDoubleTy()) + return 6; + if (IntegerType *IntTy = dyn_cast<IntegerType>(Ty)) { + switch (IntTy->getIntegerBitWidth()) { + case 8: + return 0; + case 16: + return 1; + case 32: + return 2; + case 64: + return 3; + default: + llvm_unreachable("invalid integer type"); + } + } + if (FixedVectorType *VecTy = dyn_cast<FixedVectorType>(Ty)) { + Type *EleTy = VecTy->getElementType(); + unsigned Size = VecTy->getNumElements(); + return Size << 16 | encodeVecTypeHint(EleTy); + } + llvm_unreachable("invalid type"); +} + +static void addOpsFromMDNode(MDNode *MDN, MCInst &Inst, + SPIRV::ModuleAnalysisInfo *MAI) { + for (const MDOperand &MDOp : MDN->operands()) { + if (auto *CMeta = dyn_cast<ConstantAsMetadata>(MDOp)) { + Constant *C = CMeta->getValue(); + if (ConstantInt *Const = dyn_cast<ConstantInt>(C)) { + Inst.addOperand(MCOperand::createImm(Const->getZExtValue())); + } else if (auto *CE = dyn_cast<Function>(C)) { + Register FuncReg = MAI->getFuncReg(CE->getName().str()); + assert(FuncReg.isValid()); + Inst.addOperand(MCOperand::createReg(FuncReg)); + } + } + } +} + +void SPIRVAsmPrinter::outputExecutionModeFromMDNode(Register Reg, MDNode *Node, + SPIRV::ExecutionMode EM) { + MCInst Inst; + Inst.setOpcode(SPIRV::OpExecutionMode); + Inst.addOperand(MCOperand::createReg(Reg)); + Inst.addOperand(MCOperand::createImm(static_cast<unsigned>(EM))); + addOpsFromMDNode(Node, Inst, MAI); + outputMCInst(Inst); +} + +void SPIRVAsmPrinter::outputExecutionMode(const Module &M) { + NamedMDNode *Node = M.getNamedMetadata("spirv.ExecutionMode"); + if (Node) { + for (unsigned i = 0; i < Node->getNumOperands(); i++) { + MCInst Inst; + Inst.setOpcode(SPIRV::OpExecutionMode); + addOpsFromMDNode(cast<MDNode>(Node->getOperand(i)), Inst, MAI); + outputMCInst(Inst); + } + } + for (auto FI = M.begin(), E = M.end(); FI != E; ++FI) { + const Function &F = *FI; + if (F.isDeclaration()) + continue; + Register FReg = MAI->getFuncReg(F.getGlobalIdentifier()); + assert(FReg.isValid()); + if (MDNode *Node = F.getMetadata("reqd_work_group_size")) + outputExecutionModeFromMDNode(FReg, Node, + SPIRV::ExecutionMode::LocalSize); + if (MDNode *Node = F.getMetadata("work_group_size_hint")) + outputExecutionModeFromMDNode(FReg, Node, + SPIRV::ExecutionMode::LocalSizeHint); + if (MDNode *Node = F.getMetadata("intel_reqd_sub_group_size")) + outputExecutionModeFromMDNode(FReg, Node, + SPIRV::ExecutionMode::SubgroupSize); + if (MDNode *Node = F.getMetadata("vec_type_hint")) { + MCInst Inst; + Inst.setOpcode(SPIRV::OpExecutionMode); + Inst.addOperand(MCOperand::createReg(FReg)); + unsigned EM = static_cast<unsigned>(SPIRV::ExecutionMode::VecTypeHint); + Inst.addOperand(MCOperand::createImm(EM)); + unsigned TypeCode = encodeVecTypeHint(getMDOperandAsType(Node, 0)); + Inst.addOperand(MCOperand::createImm(TypeCode)); + outputMCInst(Inst); + } + } +} + +void SPIRVAsmPrinter::outputAnnotations(const Module &M) { + outputModuleSection(SPIRV::MB_Annotations); + // Process llvm.global.annotations special global variable. + for (auto F = M.global_begin(), E = M.global_end(); F != E; ++F) { + if ((*F).getName() != "llvm.global.annotations") + continue; + const GlobalVariable *V = &(*F); + const ConstantArray *CA = cast<ConstantArray>(V->getOperand(0)); + for (Value *Op : CA->operands()) { + ConstantStruct *CS = cast<ConstantStruct>(Op); + // The first field of the struct contains a pointer to + // the annotated variable. 
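      // (Each llvm.global.annotations entry is typically a struct along the
      //  lines of { ptr @fn, ptr @.str.annot, ptr @.str.file, i32 line, ... };
      //  only the first two fields are consumed below, and the exact IR shape
      //  shown is illustrative.)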
+ Value *AnnotatedVar = CS->getOperand(0)->stripPointerCasts(); + if (!isa<Function>(AnnotatedVar)) + llvm_unreachable("Unsupported value in llvm.global.annotations"); + Function *Func = cast<Function>(AnnotatedVar); + Register Reg = MAI->getFuncReg(Func->getGlobalIdentifier()); + + // The second field contains a pointer to a global annotation string. + GlobalVariable *GV = + cast<GlobalVariable>(CS->getOperand(1)->stripPointerCasts()); + + StringRef AnnotationString; + getConstantStringInfo(GV, AnnotationString); + MCInst Inst; + Inst.setOpcode(SPIRV::OpDecorate); + Inst.addOperand(MCOperand::createReg(Reg)); + unsigned Dec = static_cast<unsigned>(SPIRV::Decoration::UserSemantic); + Inst.addOperand(MCOperand::createImm(Dec)); + addStringImm(AnnotationString, Inst); + outputMCInst(Inst); + } + } +} + void SPIRVAsmPrinter::outputModuleSections() { const Module *M = MMI->getModule(); // Get the global subtarget to output module-level info. @@ -311,13 +468,14 @@ void SPIRVAsmPrinter::outputModuleSections() { // Output instructions according to the Logical Layout of a Module: // TODO: 1,2. All OpCapability instructions, then optional OpExtension // instructions. - // TODO: 3. Optional OpExtInstImport instructions. + // 3. Optional OpExtInstImport instructions. + outputOpExtInstImports(*M); // 4. The single required OpMemoryModel instruction. outputOpMemoryModel(); // 5. All entry point declarations, using OpEntryPoint. outputEntryPoints(); // 6. Execution-mode declarations, using OpExecutionMode or OpExecutionModeId. - // TODO: + outputExecutionMode(*M); // 7a. Debug: all OpString, OpSourceExtension, OpSource, and // OpSourceContinued, without forward references. outputDebugSourceAndStrings(*M); @@ -326,7 +484,7 @@ void SPIRVAsmPrinter::outputModuleSections() { // 7c. Debug: all OpModuleProcessed instructions. outputModuleSection(SPIRV::MB_DebugModuleProcessed); // 8. All annotation instructions (all decorations). - outputModuleSection(SPIRV::MB_Annotations); + outputAnnotations(*M); // 9. All type declarations (OpTypeXXX instructions), all constant // instructions, and all global variable declarations. This section is // the first section to allow use of: OpLine and OpNoLine debug information; diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp index 5b6b82aebf30..e8fedfeffde7 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp @@ -24,9 +24,8 @@ using namespace llvm; SPIRVCallLowering::SPIRVCallLowering(const SPIRVTargetLowering &TLI, - const SPIRVSubtarget &ST, SPIRVGlobalRegistry *GR) - : CallLowering(&TLI), ST(ST), GR(GR) {} + : CallLowering(&TLI), GR(GR) {} bool SPIRVCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val, ArrayRef<Register> VRegs, @@ -36,11 +35,13 @@ bool SPIRVCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, // TODO: handle the case of multiple registers. 
if (VRegs.size() > 1) return false; - if (Val) + if (Val) { + const auto &STI = MIRBuilder.getMF().getSubtarget(); return MIRBuilder.buildInstr(SPIRV::OpReturnValue) .addUse(VRegs[0]) - .constrainAllUses(MIRBuilder.getTII(), *ST.getRegisterInfo(), - *ST.getRegBankInfo()); + .constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(), + *STI.getRegBankInfo()); + } MIRBuilder.buildInstr(SPIRV::OpReturn); return true; } @@ -63,6 +64,56 @@ static uint32_t getFunctionControl(const Function &F) { return FuncControl; } +static ConstantInt *getConstInt(MDNode *MD, unsigned NumOp) { + if (MD->getNumOperands() > NumOp) { + auto *CMeta = dyn_cast<ConstantAsMetadata>(MD->getOperand(NumOp)); + if (CMeta) + return dyn_cast<ConstantInt>(CMeta->getValue()); + } + return nullptr; +} + +// This code restores function args/retvalue types for composite cases +// because the final types should still be aggregate whereas they're i32 +// during the translation to cope with aggregate flattening etc. +static FunctionType *getOriginalFunctionType(const Function &F) { + auto *NamedMD = F.getParent()->getNamedMetadata("spv.cloned_funcs"); + if (NamedMD == nullptr) + return F.getFunctionType(); + + Type *RetTy = F.getFunctionType()->getReturnType(); + SmallVector<Type *, 4> ArgTypes; + for (auto &Arg : F.args()) + ArgTypes.push_back(Arg.getType()); + + auto ThisFuncMDIt = + std::find_if(NamedMD->op_begin(), NamedMD->op_end(), [&F](MDNode *N) { + return isa<MDString>(N->getOperand(0)) && + cast<MDString>(N->getOperand(0))->getString() == F.getName(); + }); + // TODO: probably one function can have numerous type mutations, + // so we should support this. + if (ThisFuncMDIt != NamedMD->op_end()) { + auto *ThisFuncMD = *ThisFuncMDIt; + MDNode *MD = dyn_cast<MDNode>(ThisFuncMD->getOperand(1)); + assert(MD && "MDNode operand is expected"); + ConstantInt *Const = getConstInt(MD, 0); + if (Const) { + auto *CMeta = dyn_cast<ConstantAsMetadata>(MD->getOperand(1)); + assert(CMeta && "ConstantAsMetadata operand is expected"); + assert(Const->getSExtValue() >= -1); + // Currently -1 indicates return value, greater values mean + // argument numbers. + if (Const->getSExtValue() == -1) + RetTy = CMeta->getType(); + else + ArgTypes[Const->getSExtValue()] = CMeta->getType(); + } + } + + return FunctionType::get(RetTy, ArgTypes, F.isVarArg()); +} + bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, ArrayRef<ArrayRef<Register>> VRegs, @@ -71,7 +122,8 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, GR->setCurrentFunc(MIRBuilder.getMF()); // Assign types and names to all args, and store their types for later. - SmallVector<Register, 4> ArgTypeVRegs; + FunctionType *FTy = getOriginalFunctionType(F); + SmallVector<SPIRVType *, 4> ArgTypeVRegs; if (VRegs.size() > 0) { unsigned i = 0; for (const auto &Arg : F.args()) { @@ -79,9 +131,18 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, // TODO: handle the case of multiple registers. 
if (VRegs[i].size() > 1) return false; - auto *SpirvTy = - GR->assignTypeToVReg(Arg.getType(), VRegs[i][0], MIRBuilder); - ArgTypeVRegs.push_back(GR->getSPIRVTypeID(SpirvTy)); + Type *ArgTy = FTy->getParamType(i); + SPIRV::AccessQualifier AQ = SPIRV::AccessQualifier::ReadWrite; + MDNode *Node = F.getMetadata("kernel_arg_access_qual"); + if (Node && i < Node->getNumOperands()) { + StringRef AQString = cast<MDString>(Node->getOperand(i))->getString(); + if (AQString.compare("read_only") == 0) + AQ = SPIRV::AccessQualifier::ReadOnly; + else if (AQString.compare("write_only") == 0) + AQ = SPIRV::AccessQualifier::WriteOnly; + } + auto *SpirvTy = GR->assignTypeToVReg(ArgTy, VRegs[i][0], MIRBuilder, AQ); + ArgTypeVRegs.push_back(SpirvTy); if (Arg.hasName()) buildOpName(VRegs[i][0], Arg.getName(), MIRBuilder); @@ -92,8 +153,10 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, SPIRV::Decoration::MaxByteOffset, {DerefBytes}); } if (Arg.hasAttribute(Attribute::Alignment)) { + auto Alignment = static_cast<unsigned>( + Arg.getAttribute(Attribute::Alignment).getValueAsInt()); buildOpDecorate(VRegs[i][0], MIRBuilder, SPIRV::Decoration::Alignment, - {static_cast<unsigned>(Arg.getParamAlignment())}); + {Alignment}); } if (Arg.hasAttribute(Attribute::ReadOnly)) { auto Attr = @@ -107,6 +170,38 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, buildOpDecorate(VRegs[i][0], MIRBuilder, SPIRV::Decoration::FuncParamAttr, {Attr}); } + if (Arg.hasAttribute(Attribute::NoAlias)) { + auto Attr = + static_cast<unsigned>(SPIRV::FunctionParameterAttribute::NoAlias); + buildOpDecorate(VRegs[i][0], MIRBuilder, + SPIRV::Decoration::FuncParamAttr, {Attr}); + } + Node = F.getMetadata("kernel_arg_type_qual"); + if (Node && i < Node->getNumOperands()) { + StringRef TypeQual = cast<MDString>(Node->getOperand(i))->getString(); + if (TypeQual.compare("volatile") == 0) + buildOpDecorate(VRegs[i][0], MIRBuilder, SPIRV::Decoration::Volatile, + {}); + } + Node = F.getMetadata("spirv.ParameterDecorations"); + if (Node && i < Node->getNumOperands() && + isa<MDNode>(Node->getOperand(i))) { + MDNode *MD = cast<MDNode>(Node->getOperand(i)); + for (const MDOperand &MDOp : MD->operands()) { + MDNode *MD2 = dyn_cast<MDNode>(MDOp); + assert(MD2 && "Metadata operand is expected"); + ConstantInt *Const = getConstInt(MD2, 0); + assert(Const && "MDOperand should be ConstantInt"); + auto Dec = static_cast<SPIRV::Decoration>(Const->getZExtValue()); + std::vector<uint32_t> DecVec; + for (unsigned j = 1; j < MD2->getNumOperands(); j++) { + ConstantInt *Const = getConstInt(MD2, j); + assert(Const && "MDOperand should be ConstantInt"); + DecVec.push_back(static_cast<uint32_t>(Const->getZExtValue())); + } + buildOpDecorate(VRegs[i][0], MIRBuilder, Dec, DecVec); + } + } ++i; } } @@ -117,30 +212,30 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, MRI->setRegClass(FuncVReg, &SPIRV::IDRegClass); if (F.isDeclaration()) GR->add(&F, &MIRBuilder.getMF(), FuncVReg); - - auto *FTy = F.getFunctionType(); - auto FuncTy = GR->assignTypeToVReg(FTy, FuncVReg, MIRBuilder); + SPIRVType *RetTy = GR->getOrCreateSPIRVType(FTy->getReturnType(), MIRBuilder); + SPIRVType *FuncTy = GR->getOrCreateOpTypeFunctionWithArgs( + FTy, RetTy, ArgTypeVRegs, MIRBuilder); // Build the OpTypeFunction declaring it. 
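  // Roughly, for a signature like "int f(int)" the instructions built below
  // correspond to the SPIR-V sequence
  //   %fnTy = OpTypeFunction %int %int
  //   %f    = OpFunction %int None %fnTy
  //   %p    = OpFunctionParameter %int
  // (the ids are illustrative; "None" stands for a FuncControl value of 0).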
- Register ReturnTypeID = FuncTy->getOperand(1).getReg(); uint32_t FuncControl = getFunctionControl(F); MIRBuilder.buildInstr(SPIRV::OpFunction) .addDef(FuncVReg) - .addUse(ReturnTypeID) + .addUse(GR->getSPIRVTypeID(RetTy)) .addImm(FuncControl) .addUse(GR->getSPIRVTypeID(FuncTy)); // Add OpFunctionParameters. - const unsigned NumArgs = ArgTypeVRegs.size(); - for (unsigned i = 0; i < NumArgs; ++i) { + int i = 0; + for (const auto &Arg : F.args()) { assert(VRegs[i].size() == 1 && "Formal arg has multiple vregs"); MRI->setRegClass(VRegs[i][0], &SPIRV::IDRegClass); MIRBuilder.buildInstr(SPIRV::OpFunctionParameter) .addDef(VRegs[i][0]) - .addUse(ArgTypeVRegs[i]); + .addUse(GR->getSPIRVTypeID(ArgTypeVRegs[i])); if (F.isDeclaration()) - GR->add(F.getArg(i), &MIRBuilder.getMF(), VRegs[i][0]); + GR->add(&Arg, &MIRBuilder.getMF(), VRegs[i][0]); + i++; } // Name the function. if (F.hasName()) @@ -169,48 +264,51 @@ bool SPIRVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, // TODO: handle the case of multiple registers. if (Info.OrigRet.Regs.size() > 1) return false; + MachineFunction &MF = MIRBuilder.getMF(); + GR->setCurrentFunc(MF); + FunctionType *FTy = nullptr; + const Function *CF = nullptr; - GR->setCurrentFunc(MIRBuilder.getMF()); - Register ResVReg = - Info.OrigRet.Regs.empty() ? Register(0) : Info.OrigRet.Regs[0]; // Emit a regular OpFunctionCall. If it's an externally declared function, - // be sure to emit its type and function declaration here. It will be - // hoisted globally later. + // be sure to emit its type and function declaration here. It will be hoisted + // globally later. if (Info.Callee.isGlobal()) { - auto *CF = dyn_cast_or_null<const Function>(Info.Callee.getGlobal()); + CF = dyn_cast_or_null<const Function>(Info.Callee.getGlobal()); // TODO: support constexpr casts and indirect calls. if (CF == nullptr) return false; - if (CF->isDeclaration()) { - // Emit the type info and forward function declaration to the first MBB - // to ensure VReg definition dependencies are valid across all MBBs. - MachineBasicBlock::iterator OldII = MIRBuilder.getInsertPt(); - MachineBasicBlock &OldBB = MIRBuilder.getMBB(); - MachineBasicBlock &FirstBB = *MIRBuilder.getMF().getBlockNumbered(0); - MIRBuilder.setInsertPt(FirstBB, FirstBB.instr_end()); - - SmallVector<ArrayRef<Register>, 8> VRegArgs; - SmallVector<SmallVector<Register, 1>, 8> ToInsert; - for (const Argument &Arg : CF->args()) { - if (MIRBuilder.getDataLayout().getTypeStoreSize(Arg.getType()).isZero()) - continue; // Don't handle zero sized types. - ToInsert.push_back({MIRBuilder.getMRI()->createGenericVirtualRegister( - LLT::scalar(32))}); - VRegArgs.push_back(ToInsert.back()); - } - // TODO: Reuse FunctionLoweringInfo. - FunctionLoweringInfo FuncInfo; - lowerFormalArguments(MIRBuilder, *CF, VRegArgs, FuncInfo); - MIRBuilder.setInsertPt(OldBB, OldII); + FTy = getOriginalFunctionType(*CF); + } + + Register ResVReg = + Info.OrigRet.Regs.empty() ? Register(0) : Info.OrigRet.Regs[0]; + if (CF && CF->isDeclaration() && + !GR->find(CF, &MIRBuilder.getMF()).isValid()) { + // Emit the type info and forward function declaration to the first MBB + // to ensure VReg definition dependencies are valid across all MBBs. 
+ MachineIRBuilder FirstBlockBuilder; + FirstBlockBuilder.setMF(MF); + FirstBlockBuilder.setMBB(*MF.getBlockNumbered(0)); + + SmallVector<ArrayRef<Register>, 8> VRegArgs; + SmallVector<SmallVector<Register, 1>, 8> ToInsert; + for (const Argument &Arg : CF->args()) { + if (MIRBuilder.getDataLayout().getTypeStoreSize(Arg.getType()).isZero()) + continue; // Don't handle zero sized types. + ToInsert.push_back( + {MIRBuilder.getMRI()->createGenericVirtualRegister(LLT::scalar(32))}); + VRegArgs.push_back(ToInsert.back()); } + // TODO: Reuse FunctionLoweringInfo + FunctionLoweringInfo FuncInfo; + lowerFormalArguments(FirstBlockBuilder, *CF, VRegArgs, FuncInfo); } // Make sure there's a valid return reg, even for functions returning void. - if (!ResVReg.isValid()) { + if (!ResVReg.isValid()) ResVReg = MIRBuilder.getMRI()->createVirtualRegister(&SPIRV::IDRegClass); - } SPIRVType *RetType = - GR->assignTypeToVReg(Info.OrigRet.Ty, ResVReg, MIRBuilder); + GR->assignTypeToVReg(FTy->getReturnType(), ResVReg, MIRBuilder); // Emit the OpFunctionCall and its args. auto MIB = MIRBuilder.buildInstr(SPIRV::OpFunctionCall) @@ -224,6 +322,7 @@ bool SPIRVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, return false; MIB.addUse(Arg.Regs[0]); } - return MIB.constrainAllUses(MIRBuilder.getTII(), *ST.getRegisterInfo(), - *ST.getRegBankInfo()); + const auto &STI = MF.getSubtarget(); + return MIB.constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(), + *STI.getRegBankInfo()); } diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.h b/llvm/lib/Target/SPIRV/SPIRVCallLowering.h index c179bb35154b..c2d6ad82d507 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCallLowering.h +++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.h @@ -13,23 +13,21 @@ #ifndef LLVM_LIB_TARGET_SPIRV_SPIRVCALLLOWERING_H #define LLVM_LIB_TARGET_SPIRV_SPIRVCALLLOWERING_H +#include "SPIRVGlobalRegistry.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" namespace llvm { class SPIRVGlobalRegistry; -class SPIRVSubtarget; class SPIRVTargetLowering; class SPIRVCallLowering : public CallLowering { private: - const SPIRVSubtarget &ST; // Used to create and assign function, argument, and return type information. SPIRVGlobalRegistry *GR; public: - SPIRVCallLowering(const SPIRVTargetLowering &TLI, const SPIRVSubtarget &ST, - SPIRVGlobalRegistry *GR); + SPIRVCallLowering(const SPIRVTargetLowering &TLI, SPIRVGlobalRegistry *GR); // Built OpReturn or OpReturnValue. bool lowerReturn(MachineIRBuilder &MIRBuiler, const Value *Val, diff --git a/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp b/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp index 57cd4bafd351..1926977ea66e 100644 --- a/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp @@ -92,4 +92,4 @@ void SPIRVGeneralDuplicatesTracker::buildDepsGraph( } } } -}
\ No newline at end of file +} diff --git a/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.h b/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.h index 58ae1f86ce42..ab22c3d2a647 100644 --- a/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.h +++ b/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.h @@ -169,6 +169,8 @@ public: Register find(const Argument *Arg, const MachineFunction *MF) { return AT.find(const_cast<Argument *>(Arg), MF); } + + const SPIRVDuplicatesTracker<Type> *getTypes() { return &TT; } }; } // namespace llvm -#endif
\ No newline at end of file +#endif // LLVM_LIB_TARGET_SPIRV_SPIRVDUPLICATESTRACKER_H diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp index 9624482e3622..0075f547b6d6 100644 --- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp @@ -87,6 +87,7 @@ public: Instruction *visitLoadInst(LoadInst &I); Instruction *visitStoreInst(StoreInst &I); Instruction *visitAllocaInst(AllocaInst &I); + Instruction *visitAtomicCmpXchgInst(AtomicCmpXchgInst &I); bool runOnFunction(Function &F) override; }; } // namespace @@ -103,7 +104,7 @@ static inline bool isAssignTypeInstr(const Instruction *I) { static bool isMemInstrToReplace(Instruction *I) { return isa<StoreInst>(I) || isa<LoadInst>(I) || isa<InsertValueInst>(I) || - isa<ExtractValueInst>(I); + isa<ExtractValueInst>(I) || isa<AtomicCmpXchgInst>(I); } static bool isAggrToReplace(const Value *V) { @@ -134,13 +135,14 @@ void SPIRVEmitIntrinsics::replaceMemInstrUses(Instruction *Old, Instruction *New) { while (!Old->user_empty()) { auto *U = Old->user_back(); - if (isMemInstrToReplace(U) || isa<ReturnInst>(U)) { - U->replaceUsesOfWith(Old, New); - } else if (isAssignTypeInstr(U)) { + if (isAssignTypeInstr(U)) { IRB->SetInsertPoint(U); SmallVector<Value *, 2> Args = {New, U->getOperand(1)}; IRB->CreateIntrinsic(Intrinsic::spv_assign_type, {New->getType()}, Args); U->eraseFromParent(); + } else if (isMemInstrToReplace(U) || isa<ReturnInst>(U) || + isa<CallInst>(U)) { + U->replaceUsesOfWith(Old, New); } else { llvm_unreachable("illegal aggregate intrinsic user"); } @@ -301,10 +303,10 @@ Instruction *SPIRVEmitIntrinsics::visitStoreInst(StoreInst &I) { MachineMemOperand::Flags Flags = TLI->getStoreMemOperandFlags(I, F->getParent()->getDataLayout()); auto *PtrOp = I.getPointerOperand(); - auto *NewI = - IRB->CreateIntrinsic(Intrinsic::spv_store, {PtrOp->getType()}, - {I.getValueOperand(), PtrOp, IRB->getInt16(Flags), - IRB->getInt8(I.getAlign().value())}); + auto *NewI = IRB->CreateIntrinsic( + Intrinsic::spv_store, {I.getValueOperand()->getType(), PtrOp->getType()}, + {I.getValueOperand(), PtrOp, IRB->getInt16(Flags), + IRB->getInt8(I.getAlign().value())}); I.eraseFromParent(); return NewI; } @@ -314,6 +316,22 @@ Instruction *SPIRVEmitIntrinsics::visitAllocaInst(AllocaInst &I) { return &I; } +Instruction *SPIRVEmitIntrinsics::visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) { + assert(I.getType()->isAggregateType() && "Aggregate result is expected"); + SmallVector<Value *> Args; + for (auto &Op : I.operands()) + Args.push_back(Op); + Args.push_back(IRB->getInt32(I.getSyncScopeID())); + Args.push_back(IRB->getInt32( + static_cast<uint32_t>(getMemSemantics(I.getSuccessOrdering())))); + Args.push_back(IRB->getInt32( + static_cast<uint32_t>(getMemSemantics(I.getFailureOrdering())))); + auto *NewI = IRB->CreateIntrinsic(Intrinsic::spv_cmpxchg, + {I.getPointerOperand()->getType()}, {Args}); + replaceMemInstrUses(&I, NewI); + return NewI; +} + void SPIRVEmitIntrinsics::processGlobalValue(GlobalVariable &GV) { // Skip special artifical variable llvm.global.annotations. if (GV.getName() == "llvm.global.annotations") @@ -351,14 +369,13 @@ void SPIRVEmitIntrinsics::insertAssignTypeIntrs(Instruction *I) { // Check GetElementPtrConstantExpr case. 
(isa<ConstantExpr>(Op) && isa<GEPOperator>(Op))) { IRB->SetInsertPoint(I); - buildIntrWithMD(Intrinsic::spv_assign_type, {Op->getType()}, Op, Op); + if (isa<UndefValue>(Op) && Op->getType()->isAggregateType()) + buildIntrWithMD(Intrinsic::spv_assign_type, {IRB->getInt32Ty()}, Op, + UndefValue::get(IRB->getInt32Ty())); + else + buildIntrWithMD(Intrinsic::spv_assign_type, {Op->getType()}, Op, Op); } } - // StoreInst's operand type can be changed in the next stage so we need to - // store it in the set. - if (isa<StoreInst>(I) && - cast<StoreInst>(I)->getValueOperand()->getType()->isAggregateType()) - AggrStores.insert(I); } void SPIRVEmitIntrinsics::processInstrAfterVisit(Instruction *I) { @@ -378,7 +395,7 @@ void SPIRVEmitIntrinsics::processInstrAfterVisit(Instruction *I) { if ((isa<ConstantAggregateZero>(Op) && Op->getType()->isVectorTy()) || isa<PHINode>(I) || isa<SwitchInst>(I)) TrackConstants = false; - if (isa<ConstantData>(Op) && TrackConstants) { + if ((isa<ConstantData>(Op) || isa<ConstantExpr>(Op)) && TrackConstants) { unsigned OpNo = Op.getOperandNo(); if (II && ((II->getIntrinsicID() == Intrinsic::spv_gep && OpNo == 0) || (II->paramHasAttr(OpNo, Attribute::ImmArg)))) @@ -405,8 +422,20 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) { AggrConsts.clear(); AggrStores.clear(); - IRB->SetInsertPoint(&Func.getEntryBlock().front()); + // StoreInst's operand type can be changed during the next transformations, + // so we need to store it in the set. Also store already transformed types. + for (auto &I : instructions(Func)) { + StoreInst *SI = dyn_cast<StoreInst>(&I); + if (!SI) + continue; + Type *ElTy = SI->getValueOperand()->getType(); + PointerType *PTy = cast<PointerType>(SI->getOperand(1)->getType()); + if (ElTy->isAggregateType() || ElTy->isVectorTy() || + !PTy->isOpaqueOrPointeeTypeMatches(ElTy)) + AggrStores.insert(&I); + } + IRB->SetInsertPoint(&Func.getEntryBlock().front()); for (auto &GV : Func.getParent()->globals()) processGlobalValue(GV); diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp index 5f890c003cbc..5c8fa7adfbdf 100644 --- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp @@ -24,6 +24,24 @@ using namespace llvm; SPIRVGlobalRegistry::SPIRVGlobalRegistry(unsigned PointerSize) : PointerSize(PointerSize) {} +SPIRVType *SPIRVGlobalRegistry::assignIntTypeToVReg(unsigned BitWidth, + Register VReg, + MachineInstr &I, + const SPIRVInstrInfo &TII) { + SPIRVType *SpirvType = getOrCreateSPIRVIntegerType(BitWidth, I, TII); + assignSPIRVTypeToVReg(SpirvType, VReg, *CurMF); + return SpirvType; +} + +SPIRVType *SPIRVGlobalRegistry::assignVectTypeToVReg( + SPIRVType *BaseType, unsigned NumElements, Register VReg, MachineInstr &I, + const SPIRVInstrInfo &TII) { + SPIRVType *SpirvType = + getOrCreateSPIRVVectorType(BaseType, NumElements, I, TII); + assignSPIRVTypeToVReg(SpirvType, VReg, *CurMF); + return SpirvType; +} + SPIRVType *SPIRVGlobalRegistry::assignTypeToVReg( const Type *Type, Register VReg, MachineIRBuilder &MIRBuilder, SPIRV::AccessQualifier AccessQual, bool EmitIR) { @@ -96,6 +114,65 @@ SPIRVType *SPIRVGlobalRegistry::getOpTypeVector(uint32_t NumElems, return MIB; } +std::tuple<Register, ConstantInt *, bool> +SPIRVGlobalRegistry::getOrCreateConstIntReg(uint64_t Val, SPIRVType *SpvType, + MachineIRBuilder *MIRBuilder, + MachineInstr *I, + const SPIRVInstrInfo *TII) { + const IntegerType *LLVMIntTy; + if (SpvType) + LLVMIntTy = 
cast<IntegerType>(getTypeForSPIRVType(SpvType)); + else + LLVMIntTy = IntegerType::getInt32Ty(CurMF->getFunction().getContext()); + bool NewInstr = false; + // Find a constant in DT or build a new one. + ConstantInt *CI = ConstantInt::get(const_cast<IntegerType *>(LLVMIntTy), Val); + Register Res = DT.find(CI, CurMF); + if (!Res.isValid()) { + unsigned BitWidth = SpvType ? getScalarOrVectorBitWidth(SpvType) : 32; + LLT LLTy = LLT::scalar(32); + Res = CurMF->getRegInfo().createGenericVirtualRegister(LLTy); + if (MIRBuilder) + assignTypeToVReg(LLVMIntTy, Res, *MIRBuilder); + else + assignIntTypeToVReg(BitWidth, Res, *I, *TII); + DT.add(CI, CurMF, Res); + NewInstr = true; + } + return std::make_tuple(Res, CI, NewInstr); +} + +Register SPIRVGlobalRegistry::getOrCreateConstInt(uint64_t Val, MachineInstr &I, + SPIRVType *SpvType, + const SPIRVInstrInfo &TII) { + assert(SpvType); + ConstantInt *CI; + Register Res; + bool New; + std::tie(Res, CI, New) = + getOrCreateConstIntReg(Val, SpvType, nullptr, &I, &TII); + // If we have found Res register which is defined by the passed G_CONSTANT + // machine instruction, a new constant instruction should be created. + if (!New && (!I.getOperand(0).isReg() || Res != I.getOperand(0).getReg())) + return Res; + MachineInstrBuilder MIB; + MachineBasicBlock &BB = *I.getParent(); + if (Val) { + MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantI)) + .addDef(Res) + .addUse(getSPIRVTypeID(SpvType)); + addNumImm(APInt(getScalarOrVectorBitWidth(SpvType), Val), MIB); + } else { + MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantNull)) + .addDef(Res) + .addUse(getSPIRVTypeID(SpvType)); + } + const auto &ST = CurMF->getSubtarget(); + constrainSelectedInstRegOperands(*MIB, *ST.getInstrInfo(), + *ST.getRegisterInfo(), *ST.getRegBankInfo()); + return Res; +} + Register SPIRVGlobalRegistry::buildConstantInt(uint64_t Val, MachineIRBuilder &MIRBuilder, SPIRVType *SpvType, @@ -112,14 +189,32 @@ Register SPIRVGlobalRegistry::buildConstantInt(uint64_t Val, Register Res = DT.find(ConstInt, &MF); if (!Res.isValid()) { unsigned BitWidth = SpvType ? getScalarOrVectorBitWidth(SpvType) : 32; - Res = MF.getRegInfo().createGenericVirtualRegister(LLT::scalar(BitWidth)); - assignTypeToVReg(LLVMIntTy, Res, MIRBuilder); - if (EmitIR) + LLT LLTy = LLT::scalar(EmitIR ? BitWidth : 32); + Res = MF.getRegInfo().createGenericVirtualRegister(LLTy); + assignTypeToVReg(LLVMIntTy, Res, MIRBuilder, + SPIRV::AccessQualifier::ReadWrite, EmitIR); + DT.add(ConstInt, &MIRBuilder.getMF(), Res); + if (EmitIR) { MIRBuilder.buildConstant(Res, *ConstInt); - else - MIRBuilder.buildInstr(SPIRV::OpConstantI) - .addDef(Res) - .addImm(ConstInt->getSExtValue()); + } else { + MachineInstrBuilder MIB; + if (Val) { + assert(SpvType); + MIB = MIRBuilder.buildInstr(SPIRV::OpConstantI) + .addDef(Res) + .addUse(getSPIRVTypeID(SpvType)); + addNumImm(APInt(BitWidth, Val), MIB); + } else { + assert(SpvType); + MIB = MIRBuilder.buildInstr(SPIRV::OpConstantNull) + .addDef(Res) + .addUse(getSPIRVTypeID(SpvType)); + } + const auto &Subtarget = CurMF->getSubtarget(); + constrainSelectedInstRegOperands(*MIB, *Subtarget.getInstrInfo(), + *Subtarget.getRegisterInfo(), + *Subtarget.getRegBankInfo()); + } } return Res; } @@ -142,11 +237,63 @@ Register SPIRVGlobalRegistry::buildConstantFP(APFloat Val, unsigned BitWidth = SpvType ? 
getScalarOrVectorBitWidth(SpvType) : 32; Res = MF.getRegInfo().createGenericVirtualRegister(LLT::scalar(BitWidth)); assignTypeToVReg(LLVMFPTy, Res, MIRBuilder); + DT.add(ConstFP, &MF, Res); MIRBuilder.buildFConstant(Res, *ConstFP); } return Res; } +Register +SPIRVGlobalRegistry::getOrCreateConsIntVector(uint64_t Val, MachineInstr &I, + SPIRVType *SpvType, + const SPIRVInstrInfo &TII) { + const Type *LLVMTy = getTypeForSPIRVType(SpvType); + assert(LLVMTy->isVectorTy()); + const FixedVectorType *LLVMVecTy = cast<FixedVectorType>(LLVMTy); + Type *LLVMBaseTy = LLVMVecTy->getElementType(); + // Find a constant vector in DT or build a new one. + const auto ConstInt = ConstantInt::get(LLVMBaseTy, Val); + auto ConstVec = + ConstantVector::getSplat(LLVMVecTy->getElementCount(), ConstInt); + Register Res = DT.find(ConstVec, CurMF); + if (!Res.isValid()) { + unsigned BitWidth = getScalarOrVectorBitWidth(SpvType); + SPIRVType *SpvBaseType = getOrCreateSPIRVIntegerType(BitWidth, I, TII); + // SpvScalConst should be created before SpvVecConst to avoid undefined ID + // error on validation. + // TODO: can moved below once sorting of types/consts/defs is implemented. + Register SpvScalConst; + if (Val) + SpvScalConst = getOrCreateConstInt(Val, I, SpvBaseType, TII); + // TODO: maybe use bitwidth of base type. + LLT LLTy = LLT::scalar(32); + Register SpvVecConst = + CurMF->getRegInfo().createGenericVirtualRegister(LLTy); + const unsigned ElemCnt = SpvType->getOperand(2).getImm(); + assignVectTypeToVReg(SpvBaseType, ElemCnt, SpvVecConst, I, TII); + DT.add(ConstVec, CurMF, SpvVecConst); + MachineInstrBuilder MIB; + MachineBasicBlock &BB = *I.getParent(); + if (Val) { + MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantComposite)) + .addDef(SpvVecConst) + .addUse(getSPIRVTypeID(SpvType)); + for (unsigned i = 0; i < ElemCnt; ++i) + MIB.addUse(SpvScalConst); + } else { + MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantNull)) + .addDef(SpvVecConst) + .addUse(getSPIRVTypeID(SpvType)); + } + const auto &Subtarget = CurMF->getSubtarget(); + constrainSelectedInstRegOperands(*MIB, *Subtarget.getInstrInfo(), + *Subtarget.getRegisterInfo(), + *Subtarget.getRegBankInfo()); + return SpvVecConst; + } + return Res; +} + Register SPIRVGlobalRegistry::buildGlobalVariable( Register ResVReg, SPIRVType *BaseType, StringRef Name, const GlobalValue *GV, SPIRV::StorageClass Storage, @@ -169,7 +316,13 @@ Register SPIRVGlobalRegistry::buildGlobalVariable( } GV = GVar; } - Register Reg; + Register Reg = DT.find(GVar, &MIRBuilder.getMF()); + if (Reg.isValid()) { + if (Reg != ResVReg) + MIRBuilder.buildCopy(ResVReg, Reg); + return ResVReg; + } + auto MIB = MIRBuilder.buildInstr(SPIRV::OpVariable) .addDef(ResVReg) .addUse(getSPIRVTypeID(BaseType)) @@ -234,14 +387,76 @@ SPIRVType *SPIRVGlobalRegistry::getOpTypeArray(uint32_t NumElems, return MIB; } +SPIRVType *SPIRVGlobalRegistry::getOpTypeOpaque(const StructType *Ty, + MachineIRBuilder &MIRBuilder) { + assert(Ty->hasName()); + const StringRef Name = Ty->hasName() ? 
Ty->getName() : ""; + Register ResVReg = createTypeVReg(MIRBuilder); + auto MIB = MIRBuilder.buildInstr(SPIRV::OpTypeOpaque).addDef(ResVReg); + addStringImm(Name, MIB); + buildOpName(ResVReg, Name, MIRBuilder); + return MIB; +} + +SPIRVType *SPIRVGlobalRegistry::getOpTypeStruct(const StructType *Ty, + MachineIRBuilder &MIRBuilder, + bool EmitIR) { + SmallVector<Register, 4> FieldTypes; + for (const auto &Elem : Ty->elements()) { + SPIRVType *ElemTy = findSPIRVType(Elem, MIRBuilder); + assert(ElemTy && ElemTy->getOpcode() != SPIRV::OpTypeVoid && + "Invalid struct element type"); + FieldTypes.push_back(getSPIRVTypeID(ElemTy)); + } + Register ResVReg = createTypeVReg(MIRBuilder); + auto MIB = MIRBuilder.buildInstr(SPIRV::OpTypeStruct).addDef(ResVReg); + for (const auto &Ty : FieldTypes) + MIB.addUse(Ty); + if (Ty->hasName()) + buildOpName(ResVReg, Ty->getName(), MIRBuilder); + if (Ty->isPacked()) + buildOpDecorate(ResVReg, MIRBuilder, SPIRV::Decoration::CPacked, {}); + return MIB; +} + +static bool isOpenCLBuiltinType(const StructType *SType) { + return SType->isOpaque() && SType->hasName() && + SType->getName().startswith("opencl."); +} + +static bool isSPIRVBuiltinType(const StructType *SType) { + return SType->isOpaque() && SType->hasName() && + SType->getName().startswith("spirv."); +} + +static bool isSpecialType(const Type *Ty) { + if (auto PType = dyn_cast<PointerType>(Ty)) { + if (!PType->isOpaque()) + Ty = PType->getNonOpaquePointerElementType(); + } + if (auto SType = dyn_cast<StructType>(Ty)) + return isOpenCLBuiltinType(SType) || isSPIRVBuiltinType(SType); + return false; +} + SPIRVType *SPIRVGlobalRegistry::getOpTypePointer(SPIRV::StorageClass SC, SPIRVType *ElemType, - MachineIRBuilder &MIRBuilder) { - auto MIB = MIRBuilder.buildInstr(SPIRV::OpTypePointer) - .addDef(createTypeVReg(MIRBuilder)) - .addImm(static_cast<uint32_t>(SC)) - .addUse(getSPIRVTypeID(ElemType)); - return MIB; + MachineIRBuilder &MIRBuilder, + Register Reg) { + if (!Reg.isValid()) + Reg = createTypeVReg(MIRBuilder); + return MIRBuilder.buildInstr(SPIRV::OpTypePointer) + .addDef(Reg) + .addImm(static_cast<uint32_t>(SC)) + .addUse(getSPIRVTypeID(ElemType)); +} + +SPIRVType * +SPIRVGlobalRegistry::getOpTypeForwardPointer(SPIRV::StorageClass SC, + MachineIRBuilder &MIRBuilder) { + return MIRBuilder.buildInstr(SPIRV::OpTypeForwardPointer) + .addUse(createTypeVReg(MIRBuilder)) + .addImm(static_cast<uint32_t>(SC)); } SPIRVType *SPIRVGlobalRegistry::getOpTypeFunction( @@ -255,10 +470,49 @@ SPIRVType *SPIRVGlobalRegistry::getOpTypeFunction( return MIB; } +SPIRVType *SPIRVGlobalRegistry::getOrCreateOpTypeFunctionWithArgs( + const Type *Ty, SPIRVType *RetType, + const SmallVectorImpl<SPIRVType *> &ArgTypes, + MachineIRBuilder &MIRBuilder) { + Register Reg = DT.find(Ty, &MIRBuilder.getMF()); + if (Reg.isValid()) + return getSPIRVTypeForVReg(Reg); + SPIRVType *SpirvType = getOpTypeFunction(RetType, ArgTypes, MIRBuilder); + return finishCreatingSPIRVType(Ty, SpirvType); +} + +SPIRVType *SPIRVGlobalRegistry::findSPIRVType(const Type *Ty, + MachineIRBuilder &MIRBuilder, + SPIRV::AccessQualifier AccQual, + bool EmitIR) { + Register Reg = DT.find(Ty, &MIRBuilder.getMF()); + if (Reg.isValid()) + return getSPIRVTypeForVReg(Reg); + if (ForwardPointerTypes.find(Ty) != ForwardPointerTypes.end()) + return ForwardPointerTypes[Ty]; + return restOfCreateSPIRVType(Ty, MIRBuilder, AccQual, EmitIR); +} + +Register SPIRVGlobalRegistry::getSPIRVTypeID(const SPIRVType *SpirvType) const { + assert(SpirvType && "Attempting to get type id for 
nullptr type."); + if (SpirvType->getOpcode() == SPIRV::OpTypeForwardPointer) + return SpirvType->uses().begin()->getReg(); + return SpirvType->defs().begin()->getReg(); +} + SPIRVType *SPIRVGlobalRegistry::createSPIRVType(const Type *Ty, MachineIRBuilder &MIRBuilder, SPIRV::AccessQualifier AccQual, bool EmitIR) { + assert(!isSpecialType(Ty)); + auto &TypeToSPIRVTypeMap = DT.getTypes()->getAllUses(); + auto t = TypeToSPIRVTypeMap.find(Ty); + if (t != TypeToSPIRVTypeMap.end()) { + auto tt = t->second.find(&MIRBuilder.getMF()); + if (tt != t->second.end()) + return getSPIRVTypeForVReg(tt->second); + } + if (auto IType = dyn_cast<IntegerType>(Ty)) { const unsigned Width = IType->getBitWidth(); return Width == 1 ? getOpTypeBool(MIRBuilder) @@ -269,21 +523,25 @@ SPIRVType *SPIRVGlobalRegistry::createSPIRVType(const Type *Ty, if (Ty->isVoidTy()) return getOpTypeVoid(MIRBuilder); if (Ty->isVectorTy()) { - auto El = getOrCreateSPIRVType(cast<FixedVectorType>(Ty)->getElementType(), - MIRBuilder); + SPIRVType *El = + findSPIRVType(cast<FixedVectorType>(Ty)->getElementType(), MIRBuilder); return getOpTypeVector(cast<FixedVectorType>(Ty)->getNumElements(), El, MIRBuilder); } if (Ty->isArrayTy()) { - auto *El = getOrCreateSPIRVType(Ty->getArrayElementType(), MIRBuilder); + SPIRVType *El = findSPIRVType(Ty->getArrayElementType(), MIRBuilder); return getOpTypeArray(Ty->getArrayNumElements(), El, MIRBuilder, EmitIR); } - assert(!isa<StructType>(Ty) && "Unsupported StructType"); + if (auto SType = dyn_cast<StructType>(Ty)) { + if (SType->isOpaque()) + return getOpTypeOpaque(SType, MIRBuilder); + return getOpTypeStruct(SType, MIRBuilder, EmitIR); + } if (auto FType = dyn_cast<FunctionType>(Ty)) { - SPIRVType *RetTy = getOrCreateSPIRVType(FType->getReturnType(), MIRBuilder); + SPIRVType *RetTy = findSPIRVType(FType->getReturnType(), MIRBuilder); SmallVector<SPIRVType *, 4> ParamTypes; for (const auto &t : FType->params()) { - ParamTypes.push_back(getOrCreateSPIRVType(t, MIRBuilder)); + ParamTypes.push_back(findSPIRVType(t, MIRBuilder)); } return getOpTypeFunction(RetTy, ParamTypes, MIRBuilder); } @@ -292,24 +550,51 @@ SPIRVType *SPIRVGlobalRegistry::createSPIRVType(const Type *Ty, // At the moment, all opaque pointers correspond to i8 element type. // TODO: change the implementation once opaque pointers are supported // in the SPIR-V specification. - if (PType->isOpaque()) { + if (PType->isOpaque()) SpvElementType = getOrCreateSPIRVIntegerType(8, MIRBuilder); - } else { - Type *ElemType = PType->getNonOpaquePointerElementType(); - // TODO: support OpenCL and SPIRV builtins like image2d_t that are passed - // as pointers, but should be treated as custom types like OpTypeImage. - assert(!isa<StructType>(ElemType) && "Unsupported StructType pointer"); - - // Otherwise, treat it as a regular pointer type. - SpvElementType = getOrCreateSPIRVType( - ElemType, MIRBuilder, SPIRV::AccessQualifier::ReadWrite, EmitIR); - } + else + SpvElementType = + findSPIRVType(PType->getNonOpaquePointerElementType(), MIRBuilder, + SPIRV::AccessQualifier::ReadWrite, EmitIR); auto SC = addressSpaceToStorageClass(PType->getAddressSpace()); - return getOpTypePointer(SC, SpvElementType, MIRBuilder); + // Null pointer means we have a loop in type definitions, make and + // return corresponding OpTypeForwardPointer. 
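The type-definition loop referred to in the comment above arises whenever a pointer's element type refers back to the pointer being built. A minimal sketch of such an input (illustrative only, not taken from the patch or its tests):

// A self-referential aggregate: building the SPIR-V pointer type for `Next`
// re-enters the still-unfinished `Node` struct type, so the registry emits an
// OpTypeForwardPointer first and later builds the real OpTypePointer reusing
// that forward pointer's result register.
struct Node {
  int Payload;
  Node *Next;
};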
+ if (SpvElementType == nullptr) { + if (ForwardPointerTypes.find(Ty) == ForwardPointerTypes.end()) + ForwardPointerTypes[PType] = getOpTypeForwardPointer(SC, MIRBuilder); + return ForwardPointerTypes[PType]; + } + Register Reg(0); + // If we have forward pointer associated with this type, use its register + // operand to create OpTypePointer. + if (ForwardPointerTypes.find(PType) != ForwardPointerTypes.end()) + Reg = getSPIRVTypeID(ForwardPointerTypes[PType]); + + return getOpTypePointer(SC, SpvElementType, MIRBuilder, Reg); } llvm_unreachable("Unable to convert LLVM type to SPIRVType"); } +SPIRVType *SPIRVGlobalRegistry::restOfCreateSPIRVType( + const Type *Ty, MachineIRBuilder &MIRBuilder, + SPIRV::AccessQualifier AccessQual, bool EmitIR) { + if (TypesInProcessing.count(Ty) && !Ty->isPointerTy()) + return nullptr; + TypesInProcessing.insert(Ty); + SPIRVType *SpirvType = createSPIRVType(Ty, MIRBuilder, AccessQual, EmitIR); + TypesInProcessing.erase(Ty); + VRegToTypeMap[&MIRBuilder.getMF()][getSPIRVTypeID(SpirvType)] = SpirvType; + SPIRVToLLVMType[SpirvType] = Ty; + Register Reg = DT.find(Ty, &MIRBuilder.getMF()); + // Do not add OpTypeForwardPointer to DT, a corresponding normal pointer type + // will be added later. For special types it is already added to DT. + if (SpirvType->getOpcode() != SPIRV::OpTypeForwardPointer && !Reg.isValid() && + !isSpecialType(Ty)) + DT.add(Ty, &MIRBuilder.getMF(), getSPIRVTypeID(SpirvType)); + + return SpirvType; +} + SPIRVType *SPIRVGlobalRegistry::getSPIRVTypeForVReg(Register VReg) const { auto t = VRegToTypeMap.find(CurMF); if (t != VRegToTypeMap.end()) { @@ -321,13 +606,26 @@ SPIRVType *SPIRVGlobalRegistry::getSPIRVTypeForVReg(Register VReg) const { } SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVType( - const Type *Type, MachineIRBuilder &MIRBuilder, + const Type *Ty, MachineIRBuilder &MIRBuilder, SPIRV::AccessQualifier AccessQual, bool EmitIR) { - Register Reg = DT.find(Type, &MIRBuilder.getMF()); + Register Reg = DT.find(Ty, &MIRBuilder.getMF()); if (Reg.isValid()) return getSPIRVTypeForVReg(Reg); - SPIRVType *SpirvType = createSPIRVType(Type, MIRBuilder, AccessQual, EmitIR); - return restOfCreateSPIRVType(Type, SpirvType); + TypesInProcessing.clear(); + SPIRVType *STy = restOfCreateSPIRVType(Ty, MIRBuilder, AccessQual, EmitIR); + // Create normal pointer types for the corresponding OpTypeForwardPointers. 
+ for (auto &CU : ForwardPointerTypes) { + const Type *Ty2 = CU.first; + SPIRVType *STy2 = CU.second; + if ((Reg = DT.find(Ty2, &MIRBuilder.getMF())).isValid()) + STy2 = getSPIRVTypeForVReg(Reg); + else + STy2 = restOfCreateSPIRVType(Ty2, MIRBuilder, AccessQual, EmitIR); + if (Ty == Ty2) + STy = STy2; + } + ForwardPointerTypes.clear(); + return STy; } bool SPIRVGlobalRegistry::isScalarOfType(Register VReg, @@ -393,8 +691,8 @@ SPIRVGlobalRegistry::getOrCreateSPIRVIntegerType(unsigned BitWidth, MIRBuilder); } -SPIRVType *SPIRVGlobalRegistry::restOfCreateSPIRVType(const Type *LLVMTy, - SPIRVType *SpirvType) { +SPIRVType *SPIRVGlobalRegistry::finishCreatingSPIRVType(const Type *LLVMTy, + SPIRVType *SpirvType) { assert(CurMF == SpirvType->getMF()); VRegToTypeMap[CurMF][getSPIRVTypeID(SpirvType)] = SpirvType; SPIRVToLLVMType[SpirvType] = LLVMTy; @@ -413,7 +711,7 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVIntegerType( .addDef(createTypeVReg(CurMF->getRegInfo())) .addImm(BitWidth) .addImm(0); - return restOfCreateSPIRVType(LLVMTy, MIB); + return finishCreatingSPIRVType(LLVMTy, MIB); } SPIRVType * @@ -423,6 +721,19 @@ SPIRVGlobalRegistry::getOrCreateSPIRVBoolType(MachineIRBuilder &MIRBuilder) { MIRBuilder); } +SPIRVType * +SPIRVGlobalRegistry::getOrCreateSPIRVBoolType(MachineInstr &I, + const SPIRVInstrInfo &TII) { + Type *LLVMTy = IntegerType::get(CurMF->getFunction().getContext(), 1); + Register Reg = DT.find(LLVMTy, CurMF); + if (Reg.isValid()) + return getSPIRVTypeForVReg(Reg); + MachineBasicBlock &BB = *I.getParent(); + auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpTypeBool)) + .addDef(createTypeVReg(CurMF->getRegInfo())); + return finishCreatingSPIRVType(LLVMTy, MIB); +} + SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVVectorType( SPIRVType *BaseType, unsigned NumElements, MachineIRBuilder &MIRBuilder) { return getOrCreateSPIRVType( @@ -436,12 +747,15 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVVectorType( const SPIRVInstrInfo &TII) { Type *LLVMTy = FixedVectorType::get( const_cast<Type *>(getTypeForSPIRVType(BaseType)), NumElements); + Register Reg = DT.find(LLVMTy, CurMF); + if (Reg.isValid()) + return getSPIRVTypeForVReg(Reg); MachineBasicBlock &BB = *I.getParent(); auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpTypeVector)) .addDef(createTypeVReg(CurMF->getRegInfo())) .addUse(getSPIRVTypeID(BaseType)) .addImm(NumElements); - return restOfCreateSPIRVType(LLVMTy, MIB); + return finishCreatingSPIRVType(LLVMTy, MIB); } SPIRVType * @@ -460,10 +774,39 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVPointerType( Type *LLVMTy = PointerType::get(const_cast<Type *>(getTypeForSPIRVType(BaseType)), storageClassToAddressSpace(SC)); + Register Reg = DT.find(LLVMTy, CurMF); + if (Reg.isValid()) + return getSPIRVTypeForVReg(Reg); MachineBasicBlock &BB = *I.getParent(); auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpTypePointer)) .addDef(createTypeVReg(CurMF->getRegInfo())) .addImm(static_cast<uint32_t>(SC)) .addUse(getSPIRVTypeID(BaseType)); - return restOfCreateSPIRVType(LLVMTy, MIB); + return finishCreatingSPIRVType(LLVMTy, MIB); +} + +Register SPIRVGlobalRegistry::getOrCreateUndef(MachineInstr &I, + SPIRVType *SpvType, + const SPIRVInstrInfo &TII) { + assert(SpvType); + const Type *LLVMTy = getTypeForSPIRVType(SpvType); + assert(LLVMTy); + // Find a constant in DT or build a new one. 
+ UndefValue *UV = UndefValue::get(const_cast<Type *>(LLVMTy)); + Register Res = DT.find(UV, CurMF); + if (Res.isValid()) + return Res; + LLT LLTy = LLT::scalar(32); + Res = CurMF->getRegInfo().createGenericVirtualRegister(LLTy); + assignSPIRVTypeToVReg(SpvType, Res, *CurMF); + DT.add(UV, CurMF, Res); + + MachineInstrBuilder MIB; + MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpUndef)) + .addDef(Res) + .addUse(getSPIRVTypeID(SpvType)); + const auto &ST = CurMF->getSubtarget(); + constrainSelectedInstRegOperands(*MIB, *ST.getInstrInfo(), + *ST.getRegisterInfo(), *ST.getRegBankInfo()); + return Res; } diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h index 13dcc20a3e0a..59ac2712a02f 100644 --- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h +++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h @@ -30,7 +30,7 @@ class SPIRVGlobalRegistry { // Do not confuse this with DuplicatesTracker as DT maps Type* to <MF, Reg> // where Reg = OpType... // while VRegToTypeMap tracks SPIR-V type assigned to other regs (i.e. not - // type-declaring ones) + // type-declaring ones). DenseMap<const MachineFunction *, DenseMap<Register, SPIRVType *>> VRegToTypeMap; @@ -38,6 +38,9 @@ class SPIRVGlobalRegistry { DenseMap<SPIRVType *, const Type *> SPIRVToLLVMType; + SmallPtrSet<const Type *, 4> TypesInProcessing; + DenseMap<const Type *, SPIRVType *> ForwardPointerTypes; + // Number of bits pointers and size_t integers require. const unsigned PointerSize; @@ -46,6 +49,14 @@ class SPIRVGlobalRegistry { createSPIRVType(const Type *Type, MachineIRBuilder &MIRBuilder, SPIRV::AccessQualifier AQ = SPIRV::AccessQualifier::ReadWrite, bool EmitIR = true); + SPIRVType *findSPIRVType( + const Type *Ty, MachineIRBuilder &MIRBuilder, + SPIRV::AccessQualifier accessQual = SPIRV::AccessQualifier::ReadWrite, + bool EmitIR = true); + SPIRVType *restOfCreateSPIRVType(const Type *Type, + MachineIRBuilder &MIRBuilder, + SPIRV::AccessQualifier AccessQual, + bool EmitIR); public: SPIRVGlobalRegistry(unsigned PointerSize); @@ -91,6 +102,11 @@ public: const Type *Type, Register VReg, MachineIRBuilder &MIRBuilder, SPIRV::AccessQualifier AQ = SPIRV::AccessQualifier::ReadWrite, bool EmitIR = true); + SPIRVType *assignIntTypeToVReg(unsigned BitWidth, Register VReg, + MachineInstr &I, const SPIRVInstrInfo &TII); + SPIRVType *assignVectTypeToVReg(SPIRVType *BaseType, unsigned NumElements, + Register VReg, MachineInstr &I, + const SPIRVInstrInfo &TII); // In cases where the SPIR-V type is already known, this function can be // used to map it to the given VReg via an ASSIGN_TYPE instruction. @@ -123,10 +139,7 @@ public: } // Return the VReg holding the result of the given OpTypeXXX instruction. 
- Register getSPIRVTypeID(const SPIRVType *SpirvType) const { - assert(SpirvType && "Attempting to get type id for nullptr type."); - return SpirvType->defs().begin()->getReg(); - } + Register getSPIRVTypeID(const SPIRVType *SpirvType) const; void setCurrentFunc(MachineFunction &MF) { CurMF = &MF; } @@ -167,19 +180,38 @@ private: SPIRVType *getOpTypeArray(uint32_t NumElems, SPIRVType *ElemType, MachineIRBuilder &MIRBuilder, bool EmitIR = true); + SPIRVType *getOpTypeOpaque(const StructType *Ty, + MachineIRBuilder &MIRBuilder); + + SPIRVType *getOpTypeStruct(const StructType *Ty, MachineIRBuilder &MIRBuilder, + bool EmitIR = true); + SPIRVType *getOpTypePointer(SPIRV::StorageClass SC, SPIRVType *ElemType, - MachineIRBuilder &MIRBuilder); + MachineIRBuilder &MIRBuilder, Register Reg); + + SPIRVType *getOpTypeForwardPointer(SPIRV::StorageClass SC, + MachineIRBuilder &MIRBuilder); SPIRVType *getOpTypeFunction(SPIRVType *RetType, const SmallVectorImpl<SPIRVType *> &ArgTypes, MachineIRBuilder &MIRBuilder); - SPIRVType *restOfCreateSPIRVType(const Type *LLVMTy, SPIRVType *SpirvType); + std::tuple<Register, ConstantInt *, bool> getOrCreateConstIntReg( + uint64_t Val, SPIRVType *SpvType, MachineIRBuilder *MIRBuilder, + MachineInstr *I = nullptr, const SPIRVInstrInfo *TII = nullptr); + SPIRVType *finishCreatingSPIRVType(const Type *LLVMTy, SPIRVType *SpirvType); public: Register buildConstantInt(uint64_t Val, MachineIRBuilder &MIRBuilder, SPIRVType *SpvType = nullptr, bool EmitIR = true); + Register getOrCreateConstInt(uint64_t Val, MachineInstr &I, + SPIRVType *SpvType, const SPIRVInstrInfo &TII); Register buildConstantFP(APFloat Val, MachineIRBuilder &MIRBuilder, SPIRVType *SpvType = nullptr); + Register getOrCreateConsIntVector(uint64_t Val, MachineInstr &I, + SPIRVType *SpvType, + const SPIRVInstrInfo &TII); + Register getOrCreateUndef(MachineInstr &I, SPIRVType *SpvType, + const SPIRVInstrInfo &TII); Register buildGlobalVariable(Register Reg, SPIRVType *BaseType, StringRef Name, const GlobalValue *GV, SPIRV::StorageClass Storage, @@ -193,19 +225,24 @@ public: SPIRVType *getOrCreateSPIRVIntegerType(unsigned BitWidth, MachineInstr &I, const SPIRVInstrInfo &TII); SPIRVType *getOrCreateSPIRVBoolType(MachineIRBuilder &MIRBuilder); + SPIRVType *getOrCreateSPIRVBoolType(MachineInstr &I, + const SPIRVInstrInfo &TII); SPIRVType *getOrCreateSPIRVVectorType(SPIRVType *BaseType, unsigned NumElements, MachineIRBuilder &MIRBuilder); SPIRVType *getOrCreateSPIRVVectorType(SPIRVType *BaseType, unsigned NumElements, MachineInstr &I, const SPIRVInstrInfo &TII); - SPIRVType *getOrCreateSPIRVPointerType( SPIRVType *BaseType, MachineIRBuilder &MIRBuilder, SPIRV::StorageClass SClass = SPIRV::StorageClass::Function); SPIRVType *getOrCreateSPIRVPointerType( SPIRVType *BaseType, MachineInstr &I, const SPIRVInstrInfo &TII, SPIRV::StorageClass SClass = SPIRV::StorageClass::Function); + SPIRVType *getOrCreateOpTypeFunctionWithArgs( + const Type *Ty, SPIRVType *RetType, + const SmallVectorImpl<SPIRVType *> &ArgTypes, + MachineIRBuilder &MIRBuilder); }; } // end namespace llvm #endif // LLLVM_LIB_TARGET_SPIRV_SPIRVTYPEMANAGER_H diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp index 754906308114..66d8b17b4296 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp @@ -52,7 +52,7 @@ bool SPIRVInstrInfo::isTypeDeclInstr(const MachineInstr &MI) const { auto DefRegClass = MRI.getRegClassOrNull(MI.getOperand(0).getReg()); return 
DefRegClass && DefRegClass->getID() == SPIRV::TYPERegClass.getID(); } else { - return false; + return MI.getOpcode() == SPIRV::OpTypeForwardPointer; } } @@ -193,3 +193,15 @@ void SPIRVInstrInfo::copyPhysReg(MachineBasicBlock &MBB, auto &MRI = I->getMF()->getRegInfo(); MRI.replaceRegWith(DstOp.getReg(), SrcOp.getReg()); } + +bool SPIRVInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { + if (MI.getOpcode() == SPIRV::GET_ID || MI.getOpcode() == SPIRV::GET_fID || + MI.getOpcode() == SPIRV::GET_pID || MI.getOpcode() == SPIRV::GET_vfID || + MI.getOpcode() == SPIRV::GET_vID) { + auto &MRI = MI.getMF()->getRegInfo(); + MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); + MI.eraseFromParent(); + return true; + } + return false; +} diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h index 2600d9cfca2e..334351c8eeae 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h +++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h @@ -48,6 +48,7 @@ public: void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc) const override; + bool expandPostRAPseudo(MachineInstr &MI) const override; }; } // namespace llvm diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td index d6fec5fd0785..d1c20795f804 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td +++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td @@ -449,6 +449,7 @@ def OpCopyLogical: UnOp<"OpCopyLogical", 400>; def OpSNegate: UnOp<"OpSNegate", 126>; def OpFNegate: UnOpTyped<"OpFNegate", 127, fID, fneg>; +def OpFNegateV: UnOpTyped<"OpFNegate", 127, vfID, fneg>; defm OpIAdd: BinOpTypedGen<"OpIAdd", 128, add, 0, 1>; defm OpFAdd: BinOpTypedGen<"OpFAdd", 129, fadd, 1, 1>; @@ -618,8 +619,10 @@ def OpAtomicCompareExchange: Op<230, (outs ID:$res), (ins TYPE:$ty, ID:$ptr, ID:$sc, ID:$eq, ID:$neq, ID:$val, ID:$cmp), "$res = OpAtomicCompareExchange $ty $ptr $sc $eq $neq $val $cmp">; -// TODO Currently the following deprecated opcode is missing: -// OpAtomicCompareExchangeWeak +def OpAtomicCompareExchangeWeak: Op<231, (outs ID:$res), + (ins TYPE:$ty, ID:$ptr, ID:$sc, ID:$eq, + ID:$neq, ID:$val, ID:$cmp), + "$res = OpAtomicCompareExchangeWeak $ty $ptr $sc $eq $neq $val $cmp">; def OpAtomicIIncrement: AtomicOp<"OpAtomicIIncrement", 232>; def OpAtomicIDecrement: AtomicOp<"OpAtomicIDecrement", 233>; @@ -660,6 +663,11 @@ def OpMemoryNamedBarrier: Op<329, (outs), (ins ID:$barr, ID:$mem, ID:$sem), // 3.42.21. Group and Subgroup Instructions +def OpGroupAsyncCopy: Op<259, (outs ID:$res), (ins TYPE:$ty, ID:$scope, + ID:$dst, ID:$src, ID:$nelts, ID:$stride, ID:$event), + "$res = OpGroupAsyncCopy $ty $scope $dst $src $nelts $stride $event">; +def OpGroupWaitEvents: Op<260, (outs), (ins ID:$scope, ID:$nelts, ID:$elist), + "OpGroupWaitEvents $scope $nelts $elist">; def OpGroupAll: Op<261, (outs ID:$res), (ins TYPE:$ty, ID:$scope, ID:$pr), "$res = OpGroupAll $ty $scope $pr">; def OpGroupAny: Op<262, (outs ID:$res), (ins TYPE:$ty, ID:$scope, ID:$pr), @@ -680,6 +688,18 @@ def OpGroupUMax: OpGroup<"UMax", 270>; def OpGroupSMax: OpGroup<"SMax", 271>; // TODO: 3.42.22. 
Device-Side Enqueue Instructions +def OpRetainEvent: Op<297, (outs), (ins ID:$event), "OpRetainEvent $event">; +def OpReleaseEvent: Op<298, (outs), (ins ID:$event), "OpReleaseEvent $event">; +def OpCreateUserEvent: Op<299, (outs ID:$res), (ins TYPE:$type), + "$res = OpCreateUserEvent $type">; +def OpIsValidEvent: Op<300, (outs ID:$res), (ins TYPE:$type, ID:$event), + "$res = OpIsValidEvent $type $event ">; +def OpSetUserEventStatus: Op<301, (outs), (ins ID:$event, ID:$status), + "OpSetUserEventStatus $event $status">; +def OpCaptureEventProfilingInfo: Op<302, (outs), + (ins ID:$event, ID:$info, ID:$value), + "OpCaptureEventProfilingInfo $event $info $value">; + // TODO: 3.42.23. Pipe Instructions // 3.42.24. Non-Uniform Instructions diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 90b921a06f21..9365fd22e4e7 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -197,6 +197,8 @@ void SPIRVInstructionSelector::setupMF(MachineFunction &MF, GISelKnownBits *KB, InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI); } +static bool isImm(const MachineOperand &MO, MachineRegisterInfo *MRI); + // Defined in SPIRVLegalizerInfo.cpp. extern bool isTypeFoldingSupported(unsigned Opcode); @@ -335,6 +337,30 @@ bool SPIRVInstructionSelector::spvSelect(Register ResVReg, return selectUnOp(ResVReg, ResType, I, SPIRV::OpBitcast); case TargetOpcode::G_ADDRSPACE_CAST: return selectAddrSpaceCast(ResVReg, ResType, I); + case TargetOpcode::G_PTR_ADD: { + // Currently, we get G_PTR_ADD only as a result of translating + // global variables, initialized with constant expressions like GV + Const + // (see test opencl/basic/progvar_prog_scope_init.ll). + // TODO: extend the handler once we have other cases. 
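For reference, a minimal source-level shape that produces the "GV + Const" initializer described in the comment above is a program-scope pointer initialized with another global plus a constant offset; the names below are illustrative, not from the referenced test:

// The initializer of `Entry` is the constant expression `Buffer + 1`, which
// reaches the backend as a G_PTR_ADD of the G_GLOBAL_VALUE for `Buffer` and
// is selected below as OpSpecConstantOp InBoundsPtrAccessChain.
int Buffer[4];
int *Entry = &Buffer[1];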
+ assert(I.getOperand(1).isReg() && I.getOperand(2).isReg()); + Register GV = I.getOperand(1).getReg(); + MachineRegisterInfo::def_instr_iterator II = MRI->def_instr_begin(GV); + assert(((*II).getOpcode() == TargetOpcode::G_GLOBAL_VALUE || + (*II).getOpcode() == TargetOpcode::COPY || + (*II).getOpcode() == SPIRV::OpVariable) && + isImm(I.getOperand(2), MRI)); + Register Idx = buildZerosVal(GR.getOrCreateSPIRVIntegerType(32, I, TII), I); + MachineBasicBlock &BB = *I.getParent(); + auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpSpecConstantOp)) + .addDef(ResVReg) + .addUse(GR.getSPIRVTypeID(ResType)) + .addImm(static_cast<uint32_t>( + SPIRV::Opcode::InBoundsPtrAccessChain)) + .addUse(GV) + .addUse(Idx) + .addUse(I.getOperand(2).getReg()); + return MIB.constrainAllUses(TII, TRI, RBI); + } case TargetOpcode::G_ATOMICRMW_OR: return selectAtomicRMW(ResVReg, ResType, I, SPIRV::OpAtomicOr); @@ -387,23 +413,6 @@ bool SPIRVInstructionSelector::selectUnOp(Register ResVReg, Opcode); } -static SPIRV::MemorySemantics getMemSemantics(AtomicOrdering Ord) { - switch (Ord) { - case AtomicOrdering::Acquire: - return SPIRV::MemorySemantics::Acquire; - case AtomicOrdering::Release: - return SPIRV::MemorySemantics::Release; - case AtomicOrdering::AcquireRelease: - return SPIRV::MemorySemantics::AcquireRelease; - case AtomicOrdering::SequentiallyConsistent: - return SPIRV::MemorySemantics::SequentiallyConsistent; - case AtomicOrdering::Unordered: - case AtomicOrdering::Monotonic: - case AtomicOrdering::NotAtomic: - return SPIRV::MemorySemantics::None; - } -} - static SPIRV::Scope getScope(SyncScope::ID Ord) { switch (Ord) { case SyncScope::SingleThread: @@ -484,16 +493,15 @@ bool SPIRVInstructionSelector::selectMemOperation(Register ResVReg, MachineInstr &I) const { MachineBasicBlock &BB = *I.getParent(); auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCopyMemorySized)) - .addDef(I.getOperand(0).getReg()) + .addUse(I.getOperand(0).getReg()) .addUse(I.getOperand(1).getReg()) .addUse(I.getOperand(2).getReg()); if (I.getNumMemOperands()) addMemoryOperands(*I.memoperands_begin(), MIB); bool Result = MIB.constrainAllUses(TII, TRI, RBI); - if (ResVReg.isValid() && ResVReg != MIB->getOperand(0).getReg()) { + if (ResVReg.isValid() && ResVReg != MIB->getOperand(0).getReg()) BuildMI(BB, I, I.getDebugLoc(), TII.get(TargetOpcode::COPY), ResVReg) .addUse(MIB->getOperand(0).getReg()); - } return Result; } @@ -541,36 +549,71 @@ bool SPIRVInstructionSelector::selectFence(MachineInstr &I) const { bool SPIRVInstructionSelector::selectAtomicCmpXchg(Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const { - assert(I.hasOneMemOperand()); - const MachineMemOperand *MemOp = *I.memoperands_begin(); - uint32_t Scope = static_cast<uint32_t>(getScope(MemOp->getSyncScopeID())); - Register ScopeReg = buildI32Constant(Scope, I); - + Register ScopeReg; + Register MemSemEqReg; + Register MemSemNeqReg; Register Ptr = I.getOperand(2).getReg(); + if (I.getOpcode() != TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS) { + assert(I.hasOneMemOperand()); + const MachineMemOperand *MemOp = *I.memoperands_begin(); + unsigned Scope = static_cast<uint32_t>(getScope(MemOp->getSyncScopeID())); + ScopeReg = buildI32Constant(Scope, I); + + unsigned ScSem = static_cast<uint32_t>( + getMemSemanticsForStorageClass(GR.getPointerStorageClass(Ptr))); + AtomicOrdering AO = MemOp->getSuccessOrdering(); + unsigned MemSemEq = static_cast<uint32_t>(getMemSemantics(AO)) | ScSem; + MemSemEqReg = buildI32Constant(MemSemEq, I); + AtomicOrdering 
FO = MemOp->getFailureOrdering(); + unsigned MemSemNeq = static_cast<uint32_t>(getMemSemantics(FO)) | ScSem; + MemSemNeqReg = + MemSemEq == MemSemNeq ? MemSemEqReg : buildI32Constant(MemSemNeq, I); + } else { + ScopeReg = I.getOperand(5).getReg(); + MemSemEqReg = I.getOperand(6).getReg(); + MemSemNeqReg = I.getOperand(7).getReg(); + } + Register Cmp = I.getOperand(3).getReg(); Register Val = I.getOperand(4).getReg(); - SPIRVType *SpvValTy = GR.getSPIRVTypeForVReg(Val); - SPIRV::StorageClass SC = GR.getPointerStorageClass(Ptr); - uint32_t ScSem = static_cast<uint32_t>(getMemSemanticsForStorageClass(SC)); - AtomicOrdering AO = MemOp->getSuccessOrdering(); - uint32_t MemSemEq = static_cast<uint32_t>(getMemSemantics(AO)) | ScSem; - Register MemSemEqReg = buildI32Constant(MemSemEq, I); - AtomicOrdering FO = MemOp->getFailureOrdering(); - uint32_t MemSemNeq = static_cast<uint32_t>(getMemSemantics(FO)) | ScSem; - Register MemSemNeqReg = - MemSemEq == MemSemNeq ? MemSemEqReg : buildI32Constant(MemSemNeq, I); + Register ACmpRes = MRI->createVirtualRegister(&SPIRV::IDRegClass); const DebugLoc &DL = I.getDebugLoc(); - return BuildMI(*I.getParent(), I, DL, TII.get(SPIRV::OpAtomicCompareExchange)) - .addDef(ResVReg) - .addUse(GR.getSPIRVTypeID(SpvValTy)) - .addUse(Ptr) - .addUse(ScopeReg) - .addUse(MemSemEqReg) - .addUse(MemSemNeqReg) - .addUse(Val) - .addUse(Cmp) - .constrainAllUses(TII, TRI, RBI); + bool Result = + BuildMI(*I.getParent(), I, DL, TII.get(SPIRV::OpAtomicCompareExchange)) + .addDef(ACmpRes) + .addUse(GR.getSPIRVTypeID(SpvValTy)) + .addUse(Ptr) + .addUse(ScopeReg) + .addUse(MemSemEqReg) + .addUse(MemSemNeqReg) + .addUse(Val) + .addUse(Cmp) + .constrainAllUses(TII, TRI, RBI); + Register CmpSuccReg = MRI->createVirtualRegister(&SPIRV::IDRegClass); + SPIRVType *BoolTy = GR.getOrCreateSPIRVBoolType(I, TII); + Result |= BuildMI(*I.getParent(), I, DL, TII.get(SPIRV::OpIEqual)) + .addDef(CmpSuccReg) + .addUse(GR.getSPIRVTypeID(BoolTy)) + .addUse(ACmpRes) + .addUse(Cmp) + .constrainAllUses(TII, TRI, RBI); + Register TmpReg = MRI->createVirtualRegister(&SPIRV::IDRegClass); + Result |= BuildMI(*I.getParent(), I, DL, TII.get(SPIRV::OpCompositeInsert)) + .addDef(TmpReg) + .addUse(GR.getSPIRVTypeID(ResType)) + .addUse(ACmpRes) + .addUse(GR.getOrCreateUndef(I, ResType, TII)) + .addImm(0) + .constrainAllUses(TII, TRI, RBI); + Result |= BuildMI(*I.getParent(), I, DL, TII.get(SPIRV::OpCompositeInsert)) + .addDef(ResVReg) + .addUse(GR.getSPIRVTypeID(ResType)) + .addUse(CmpSuccReg) + .addUse(TmpReg) + .addImm(1) + .constrainAllUses(TII, TRI, RBI); + return Result; } static bool isGenericCastablePtr(SPIRV::StorageClass SC) { @@ -592,6 +635,27 @@ static bool isGenericCastablePtr(SPIRV::StorageClass SC) { bool SPIRVInstructionSelector::selectAddrSpaceCast(Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const { + // If the AddrSpaceCast user is single and in OpConstantComposite or + // OpVariable, we should select OpSpecConstantOp. 
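The compare-exchange selection in selectAtomicCmpXchg above can be summarized with a plain C++ sketch (hypothetical names, not part of the patch): OpAtomicCompareExchange yields only the original value, so the {old value, success} pair that LLVM's cmpxchg produces is rebuilt with an OpIEqual plus two OpCompositeInsert operations into an OpUndef value.

#include <cstdint>
// Ordinary-C++ mirror of the selected instruction sequence.
struct CmpXchgResult {
  uint32_t OldValue; // index 0: the OpAtomicCompareExchange result (ACmpRes)
  bool Success;      // index 1: OpIEqual(ACmpRes, Cmp)
};
static CmpXchgResult rebuildResult(uint32_t ACmpRes, uint32_t Cmp) {
  CmpXchgResult R{};            // starts from OpUndef
  R.OldValue = ACmpRes;         // first OpCompositeInsert, index 0
  R.Success = (ACmpRes == Cmp); // second OpCompositeInsert, index 1
  return R;
}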
+ auto UIs = MRI->use_instructions(ResVReg); + if (!UIs.empty() && ++UIs.begin() == UIs.end() && + (UIs.begin()->getOpcode() == SPIRV::OpConstantComposite || + UIs.begin()->getOpcode() == SPIRV::OpVariable || + isSpvIntrinsic(*UIs.begin(), Intrinsic::spv_init_global))) { + Register NewReg = I.getOperand(1).getReg(); + MachineBasicBlock &BB = *I.getParent(); + SPIRVType *SpvBaseTy = GR.getOrCreateSPIRVIntegerType(8, I, TII); + ResType = GR.getOrCreateSPIRVPointerType(SpvBaseTy, I, TII, + SPIRV::StorageClass::Generic); + bool Result = + BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpSpecConstantOp)) + .addDef(ResVReg) + .addUse(GR.getSPIRVTypeID(ResType)) + .addImm(static_cast<uint32_t>(SPIRV::Opcode::PtrCastToGeneric)) + .addUse(NewReg) + .constrainAllUses(TII, TRI, RBI); + return Result; + } Register SrcPtr = I.getOperand(1).getReg(); SPIRVType *SrcPtrTy = GR.getSPIRVTypeForVReg(SrcPtr); SPIRV::StorageClass SrcSC = GR.getPointerStorageClass(SrcPtr); @@ -842,7 +906,9 @@ bool SPIRVInstructionSelector::selectFCmp(Register ResVReg, Register SPIRVInstructionSelector::buildZerosVal(const SPIRVType *ResType, MachineInstr &I) const { - return buildI32Constant(0, I, ResType); + if (ResType->getOpcode() == SPIRV::OpTypeVector) + return GR.getOrCreateConsIntVector(0, I, ResType, TII); + return GR.getOrCreateConstInt(0, I, ResType, TII); } Register SPIRVInstructionSelector::buildOnesVal(bool AllOnes, @@ -851,20 +917,9 @@ Register SPIRVInstructionSelector::buildOnesVal(bool AllOnes, unsigned BitWidth = GR.getScalarOrVectorBitWidth(ResType); APInt One = AllOnes ? APInt::getAllOnesValue(BitWidth) : APInt::getOneBitSet(BitWidth, 0); - Register OneReg = buildI32Constant(One.getZExtValue(), I, ResType); - if (ResType->getOpcode() == SPIRV::OpTypeVector) { - const unsigned NumEles = ResType->getOperand(2).getImm(); - Register OneVec = MRI->createVirtualRegister(&SPIRV::IDRegClass); - unsigned Opcode = SPIRV::OpConstantComposite; - auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opcode)) - .addDef(OneVec) - .addUse(GR.getSPIRVTypeID(ResType)); - for (unsigned i = 0; i < NumEles; ++i) - MIB.addUse(OneReg); - constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); - return OneVec; - } - return OneReg; + if (ResType->getOpcode() == SPIRV::OpTypeVector) + return GR.getOrCreateConsIntVector(One.getZExtValue(), I, ResType, TII); + return GR.getOrCreateConstInt(One.getZExtValue(), I, ResType, TII); } bool SPIRVInstructionSelector::selectSelect(Register ResVReg, @@ -959,13 +1014,23 @@ bool SPIRVInstructionSelector::selectConst(Register ResVReg, const SPIRVType *ResType, const APInt &Imm, MachineInstr &I) const { - assert(ResType->getOpcode() != SPIRV::OpTypePointer || Imm.isNullValue()); + unsigned TyOpcode = ResType->getOpcode(); + assert(TyOpcode != SPIRV::OpTypePointer || Imm.isNullValue()); MachineBasicBlock &BB = *I.getParent(); - if (ResType->getOpcode() == SPIRV::OpTypePointer && Imm.isNullValue()) { + if ((TyOpcode == SPIRV::OpTypePointer || TyOpcode == SPIRV::OpTypeEvent) && + Imm.isNullValue()) return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantNull)) .addDef(ResVReg) .addUse(GR.getSPIRVTypeID(ResType)) .constrainAllUses(TII, TRI, RBI); + if (TyOpcode == SPIRV::OpTypeInt) { + Register Reg = GR.getOrCreateConstInt(Imm.getZExtValue(), I, ResType, TII); + if (Reg == ResVReg) + return true; + return BuildMI(BB, I, I.getDebugLoc(), TII.get(TargetOpcode::COPY)) + .addDef(ResVReg) + .addUse(Reg) + .constrainAllUses(TII, TRI, RBI); } auto MIB = BuildMI(BB, I, I.getDebugLoc(), 
TII.get(SPIRV::OpConstantI)) .addDef(ResVReg) @@ -1006,29 +1071,29 @@ bool SPIRVInstructionSelector::selectInsertVal(Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const { MachineBasicBlock &BB = *I.getParent(); - return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeInsert)) - .addDef(ResVReg) - .addUse(GR.getSPIRVTypeID(ResType)) - // object to insert - .addUse(I.getOperand(3).getReg()) - // composite to insert into - .addUse(I.getOperand(2).getReg()) - // TODO: support arbitrary number of indices - .addImm(foldImm(I.getOperand(4), MRI)) - .constrainAllUses(TII, TRI, RBI); + auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeInsert)) + .addDef(ResVReg) + .addUse(GR.getSPIRVTypeID(ResType)) + // object to insert + .addUse(I.getOperand(3).getReg()) + // composite to insert into + .addUse(I.getOperand(2).getReg()); + for (unsigned i = 4; i < I.getNumOperands(); i++) + MIB.addImm(foldImm(I.getOperand(i), MRI)); + return MIB.constrainAllUses(TII, TRI, RBI); } bool SPIRVInstructionSelector::selectExtractVal(Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const { MachineBasicBlock &BB = *I.getParent(); - return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeExtract)) - .addDef(ResVReg) - .addUse(GR.getSPIRVTypeID(ResType)) - .addUse(I.getOperand(2).getReg()) - // TODO: support arbitrary number of indices - .addImm(foldImm(I.getOperand(3), MRI)) - .constrainAllUses(TII, TRI, RBI); + auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeExtract)) + .addDef(ResVReg) + .addUse(GR.getSPIRVTypeID(ResType)) + .addUse(I.getOperand(2).getReg()); + for (unsigned i = 3; i < I.getNumOperands(); i++) + MIB.addImm(foldImm(I.getOperand(i), MRI)); + return MIB.constrainAllUses(TII, TRI, RBI); } bool SPIRVInstructionSelector::selectInsertElt(Register ResVReg, @@ -1154,6 +1219,9 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg, } return MIB.constrainAllUses(TII, TRI, RBI); } break; + case Intrinsic::spv_cmpxchg: + return selectAtomicCmpXchg(ResVReg, ResType, I); + break; default: llvm_unreachable("Intrinsic selection not implemented"); } @@ -1239,8 +1307,32 @@ bool SPIRVInstructionSelector::selectGlobalValue( GV->getType(), MIRBuilder, SPIRV::AccessQualifier::ReadWrite, false); std::string GlobalIdent = GV->getGlobalIdentifier(); - // TODO: suport @llvm.global.annotations. + // We have functions as operands in tests with blocks of instruction e.g. in + // transcoding/global_block.ll. These operands are not used and should be + // substituted by zero constants. Their type is expected to be always + // OpTypePointer Function %uchar. 
+ if (isa<Function>(GV)) { + const Constant *ConstVal = GV; + MachineBasicBlock &BB = *I.getParent(); + Register NewReg = GR.find(ConstVal, GR.CurMF); + if (!NewReg.isValid()) { + SPIRVType *SpvBaseTy = GR.getOrCreateSPIRVIntegerType(8, I, TII); + ResType = GR.getOrCreateSPIRVPointerType(SpvBaseTy, I, TII); + Register NewReg = ResVReg; + GR.add(ConstVal, GR.CurMF, NewReg); + return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantNull)) + .addDef(NewReg) + .addUse(GR.getSPIRVTypeID(ResType)) + .constrainAllUses(TII, TRI, RBI); + } + assert(NewReg != ResVReg); + return BuildMI(BB, I, I.getDebugLoc(), TII.get(TargetOpcode::COPY)) + .addDef(ResVReg) + .addUse(NewReg) + .constrainAllUses(TII, TRI, RBI); + } auto GlobalVar = cast<GlobalVariable>(GV); + assert(GlobalVar->getName() != "llvm.global.annotations"); bool HasInit = GlobalVar->hasInitializer() && !isa<UndefValue>(GlobalVar->getInitializer()); diff --git a/llvm/lib/Target/SPIRV/SPIRVMCInstLower.cpp b/llvm/lib/Target/SPIRV/SPIRVMCInstLower.cpp index 8e4ab973bf07..8aaac50c94d7 100644 --- a/llvm/lib/Target/SPIRV/SPIRVMCInstLower.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVMCInstLower.cpp @@ -45,7 +45,12 @@ void SPIRVMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI, break; } case MachineOperand::MO_Immediate: - MCOp = MCOperand::createImm(MO.getImm()); + if (MI->getOpcode() == SPIRV::OpExtInst && i == 2) { + Register Reg = MAI->getExtInstSetReg(MO.getImm()); + MCOp = MCOperand::createReg(Reg); + } else { + MCOp = MCOperand::createImm(MO.getImm()); + } break; case MachineOperand::MO_FPImmediate: MCOp = MCOperand::createDFPImm( diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp index a39df5234935..143ddf7297dc 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp @@ -60,62 +60,50 @@ void SPIRVModuleAnalysis::setBaseInfo(const Module &M) { MAI.InstrsToDelete.clear(); MAI.FuncNameMap.clear(); MAI.GlobalVarList.clear(); + MAI.ExtInstSetMap.clear(); // TODO: determine memory model and source language from the configuratoin. - MAI.Mem = SPIRV::MemoryModel::OpenCL; - MAI.SrcLang = SPIRV::SourceLanguage::OpenCL_C; - unsigned PtrSize = ST->getPointerSize(); - MAI.Addr = PtrSize == 32 ? SPIRV::AddressingModel::Physical32 - : PtrSize == 64 ? SPIRV::AddressingModel::Physical64 - : SPIRV::AddressingModel::Logical; + if (auto MemModel = M.getNamedMetadata("spirv.MemoryModel")) { + auto MemMD = MemModel->getOperand(0); + MAI.Addr = static_cast<SPIRV::AddressingModel>(getMetadataUInt(MemMD, 0)); + MAI.Mem = static_cast<SPIRV::MemoryModel>(getMetadataUInt(MemMD, 1)); + } else { + MAI.Mem = SPIRV::MemoryModel::OpenCL; + unsigned PtrSize = ST->getPointerSize(); + MAI.Addr = PtrSize == 32 ? SPIRV::AddressingModel::Physical32 + : PtrSize == 64 ? SPIRV::AddressingModel::Physical64 + : SPIRV::AddressingModel::Logical; + } // Get the OpenCL version number from metadata. // TODO: support other source languages. - MAI.SrcLangVersion = 0; if (auto VerNode = M.getNamedMetadata("opencl.ocl.version")) { - // Construct version literal according to OpenCL 2.2 environment spec. + MAI.SrcLang = SPIRV::SourceLanguage::OpenCL_C; + // Construct version literal in accordance with SPIRV-LLVM-Translator. + // TODO: support multiple OCL version metadata. 
+ assert(VerNode->getNumOperands() > 0 && "Invalid SPIR"); auto VersionMD = VerNode->getOperand(0); unsigned MajorNum = getMetadataUInt(VersionMD, 0, 2); unsigned MinorNum = getMetadataUInt(VersionMD, 1); unsigned RevNum = getMetadataUInt(VersionMD, 2); - MAI.SrcLangVersion = 0 | (MajorNum << 16) | (MinorNum << 8) | RevNum; + MAI.SrcLangVersion = (MajorNum * 100 + MinorNum) * 1000 + RevNum; + } else { + MAI.SrcLang = SPIRV::SourceLanguage::Unknown; + MAI.SrcLangVersion = 0; } -} -// True if there is an instruction in the MS list with all the same operands as -// the given instruction has (after the given starting index). -// TODO: maybe it needs to check Opcodes too. -static bool findSameInstrInMS(const MachineInstr &A, - SPIRV::ModuleSectionType MSType, - SPIRV::ModuleAnalysisInfo &MAI, - bool UpdateRegAliases, - unsigned StartOpIndex = 0) { - for (const auto *B : MAI.MS[MSType]) { - const unsigned NumAOps = A.getNumOperands(); - if (NumAOps == B->getNumOperands() && A.getNumDefs() == B->getNumDefs()) { - bool AllOpsMatch = true; - for (unsigned i = StartOpIndex; i < NumAOps && AllOpsMatch; ++i) { - if (A.getOperand(i).isReg() && B->getOperand(i).isReg()) { - Register RegA = A.getOperand(i).getReg(); - Register RegB = B->getOperand(i).getReg(); - AllOpsMatch = MAI.getRegisterAlias(A.getMF(), RegA) == - MAI.getRegisterAlias(B->getMF(), RegB); - } else { - AllOpsMatch = A.getOperand(i).isIdenticalTo(B->getOperand(i)); - } - } - if (AllOpsMatch) { - if (UpdateRegAliases) { - assert(A.getOperand(0).isReg() && B->getOperand(0).isReg()); - Register LocalReg = A.getOperand(0).getReg(); - Register GlobalReg = - MAI.getRegisterAlias(B->getMF(), B->getOperand(0).getReg()); - MAI.setRegisterAlias(A.getMF(), LocalReg, GlobalReg); - } - return true; - } + if (auto ExtNode = M.getNamedMetadata("opencl.used.extensions")) { + for (unsigned I = 0, E = ExtNode->getNumOperands(); I != E; ++I) { + MDNode *MD = ExtNode->getOperand(I); + if (!MD || MD->getNumOperands() == 0) + continue; + for (unsigned J = 0, N = MD->getNumOperands(); J != N; ++J) + MAI.SrcExt.insert(cast<MDString>(MD->getOperand(J))->getString()); } } - return false; + + // TODO: check if it's required by default. + MAI.ExtInstSetMap[static_cast<unsigned>(SPIRV::InstructionSet::OpenCL_std)] = + Register::index2VirtReg(MAI.getNextID()); } // Collect MI which defines the register in the given machine function. @@ -135,7 +123,7 @@ void SPIRVModuleAnalysis::collectGlobalEntities( const std::vector<SPIRV::DTSortableEntry *> &DepsGraph, SPIRV::ModuleSectionType MSType, std::function<bool(const SPIRV::DTSortableEntry *)> Pred, - bool UsePreOrder) { + bool UsePreOrder = false) { DenseSet<const SPIRV::DTSortableEntry *> Visited; for (const auto *E : DepsGraph) { std::function<void(const SPIRV::DTSortableEntry *)> RecHoistUtil; @@ -188,13 +176,41 @@ void SPIRVModuleAnalysis::processDefInstrs(const Module &M) { collectGlobalEntities( DepsGraph, SPIRV::MB_TypeConstVars, - [](const SPIRV::DTSortableEntry *E) { return !E->getIsFunc(); }, false); + [](const SPIRV::DTSortableEntry *E) { return !E->getIsFunc(); }); collectGlobalEntities( DepsGraph, SPIRV::MB_ExtFuncDecls, [](const SPIRV::DTSortableEntry *E) { return E->getIsFunc(); }, true); } +// True if there is an instruction in the MS list with all the same operands as +// the given instruction has (after the given starting index). +// TODO: maybe it needs to check Opcodes too. 
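To make the version-literal encoding computed in setBaseInfo above concrete, here is the arithmetic for a typical opencl.ocl.version tuple (values assumed for illustration; the revision operand is taken as 0 when absent):

// OpenCL C 2.0 metadata, i.e. !{i32 2, i32 0}: MajorNum = 2, MinorNum = 0.
unsigned MajorNum = 2, MinorNum = 0, RevNum = 0;
unsigned SrcLangVersion = (MajorNum * 100 + MinorNum) * 1000 + RevNum; // 200000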
+static bool findSameInstrInMS(const MachineInstr &A, + SPIRV::ModuleSectionType MSType, + SPIRV::ModuleAnalysisInfo &MAI, + unsigned StartOpIndex = 0) { + for (const auto *B : MAI.MS[MSType]) { + const unsigned NumAOps = A.getNumOperands(); + if (NumAOps != B->getNumOperands() || A.getNumDefs() != B->getNumDefs()) + continue; + bool AllOpsMatch = true; + for (unsigned i = StartOpIndex; i < NumAOps && AllOpsMatch; ++i) { + if (A.getOperand(i).isReg() && B->getOperand(i).isReg()) { + Register RegA = A.getOperand(i).getReg(); + Register RegB = B->getOperand(i).getReg(); + AllOpsMatch = MAI.getRegisterAlias(A.getMF(), RegA) == + MAI.getRegisterAlias(B->getMF(), RegB); + } else { + AllOpsMatch = A.getOperand(i).isIdenticalTo(B->getOperand(i)); + } + } + if (AllOpsMatch) + return true; + } + return false; +} + // Look for IDs declared with Import linkage, and map the imported name string // to the register defining that variable (which will usually be the result of // an OpFunction). This lets us call externally imported functions using @@ -228,12 +244,16 @@ void SPIRVModuleAnalysis::collectFuncNames(MachineInstr &MI, // numbering has already occurred by this point. We can directly compare reg // arguments when detecting duplicates. static void collectOtherInstr(MachineInstr &MI, SPIRV::ModuleAnalysisInfo &MAI, - SPIRV::ModuleSectionType MSType) { + SPIRV::ModuleSectionType MSType, + bool Append = true) { MAI.setSkipEmission(&MI); - if (findSameInstrInMS(MI, MSType, MAI, false)) + if (findSameInstrInMS(MI, MSType, MAI)) return; // Found a duplicate, so don't add it. // No duplicates, so add it. - MAI.MS[MSType].push_back(&MI); + if (Append) + MAI.MS[MSType].push_back(&MI); + else + MAI.MS[MSType].insert(MAI.MS[MSType].begin(), &MI); } // Some global instructions make reference to function-local ID regs, so cannot @@ -256,15 +276,22 @@ void SPIRVModuleAnalysis::processOtherInstrs(const Module &M) { } else if (TII->isDecorationInstr(MI)) { collectOtherInstr(MI, MAI, SPIRV::MB_Annotations); collectFuncNames(MI, *F); + } else if (TII->isConstantInstr(MI)) { + // Now OpSpecConstant*s are not in DT, + // but they need to be collected anyway. + collectOtherInstr(MI, MAI, SPIRV::MB_TypeConstVars); } else if (OpCode == SPIRV::OpFunction) { collectFuncNames(MI, *F); + } else if (OpCode == SPIRV::OpTypeForwardPointer) { + collectOtherInstr(MI, MAI, SPIRV::MB_TypeConstVars, false); } } } } // Number registers in all functions globally from 0 onwards and store -// the result in global register alias table. +// the result in global register alias table. Some registers are already +// numbered in collectGlobalEntities. void SPIRVModuleAnalysis::numberRegistersGlobally(const Module &M) { for (auto F = M.begin(), E = M.end(); F != E; ++F) { if ((*F).isDeclaration()) @@ -282,11 +309,50 @@ void SPIRVModuleAnalysis::numberRegistersGlobally(const Module &M) { Register NewReg = Register::index2VirtReg(MAI.getNextID()); MAI.setRegisterAlias(MF, Reg, NewReg); } + if (MI.getOpcode() != SPIRV::OpExtInst) + continue; + auto Set = MI.getOperand(2).getImm(); + if (MAI.ExtInstSetMap.find(Set) == MAI.ExtInstSetMap.end()) + MAI.ExtInstSetMap[Set] = Register::index2VirtReg(MAI.getNextID()); } } } } +// Find OpIEqual and OpBranchConditional instructions originating from +// OpSwitches, mark them skipped for emission. Also mark MBB skipped if it +// contains only these instructions. 
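A hypothetical source-level illustration of the OpSwitch cleanup described just above (the function below is illustrative, not from the patch):

// Earlier switch handling expands this into a chain of "compare selector to
// case value, branch conditionally" pairs. Since the final module keeps the
// original OpSwitch, those synthetic OpIEqual/OpBranchConditional
// instructions, and any basic block that holds nothing else, are marked as
// skipped for emission.
int classify(int Selector) {
  switch (Selector) {
  case 0: return 10;
  case 1: return 20;
  default: return -1;
  }
}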
+static void processSwitches(const Module &M, SPIRV::ModuleAnalysisInfo &MAI, + MachineModuleInfo *MMI) { + DenseSet<Register> SwitchRegs; + for (auto F = M.begin(), E = M.end(); F != E; ++F) { + MachineFunction *MF = MMI->getMachineFunction(*F); + if (!MF) + continue; + for (MachineBasicBlock &MBB : *MF) + for (MachineInstr &MI : MBB) { + if (MAI.getSkipEmission(&MI)) + continue; + if (MI.getOpcode() == SPIRV::OpSwitch) { + assert(MI.getOperand(0).isReg()); + SwitchRegs.insert(MI.getOperand(0).getReg()); + } + if (MI.getOpcode() != SPIRV::OpIEqual || !MI.getOperand(2).isReg() || + !SwitchRegs.contains(MI.getOperand(2).getReg())) + continue; + Register CmpReg = MI.getOperand(0).getReg(); + MachineInstr *CBr = MI.getNextNode(); + assert(CBr && CBr->getOpcode() == SPIRV::OpBranchConditional && + CBr->getOperand(0).isReg() && + CBr->getOperand(0).getReg() == CmpReg); + MAI.setSkipEmission(&MI); + MAI.setSkipEmission(CBr); + if (&MBB.front() == &MI && &MBB.back() == CBr) + MAI.MBBsToSkip.insert(&MBB); + } + } +} + struct SPIRV::ModuleAnalysisInfo SPIRVModuleAnalysis::MAI; void SPIRVModuleAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { @@ -305,7 +371,9 @@ bool SPIRVModuleAnalysis::runOnModule(Module &M) { setBaseInfo(M); - // TODO: Process type/const/global var/func decl instructions, number their + processSwitches(M, MAI, MMI); + + // Process type/const/global var/func decl instructions, number their // destination registers from 0 to N, collect Extensions and Capabilities. processDefInstrs(M); diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h index 585868909d28..9bcdf6e9ae2a 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h @@ -52,6 +52,9 @@ struct ModuleAnalysisInfo { SPIRV::AddressingModel Addr; SPIRV::SourceLanguage SrcLang; unsigned SrcLangVersion; + StringSet<> SrcExt; + // Maps ExtInstSet to corresponding ID register. + DenseMap<unsigned, Register> ExtInstSetMap; // Contains the list of all global OpVariables in the module. SmallVector<MachineInstr *, 4> GlobalVarList; // Maps function names to coresponding function ID registers. @@ -59,6 +62,9 @@ struct ModuleAnalysisInfo { // The set contains machine instructions which are necessary // for correct MIR but will not be emitted in function bodies. DenseSet<MachineInstr *> InstrsToDelete; + // The set contains machine basic blocks which are necessary + // for correct MIR but will not be emitted. + DenseSet<MachineBasicBlock *> MBBsToSkip; // The table contains global aliases of local registers for each machine // function. The aliases are used to substitute local registers during // code emission. 
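A minimal sketch of the register alias table described in the comment above; standard containers and the `void *` key are used here purely for illustration and are not the actual declaration:

#include <map>
// Per machine function: local virtual register -> globally numbered register.
// During emission every local register operand is rewritten through this map,
// so identical global definitions from different functions share one id.
using LocalToGlobalReg = std::map<unsigned, unsigned>;
std::map<const void * /* MachineFunction */, LocalToGlobalReg> RegisterAliasTable;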
@@ -75,6 +81,7 @@ struct ModuleAnalysisInfo { assert(FuncReg != FuncNameMap.end() && "Cannot find function Id"); return FuncReg->second; } + Register getExtInstSetReg(unsigned SetNum) { return ExtInstSetMap[SetNum]; } InstrList &getMSInstrs(unsigned MSType) { return MS[MSType]; } void setSkipEmission(MachineInstr *MI) { InstrsToDelete.insert(MI); } bool getSkipEmission(const MachineInstr *MI) { @@ -123,7 +130,6 @@ public: private: void setBaseInfo(const Module &M); - template <typename T> void collectTypesConstsVars(); void collectGlobalEntities( const std::vector<SPIRV::DTSortableEntry *> &DepsGraph, SPIRV::ModuleSectionType MSType, diff --git a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp index 687f84046650..e620226dcc7a 100644 --- a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp @@ -39,11 +39,58 @@ public: }; } // namespace -static bool isSpvIntrinsic(MachineInstr &MI, Intrinsic::ID IntrinsicID) { - if (MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS && - MI.getIntrinsicID() == IntrinsicID) - return true; - return false; +static void addConstantsToTrack(MachineFunction &MF, SPIRVGlobalRegistry *GR) { + MachineRegisterInfo &MRI = MF.getRegInfo(); + DenseMap<MachineInstr *, Register> RegsAlreadyAddedToDT; + SmallVector<MachineInstr *, 10> ToErase, ToEraseComposites; + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + if (!isSpvIntrinsic(MI, Intrinsic::spv_track_constant)) + continue; + ToErase.push_back(&MI); + auto *Const = + cast<Constant>(cast<ConstantAsMetadata>( + MI.getOperand(3).getMetadata()->getOperand(0)) + ->getValue()); + if (auto *GV = dyn_cast<GlobalValue>(Const)) { + Register Reg = GR->find(GV, &MF); + if (!Reg.isValid()) + GR->add(GV, &MF, MI.getOperand(2).getReg()); + else + RegsAlreadyAddedToDT[&MI] = Reg; + } else { + Register Reg = GR->find(Const, &MF); + if (!Reg.isValid()) { + if (auto *ConstVec = dyn_cast<ConstantDataVector>(Const)) { + auto *BuildVec = MRI.getVRegDef(MI.getOperand(2).getReg()); + assert(BuildVec && + BuildVec->getOpcode() == TargetOpcode::G_BUILD_VECTOR); + for (unsigned i = 0; i < ConstVec->getNumElements(); ++i) + GR->add(ConstVec->getElementAsConstant(i), &MF, + BuildVec->getOperand(1 + i).getReg()); + } + GR->add(Const, &MF, MI.getOperand(2).getReg()); + } else { + RegsAlreadyAddedToDT[&MI] = Reg; + // This MI is unused and will be removed. If the MI uses + // const_composite, it will be unused and should be removed too. + assert(MI.getOperand(2).isReg() && "Reg operand is expected"); + MachineInstr *SrcMI = MRI.getVRegDef(MI.getOperand(2).getReg()); + if (SrcMI && isSpvIntrinsic(*SrcMI, Intrinsic::spv_const_composite)) + ToEraseComposites.push_back(SrcMI); + } + } + } + } + for (MachineInstr *MI : ToErase) { + Register Reg = MI->getOperand(2).getReg(); + if (RegsAlreadyAddedToDT.find(MI) != RegsAlreadyAddedToDT.end()) + Reg = RegsAlreadyAddedToDT[MI]; + MRI.replaceRegWith(MI->getOperand(0).getReg(), Reg); + MI->eraseFromParent(); + } + for (MachineInstr *MI : ToEraseComposites) + MI->eraseFromParent(); } static void foldConstantsIntoIntrinsics(MachineFunction &MF) { @@ -120,6 +167,7 @@ static SPIRVType *propagateSPIRVType(MachineInstr *MI, SPIRVGlobalRegistry *GR, } case TargetOpcode::G_TRUNC: case TargetOpcode::G_ADDRSPACE_CAST: + case TargetOpcode::G_PTR_ADD: case TargetOpcode::COPY: { MachineOperand &Op = MI->getOperand(1); MachineInstr *Def = Op.isReg() ? 
MRI.getVRegDef(Op.getReg()) : nullptr; @@ -308,6 +356,22 @@ static void processInstrsWithTypeFolding(MachineFunction &MF, processInstr(MI, MIB, MRI, GR); } } + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + // We need to rewrite dst types for ASSIGN_TYPE instrs to be able + // to perform tblgen'erated selection and we can't do that on Legalizer + // as it operates on gMIR only. + if (MI.getOpcode() != SPIRV::ASSIGN_TYPE) + continue; + Register SrcReg = MI.getOperand(1).getReg(); + if (!isTypeFoldingSupported(MRI.getVRegDef(SrcReg)->getOpcode())) + continue; + Register DstReg = MI.getOperand(0).getReg(); + if (MRI.getType(DstReg).isVector()) + MRI.setRegClass(DstReg, &SPIRV::IDRegClass); + MRI.setType(DstReg, LLT::scalar(32)); + } + } } static void processSwitches(MachineFunction &MF, SPIRVGlobalRegistry *GR, @@ -421,6 +485,7 @@ bool SPIRVPreLegalizer::runOnMachineFunction(MachineFunction &MF) { SPIRVGlobalRegistry *GR = ST.getSPIRVGlobalRegistry(); GR->setCurrentFunc(MF); MachineIRBuilder MIB(MF); + addConstantsToTrack(MF, GR); foldConstantsIntoIntrinsics(MF); insertBitcasts(MF, GR, MIB); generateAssignInstrs(MF, GR, MIB); diff --git a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp new file mode 100644 index 000000000000..13c3c12c1b41 --- /dev/null +++ b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp @@ -0,0 +1,288 @@ +//===-- SPIRVPrepareFunctions.cpp - modify function signatures --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass modifies function signatures containing aggregate arguments +// and/or return value. Also it substitutes some llvm intrinsic calls by +// function calls, generating these functions as the translator does. +// +// NOTE: this pass is a module-level one due to the necessity to modify +// GVs/functions. 
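To illustrate which signatures the new pass rewrites, a hypothetical pair of declarations (not from the patch):

// `makePair` has an aggregate return type and an aggregate parameter, so the
// pass clones it with both replaced by i32 and records the original types in
// the "spv.cloned_funcs" named metadata; `scale` has a scalar-only signature
// and is left untouched.
struct Pair { int A; int B; };
Pair makePair(Pair Seed);
int scale(int X);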
+// +//===----------------------------------------------------------------------===// + +#include "SPIRV.h" +#include "SPIRVTargetMachine.h" +#include "SPIRVUtils.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LowerMemIntrinsics.h" + +using namespace llvm; + +namespace llvm { +void initializeSPIRVPrepareFunctionsPass(PassRegistry &); +} + +namespace { + +class SPIRVPrepareFunctions : public ModulePass { + Function *processFunctionSignature(Function *F); + +public: + static char ID; + SPIRVPrepareFunctions() : ModulePass(ID) { + initializeSPIRVPrepareFunctionsPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) override; + + StringRef getPassName() const override { return "SPIRV prepare functions"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + ModulePass::getAnalysisUsage(AU); + } +}; + +} // namespace + +char SPIRVPrepareFunctions::ID = 0; + +INITIALIZE_PASS(SPIRVPrepareFunctions, "prepare-functions", + "SPIRV prepare functions", false, false) + +Function *SPIRVPrepareFunctions::processFunctionSignature(Function *F) { + IRBuilder<> B(F->getContext()); + + bool IsRetAggr = F->getReturnType()->isAggregateType(); + bool HasAggrArg = + std::any_of(F->arg_begin(), F->arg_end(), [](Argument &Arg) { + return Arg.getType()->isAggregateType(); + }); + bool DoClone = IsRetAggr || HasAggrArg; + if (!DoClone) + return F; + SmallVector<std::pair<int, Type *>, 4> ChangedTypes; + Type *RetType = IsRetAggr ? B.getInt32Ty() : F->getReturnType(); + if (IsRetAggr) + ChangedTypes.push_back(std::pair<int, Type *>(-1, F->getReturnType())); + SmallVector<Type *, 4> ArgTypes; + for (const auto &Arg : F->args()) { + if (Arg.getType()->isAggregateType()) { + ArgTypes.push_back(B.getInt32Ty()); + ChangedTypes.push_back( + std::pair<int, Type *>(Arg.getArgNo(), Arg.getType())); + } else + ArgTypes.push_back(Arg.getType()); + } + FunctionType *NewFTy = + FunctionType::get(RetType, ArgTypes, F->getFunctionType()->isVarArg()); + Function *NewF = + Function::Create(NewFTy, F->getLinkage(), F->getName(), *F->getParent()); + + ValueToValueMapTy VMap; + auto NewFArgIt = NewF->arg_begin(); + for (auto &Arg : F->args()) { + StringRef ArgName = Arg.getName(); + NewFArgIt->setName(ArgName); + VMap[&Arg] = &(*NewFArgIt++); + } + SmallVector<ReturnInst *, 8> Returns; + + CloneFunctionInto(NewF, F, VMap, CloneFunctionChangeType::LocalChangesOnly, + Returns); + NewF->takeName(F); + + NamedMDNode *FuncMD = + F->getParent()->getOrInsertNamedMetadata("spv.cloned_funcs"); + SmallVector<Metadata *, 2> MDArgs; + MDArgs.push_back(MDString::get(B.getContext(), NewF->getName())); + for (auto &ChangedTyP : ChangedTypes) + MDArgs.push_back(MDNode::get( + B.getContext(), + {ConstantAsMetadata::get(B.getInt32(ChangedTyP.first)), + ValueAsMetadata::get(Constant::getNullValue(ChangedTyP.second))})); + MDNode *ThisFuncMD = MDNode::get(B.getContext(), MDArgs); + FuncMD->addOperand(ThisFuncMD); + + for (auto *U : make_early_inc_range(F->users())) { + if (auto *CI = dyn_cast<CallInst>(U)) + CI->mutateFunctionType(NewF->getFunctionType()); + U->replaceUsesOfWith(F, NewF); + } + return NewF; +} + +std::string lowerLLVMIntrinsicName(IntrinsicInst *II) { + Function *IntrinsicFunc = II->getCalledFunction(); + assert(IntrinsicFunc && "Missing function"); + std::string FuncName = IntrinsicFunc->getName().str(); + std::replace(FuncName.begin(), FuncName.end(), '.', '_'); + FuncName = "spirv." 
+ FuncName; + return FuncName; +} + +static Function *getOrCreateFunction(Module *M, Type *RetTy, + ArrayRef<Type *> ArgTypes, + StringRef Name) { + FunctionType *FT = FunctionType::get(RetTy, ArgTypes, false); + Function *F = M->getFunction(Name); + if (F && F->getFunctionType() == FT) + return F; + Function *NewF = Function::Create(FT, GlobalValue::ExternalLinkage, Name, M); + if (F) + NewF->setDSOLocal(F->isDSOLocal()); + NewF->setCallingConv(CallingConv::SPIR_FUNC); + return NewF; +} + +static void lowerFunnelShifts(Module *M, IntrinsicInst *FSHIntrinsic) { + // Get a separate function - otherwise, we'd have to rework the CFG of the + // current one. Then simply replace the intrinsic uses with a call to the new + // function. + // Generate LLVM IR for i* @spirv.llvm_fsh?_i* (i* %a, i* %b, i* %c) + FunctionType *FSHFuncTy = FSHIntrinsic->getFunctionType(); + Type *FSHRetTy = FSHFuncTy->getReturnType(); + const std::string FuncName = lowerLLVMIntrinsicName(FSHIntrinsic); + Function *FSHFunc = + getOrCreateFunction(M, FSHRetTy, FSHFuncTy->params(), FuncName); + + if (!FSHFunc->empty()) { + FSHIntrinsic->setCalledFunction(FSHFunc); + return; + } + BasicBlock *RotateBB = BasicBlock::Create(M->getContext(), "rotate", FSHFunc); + IRBuilder<> IRB(RotateBB); + Type *Ty = FSHFunc->getReturnType(); + // Build the actual funnel shift rotate logic. + // In the comments, "int" is used interchangeably with "vector of int + // elements". + FixedVectorType *VectorTy = dyn_cast<FixedVectorType>(Ty); + Type *IntTy = VectorTy ? VectorTy->getElementType() : Ty; + unsigned BitWidth = IntTy->getIntegerBitWidth(); + ConstantInt *BitWidthConstant = IRB.getInt({BitWidth, BitWidth}); + Value *BitWidthForInsts = + VectorTy + ? IRB.CreateVectorSplat(VectorTy->getNumElements(), BitWidthConstant) + : BitWidthConstant; + Value *RotateModVal = + IRB.CreateURem(/*Rotate*/ FSHFunc->getArg(2), BitWidthForInsts); + Value *FirstShift = nullptr, *SecShift = nullptr; + if (FSHIntrinsic->getIntrinsicID() == Intrinsic::fshr) { + // Shift the less significant number right, the "rotate" number of bits + // will be 0-filled on the left as a result of this regular shift. + FirstShift = IRB.CreateLShr(FSHFunc->getArg(1), RotateModVal); + } else { + // Shift the more significant number left, the "rotate" number of bits + // will be 0-filled on the right as a result of this regular shift. + FirstShift = IRB.CreateShl(FSHFunc->getArg(0), RotateModVal); + } + // We want the "rotate" number of the more significant int's LSBs (MSBs) to + // occupy the leftmost (rightmost) "0 space" left by the previous operation. + // Therefore, subtract the "rotate" number from the integer bitsize... + Value *SubRotateVal = IRB.CreateSub(BitWidthForInsts, RotateModVal); + if (FSHIntrinsic->getIntrinsicID() == Intrinsic::fshr) { + // ...and left-shift the more significant int by this number, zero-filling + // the LSBs. + SecShift = IRB.CreateShl(FSHFunc->getArg(0), SubRotateVal); + } else { + // ...and right-shift the less significant int by this number, zero-filling + // the MSBs. + SecShift = IRB.CreateLShr(FSHFunc->getArg(1), SubRotateVal); + } + // A simple binary addition of the shifted ints yields the final result. + IRB.CreateRet(IRB.CreateOr(FirstShift, SecShift)); + + FSHIntrinsic->setCalledFunction(FSHFunc); +} + +static void buildUMulWithOverflowFunc(Module *M, Function *UMulFunc) { + // The function body is already created. 
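The rotate construction in lowerFunnelShifts above, restated as a scalar C++ sketch for the fshl case (a 32-bit width is assumed; the zero-rotate guard is added only to keep this sketch well-defined, while the generated function follows the shift/sub/or sequence as written):

#include <cstdint>
static uint32_t fshlExpansion(uint32_t A, uint32_t B, uint32_t Rotate) {
  const uint32_t BitWidth = 32;
  uint32_t RotateMod = Rotate % BitWidth;        // RotateModVal in the pass
  if (RotateMod == 0)
    return A;                                    // guard added for this sketch only
  uint32_t First = A << RotateMod;               // FirstShift: MSBs of the result
  uint32_t Second = B >> (BitWidth - RotateMod); // SecShift: fills the vacated LSBs
  return First | Second;                         // final OR, as in the pass
}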
+ if (!UMulFunc->empty()) + return; + + BasicBlock *EntryBB = BasicBlock::Create(M->getContext(), "entry", UMulFunc); + IRBuilder<> IRB(EntryBB); + // Build the actual unsigned multiplication logic with the overflow + // indication. Do unsigned multiplication Mul = A * B. Then check + // if unsigned division Div = Mul / A is not equal to B. If so, + // then overflow has happened. + Value *Mul = IRB.CreateNUWMul(UMulFunc->getArg(0), UMulFunc->getArg(1)); + Value *Div = IRB.CreateUDiv(Mul, UMulFunc->getArg(0)); + Value *Overflow = IRB.CreateICmpNE(UMulFunc->getArg(0), Div); + + // umul.with.overflow intrinsic return a structure, where the first element + // is the multiplication result, and the second is an overflow bit. + Type *StructTy = UMulFunc->getReturnType(); + Value *Agg = IRB.CreateInsertValue(UndefValue::get(StructTy), Mul, {0}); + Value *Res = IRB.CreateInsertValue(Agg, Overflow, {1}); + IRB.CreateRet(Res); +} + +static void lowerUMulWithOverflow(Module *M, IntrinsicInst *UMulIntrinsic) { + // Get a separate function - otherwise, we'd have to rework the CFG of the + // current one. Then simply replace the intrinsic uses with a call to the new + // function. + FunctionType *UMulFuncTy = UMulIntrinsic->getFunctionType(); + Type *FSHLRetTy = UMulFuncTy->getReturnType(); + const std::string FuncName = lowerLLVMIntrinsicName(UMulIntrinsic); + Function *UMulFunc = + getOrCreateFunction(M, FSHLRetTy, UMulFuncTy->params(), FuncName); + buildUMulWithOverflowFunc(M, UMulFunc); + UMulIntrinsic->setCalledFunction(UMulFunc); +} + +static void substituteIntrinsicCalls(Module *M, Function *F) { + for (BasicBlock &BB : *F) { + for (Instruction &I : BB) { + auto Call = dyn_cast<CallInst>(&I); + if (!Call) + continue; + Call->setTailCall(false); + Function *CF = Call->getCalledFunction(); + if (!CF || !CF->isIntrinsic()) + continue; + auto *II = cast<IntrinsicInst>(Call); + if (II->getIntrinsicID() == Intrinsic::fshl || + II->getIntrinsicID() == Intrinsic::fshr) + lowerFunnelShifts(M, II); + else if (II->getIntrinsicID() == Intrinsic::umul_with_overflow) + lowerUMulWithOverflow(M, II); + } + } +} + +bool SPIRVPrepareFunctions::runOnModule(Module &M) { + for (Function &F : M) + substituteIntrinsicCalls(&M, &F); + + std::vector<Function *> FuncsWorklist; + bool Changed = false; + for (auto &F : M) + FuncsWorklist.push_back(&F); + + for (auto *Func : FuncsWorklist) { + Function *F = processFunctionSignature(Func); + + bool CreatedNewF = F != Func; + + if (Func->isDeclaration()) { + Changed |= CreatedNewF; + continue; + } + + if (CreatedNewF) + Func->eraseFromParent(); + } + + return Changed; +} + +ModulePass *llvm::createSPIRVPrepareFunctionsPass() { + return new SPIRVPrepareFunctions(); +} diff --git a/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp b/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp index cdf3a160f373..00549c7b5768 100644 --- a/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp @@ -46,8 +46,7 @@ SPIRVSubtarget::SPIRVSubtarget(const Triple &TT, const std::string &CPU, PointerSize(computePointerSize(TT)), SPIRVVersion(0), InstrInfo(), FrameLowering(initSubtargetDependencies(CPU, FS)), TLInfo(TM, *this) { GR = std::make_unique<SPIRVGlobalRegistry>(PointerSize); - CallLoweringInfo = - std::make_unique<SPIRVCallLowering>(TLInfo, *this, GR.get()); + CallLoweringInfo = std::make_unique<SPIRVCallLowering>(TLInfo, GR.get()); Legalizer = std::make_unique<SPIRVLegalizerInfo>(*this); RegBankInfo = std::make_unique<SPIRVRegisterBankInfo>(); InstSelector.reset( diff --git 
a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp index f7c88a5c6d4a..7f5f14dc3ce8 100644 --- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp @@ -140,7 +140,10 @@ TargetPassConfig *SPIRVTargetMachine::createPassConfig(PassManagerBase &PM) { return new SPIRVPassConfig(*this, PM); } -void SPIRVPassConfig::addIRPasses() { TargetPassConfig::addIRPasses(); } +void SPIRVPassConfig::addIRPasses() { + TargetPassConfig::addIRPasses(); + addPass(createSPIRVPrepareFunctionsPass()); +} void SPIRVPassConfig::addISelPrepare() { addPass(createSPIRVEmitIntrinsicsPass(&getTM<SPIRVTargetMachine>())); diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp index b92dc12735f8..15671ef3e512 100644 --- a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp @@ -45,6 +45,14 @@ static size_t getPaddedLen(const StringRef &Str) { return (Len % 4 == 0) ? Len : Len + (4 - (Len % 4)); } +void addStringImm(const StringRef &Str, MCInst &Inst) { + const size_t PaddedLen = getPaddedLen(Str); + for (unsigned i = 0; i < PaddedLen; i += 4) { + // Add an operand for the 32-bits of chars or padding. + Inst.addOperand(MCOperand::createImm(convertCharsToWord(Str, i))); + } +} + void addStringImm(const StringRef &Str, MachineInstrBuilder &MIB) { const size_t PaddedLen = getPaddedLen(Str); for (unsigned i = 0; i < PaddedLen; i += 4) { @@ -182,6 +190,24 @@ SPIRV::MemorySemantics getMemSemanticsForStorageClass(SPIRV::StorageClass SC) { } } +SPIRV::MemorySemantics getMemSemantics(AtomicOrdering Ord) { + switch (Ord) { + case AtomicOrdering::Acquire: + return SPIRV::MemorySemantics::Acquire; + case AtomicOrdering::Release: + return SPIRV::MemorySemantics::Release; + case AtomicOrdering::AcquireRelease: + return SPIRV::MemorySemantics::AcquireRelease; + case AtomicOrdering::SequentiallyConsistent: + return SPIRV::MemorySemantics::SequentiallyConsistent; + case AtomicOrdering::Unordered: + case AtomicOrdering::Monotonic: + case AtomicOrdering::NotAtomic: + default: + return SPIRV::MemorySemantics::None; + } +} + MachineInstr *getDefInstrMaybeConstant(Register &ConstReg, const MachineRegisterInfo *MRI) { MachineInstr *ConstInstr = MRI->getVRegDef(ConstReg); @@ -202,6 +228,11 @@ uint64_t getIConstVal(Register ConstReg, const MachineRegisterInfo *MRI) { return MI->getOperand(1).getCImm()->getValue().getZExtValue(); } +bool isSpvIntrinsic(MachineInstr &MI, Intrinsic::ID IntrinsicID) { + return MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS && + MI.getIntrinsicID() == IntrinsicID; +} + Type *getMDOperandAsType(const MDNode *N, unsigned I) { return cast<ValueAsMetadata>(N->getOperand(I))->getType(); } diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.h b/llvm/lib/Target/SPIRV/SPIRVUtils.h index ffa82c9c1fe4..35e24b076570 100644 --- a/llvm/lib/Target/SPIRV/SPIRVUtils.h +++ b/llvm/lib/Target/SPIRV/SPIRVUtils.h @@ -32,6 +32,7 @@ class SPIRVInstrInfo; // Add the given string as a series of integer operand, inserting null // terminators and padding to make sure the operands all have 32-bit // little-endian words. 
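
The new addStringImm overload for MCInst above encodes string literals the same way the header comment describes: NUL-terminate, pad to a multiple of 4 bytes, then emit 32-bit little-endian words, one operand per word. A small self-contained sketch of that packing, assuming only what the comment states (the helper name packStringWords is hypothetical and not part of the patch):

#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

// Append a NUL terminator, pad with zero bytes to a multiple of 4, then
// pack each 4-byte group into a little-endian 32-bit word.
static std::vector<uint32_t> packStringWords(const std::string &S) {
  std::string Bytes = S;
  Bytes.push_back('\0');
  while (Bytes.size() % 4 != 0)
    Bytes.push_back('\0');
  std::vector<uint32_t> Words;
  for (size_t I = 0; I < Bytes.size(); I += 4) {
    uint32_t W = 0;
    for (size_t J = 0; J < 4; ++J)
      W |= uint32_t(uint8_t(Bytes[I + J])) << (8 * J);
    Words.push_back(W);
  }
  return Words;
}

int main() {
  // "abc" becomes "abc\0" and packs into the single word 0x00636261.
  for (uint32_t W : packStringWords("abc"))
    std::printf("0x%08X\n", W);
}
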
+void addStringImm(const llvm::StringRef &Str, llvm::MCInst &Inst); void addStringImm(const llvm::StringRef &Str, llvm::MachineInstrBuilder &MIB); void addStringImm(const llvm::StringRef &Str, llvm::IRBuilder<> &B, std::vector<llvm::Value *> &Args); @@ -67,6 +68,8 @@ llvm::SPIRV::StorageClass addressSpaceToStorageClass(unsigned AddrSpace); llvm::SPIRV::MemorySemantics getMemSemanticsForStorageClass(llvm::SPIRV::StorageClass SC); +llvm::SPIRV::MemorySemantics getMemSemantics(llvm::AtomicOrdering Ord); + // Find def instruction for the given ConstReg, walking through // spv_track_constant and ASSIGN_TYPE instructions. Updates ConstReg by def // of OpConstant instruction. @@ -78,6 +81,9 @@ getDefInstrMaybeConstant(llvm::Register &ConstReg, uint64_t getIConstVal(llvm::Register ConstReg, const llvm::MachineRegisterInfo *MRI); +// Check if MI is a SPIR-V specific intrinsic call. +bool isSpvIntrinsic(llvm::MachineInstr &MI, llvm::Intrinsic::ID IntrinsicID); + // Get type of i-th operand of the metadata node. llvm::Type *getMDOperandAsType(const llvm::MDNode *N, unsigned I); #endif // LLVM_LIB_TARGET_SPIRV_SPIRVUTILS_H diff --git a/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp b/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp index 1138788ac7fa..1f8837eb0194 100644 --- a/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp +++ b/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp @@ -24,10 +24,10 @@ Target &llvm::getTheSparcelTarget() { } extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSparcTargetInfo() { - RegisterTarget<Triple::sparc, /*HasJIT=*/true> X(getTheSparcTarget(), "sparc", - "Sparc", "Sparc"); - RegisterTarget<Triple::sparcv9, /*HasJIT=*/true> Y( + RegisterTarget<Triple::sparc, /*HasJIT=*/false> X(getTheSparcTarget(), + "sparc", "Sparc", "Sparc"); + RegisterTarget<Triple::sparcv9, /*HasJIT=*/false> Y( getTheSparcV9Target(), "sparcv9", "Sparc V9", "Sparc"); - RegisterTarget<Triple::sparcel, /*HasJIT=*/true> Z( + RegisterTarget<Triple::sparcel, /*HasJIT=*/false> Z( getTheSparcelTarget(), "sparcel", "Sparc LE", "Sparc"); } diff --git a/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp b/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp index 9c73757d7f5c..86eb8365d527 100644 --- a/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp +++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp @@ -28,7 +28,3 @@ const MCPhysReg SystemZ::XPLINK64ArgGPRs[SystemZ::XPLINK64NumArgGPRs] = { const MCPhysReg SystemZ::XPLINK64ArgFPRs[SystemZ::XPLINK64NumArgFPRs] = { SystemZ::F0D, SystemZ::F2D, SystemZ::F4D, SystemZ::F6D }; - -const MCPhysReg SystemZ::XPLINK64ArgVRs[SystemZ::XPLINK64NumArgVRs] = { - SystemZ::V24, SystemZ::V25, SystemZ::V26, SystemZ::V27, - SystemZ::V28, SystemZ::V29, SystemZ::V30, SystemZ::V31}; diff --git a/llvm/lib/Target/SystemZ/SystemZCallingConv.h b/llvm/lib/Target/SystemZ/SystemZCallingConv.h index f82c61c0f344..387411942aba 100644 --- a/llvm/lib/Target/SystemZ/SystemZCallingConv.h +++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.h @@ -27,9 +27,6 @@ namespace SystemZ { const unsigned XPLINK64NumArgFPRs = 4; extern const MCPhysReg XPLINK64ArgFPRs[XPLINK64NumArgFPRs]; - - const unsigned XPLINK64NumArgVRs = 8; - extern const MCPhysReg XPLINK64ArgVRs[XPLINK64NumArgVRs]; } // end namespace SystemZ class SystemZCCState : public CCState { @@ -205,41 +202,6 @@ inline bool CC_XPLINK64_Allocate128BitVararg(unsigned &ValNo, MVT &ValVT, return false; } -inline bool CC_XPLINK64_Shadow_Stack(unsigned &ValNo, MVT &ValVT, MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy 
&ArgFlags, - CCState &State) { - ArrayRef<MCPhysReg> RegList; - - switch (LocVT.SimpleTy) { - case MVT::i64: - RegList = SystemZ::XPLINK64ArgGPRs; - break; - case MVT::v16i8: - case MVT::v8i16: - case MVT::v4i32: - case MVT::v2i64: - case MVT::v4f32: - case MVT::v2f64: - RegList = SystemZ::XPLINK64ArgVRs; - break; - case MVT::f32: - case MVT::f64: - case MVT::f128: - RegList = SystemZ::XPLINK64ArgFPRs; - break; - default: - return false; - } - - unsigned UnallocatedRegisterIndex = State.getFirstUnallocated(RegList); - // Every time we can allocate a register, allocate on the stack. - if (UnallocatedRegisterIndex < RegList.size()) - State.AllocateStack(LocVT.getSizeInBits() / 8, Align(8)); - - return false; -} - inline bool RetCC_SystemZ_Error(unsigned &, MVT &, MVT &, CCValAssign::LocInfo &, ISD::ArgFlagsTy &, CCState &) { diff --git a/llvm/lib/Target/SystemZ/SystemZCallingConv.td b/llvm/lib/Target/SystemZ/SystemZCallingConv.td index fdd82a01f211..29b4a26736b2 100644 --- a/llvm/lib/Target/SystemZ/SystemZCallingConv.td +++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.td @@ -221,9 +221,10 @@ def CC_SystemZ_XPLINK64 : CallingConv<[ // XPLINK64 ABI compliant code widens integral types smaller than i64 // to i64 before placing the parameters either on the stack or in registers. CCIfType<[i32], CCIfExtend<CCPromoteToType<i64>>>, - // Promote f32 to f64 and bitcast to i64, if it needs to be passed in GPRS. - CCIfType<[f32], CCIfNotFixed<CCPromoteToType<f64>>>, - CCIfType<[f64], CCIfNotFixed<CCBitConvertToType<i64>>>, + // Promote f32 to f64 and bitcast to i64, if it needs to be passed in GPRs. + // Although we assign the f32 vararg to be bitcast, it will first be promoted + // to an f64 within convertValVTToLocVT(). + CCIfType<[f32, f64], CCIfNotFixed<CCBitConvertToType<i64>>>, // long double, can only be passed in GPR2 and GPR3, if available, // hence R2Q CCIfType<[f128], CCIfNotFixed<CCCustom<"CC_XPLINK64_Allocate128BitVararg">>>, @@ -246,34 +247,29 @@ def CC_SystemZ_XPLINK64 : CallingConv<[ // The first 3 integer arguments are passed in registers R1D-R3D. // The rest will be passed in the user area. The address offset of the user // area can be found in register R4D. - CCIfType<[i64], CCCustom<"CC_XPLINK64_Shadow_Stack">>, - CCIfType<[i64], CCAssignToReg<[R1D, R2D, R3D]>>, + CCIfType<[i64], CCAssignToRegAndStack<[R1D, R2D, R3D], 8, 8>>, - // The first 8 named vector arguments are passed in V24-V31. Sub-128 vectors + // The first 8 named vector arguments are passed in V24-V31. Sub-128 vectors // are passed in the same way, but they're widened to one of these types // during type legalization. CCIfSubtarget<"hasVector()", CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Reg">>>>, - CCIfSubtarget<"hasVector()", - CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Stack">>>>, + CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Reg">>>>, CCIfSubtarget<"hasVector()", CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - CCIfFixed<CCAssignToReg<[V24, V25, V26, V27, - V28, V29, V30, V31]>>>>, + CCIfFixed<CCAssignToRegAndStack<[V24, V25, V26, V27, + V28, V29, V30, V31], 16, 8>>>>, - // The first 4 named float and double arguments are passed in registers FPR0-FPR6. - // The rest will be passed in the user area. + // The first 4 named float and double arguments are passed in registers + // FPR0-FPR6. The rest will be passed in the user area. 
CCIfType<[f32, f64], CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Reg">>>, - CCIfType<[f32, f64], CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Stack">>>, - CCIfType<[f32], CCIfFixed<CCAssignToReg<[F0S, F2S, F4S, F6S]>>>, - CCIfType<[f64], CCIfFixed<CCAssignToReg<[F0D, F2D, F4D, F6D]>>>, + CCIfType<[f32], CCIfFixed<CCAssignToRegAndStack<[F0S, F2S, F4S, F6S], 4, 8>>>, + CCIfType<[f64], CCIfFixed<CCAssignToRegAndStack<[F0D, F2D, F4D, F6D], 8, 8>>>, + // The first 2 long double arguments are passed in register FPR0/FPR2 // and FPR4/FPR6. The rest will be passed in the user area. CCIfType<[f128], CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Reg">>>, - CCIfType<[f128], CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Stack">>>, - CCIfType<[f128], CCIfFixed<CCAssignToReg<[F0Q, F4Q]>>>, + CCIfType<[f128], CCIfFixed<CCAssignToRegAndStack<[F0Q, F4Q], 16, 8>>>, // Other arguments are passed in 8-byte-aligned 8-byte stack slots. CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>, diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp index 975eb8862e82..d943507b4112 100644 --- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -911,6 +911,54 @@ SystemZXPLINKFrameLowering::SystemZXPLINKFrameLowering() XPLINKSpillOffsetTable[I].Offset; } +// Checks if the function is a potential candidate for being a XPLeaf routine. +static bool isXPLeafCandidate(const MachineFunction &MF) { + const MachineFrameInfo &MFFrame = MF.getFrameInfo(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>(); + auto *Regs = + static_cast<SystemZXPLINK64Registers *>(Subtarget.getSpecialRegisters()); + + // If function calls other functions including alloca, then it is not a XPLeaf + // routine. + if (MFFrame.hasCalls()) + return false; + + // If the function has var Sized Objects, then it is not a XPLeaf routine. + if (MFFrame.hasVarSizedObjects()) + return false; + + // If the function adjusts the stack, then it is not a XPLeaf routine. + if (MFFrame.adjustsStack()) + return false; + + // If function modifies the stack pointer register, then it is not a XPLeaf + // routine. + if (MRI.isPhysRegModified(Regs->getStackPointerRegister())) + return false; + + // If function modifies the ADA register, then it is not a XPLeaf routine. + if (MRI.isPhysRegModified(Regs->getAddressOfCalleeRegister())) + return false; + + // If function modifies the return address register, then it is not a XPLeaf + // routine. + if (MRI.isPhysRegModified(Regs->getReturnFunctionAddressRegister())) + return false; + + // If the backchain pointer should be stored, then it is not a XPLeaf routine. + if (MF.getFunction().hasFnAttribute("backchain")) + return false; + + // If function acquires its own stack frame, then it is not a XPLeaf routine. + // At the time this function is called, only slots for local variables are + // allocated, so this is a very rough estimate. 
+ if (MFFrame.estimateStackSize(MF) > 0) + return false; + + return true; +} + bool SystemZXPLINKFrameLowering::assignCalleeSavedSpillSlots( MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector<CalleeSavedInfo> &CSI) const { @@ -920,6 +968,18 @@ bool SystemZXPLINKFrameLowering::assignCalleeSavedSpillSlots( auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>(); auto &GRRegClass = SystemZ::GR64BitRegClass; + // At this point, the result of isXPLeafCandidate() is not accurate because + // the size of the save area has not yet been determined. If + // isXPLeafCandidate() indicates a potential leaf function, and there are no + // callee-save registers, then it is indeed a leaf function, and we can early + // exit. + // TODO: It is possible for leaf functions to use callee-saved registers. + // It can use the 0-2k range between R4 and the caller's stack frame without + // acquiring its own stack frame. + bool IsLeaf = CSI.empty() && isXPLeafCandidate(MF); + if (IsLeaf) + return true; + // For non-leaf functions: // - the address of callee (entry point) register R6 must be saved CSI.push_back(CalleeSavedInfo(Regs.getAddressOfCalleeRegister())); @@ -1137,16 +1197,16 @@ void SystemZXPLINKFrameLowering::emitPrologue(MachineFunction &MF, auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>(); MachineFrameInfo &MFFrame = MF.getFrameInfo(); MachineInstr *StoreInstr = nullptr; + + determineFrameLayout(MF); + bool HasFP = hasFP(MF); // Debug location must be unknown since the first debug location is used // to determine the end of the prologue. DebugLoc DL; uint64_t Offset = 0; - // TODO: Support leaf functions; only add size of save+reserved area when - // function is non-leaf. - MFFrame.setStackSize(MFFrame.getStackSize() + Regs.getCallFrameSize()); - uint64_t StackSize = MFFrame.getStackSize(); + const uint64_t StackSize = MFFrame.getStackSize(); if (ZFI->getSpillGPRRegs().LowGPR) { // Skip over the GPR saves. @@ -1213,8 +1273,8 @@ void SystemZXPLINKFrameLowering::emitPrologue(MachineFunction &MF, // Mark the FramePtr as live at the beginning of every block except // the entry block. (We'll have marked R8 as live on entry when // saving the GPRs.) - for (auto I = std::next(MF.begin()), E = MF.end(); I != E; ++I) - I->addLiveIn(Regs.getFramePointerRegister()); + for (MachineBasicBlock &B : llvm::drop_begin(MF)) + B.addLiveIn(Regs.getFramePointerRegister()); } } @@ -1321,3 +1381,32 @@ void SystemZXPLINKFrameLowering::processFunctionBeforeFrameFinalized( // Setup stack frame offset MFFrame.setOffsetAdjustment(Regs.getStackPointerBias()); } + +// Determines the size of the frame, and creates the deferred spill objects. +void SystemZXPLINKFrameLowering::determineFrameLayout( + MachineFunction &MF) const { + MachineFrameInfo &MFFrame = MF.getFrameInfo(); + const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>(); + auto *Regs = + static_cast<SystemZXPLINK64Registers *>(Subtarget.getSpecialRegisters()); + + uint64_t StackSize = MFFrame.getStackSize(); + if (StackSize == 0) + return; + + // Add the size of the register save area and the reserved area to the size. + StackSize += Regs->getCallFrameSize(); + MFFrame.setStackSize(StackSize); + + // We now know the stack size. Create the fixed spill stack objects for the + // register save area now. This has no impact on the stack frame layout, as + // this is already computed. However, it makes sure that all callee saved + // registers have a valid frame index assigned. 
+ const unsigned RegSize = MF.getDataLayout().getPointerSize(); + for (auto &CS : MFFrame.getCalleeSavedInfo()) { + int Offset = RegSpillOffsets[CS.getReg()]; + if (Offset >= 0) + CS.setFrameIdx( + MFFrame.CreateFixedSpillStackObject(RegSize, Offset - StackSize)); + } +} diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h index bec83a9457e0..95f30e3c0d99 100644 --- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h @@ -134,6 +134,8 @@ public: void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const override; + + void determineFrameLayout(MachineFunction &MF) const; }; } // end namespace llvm diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 42c1c77f14e4..ac4531262187 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1404,8 +1404,12 @@ static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL, return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value); case CCValAssign::BCvt: { assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128); - assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f64 || - VA.getValVT() == MVT::f128); + assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 || + VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128); + // For an f32 vararg we need to first promote it to an f64 and then + // bitcast it to an i64. + if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64) + Value = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Value); MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64 ? MVT::v2i64 : VA.getLocVT(); diff --git a/llvm/lib/Target/VE/VEInstrInfo.cpp b/llvm/lib/Target/VE/VEInstrInfo.cpp index 94ebb59c4c77..46bb85606a62 100644 --- a/llvm/lib/Target/VE/VEInstrInfo.cpp +++ b/llvm/lib/Target/VE/VEInstrInfo.cpp @@ -418,7 +418,9 @@ unsigned VEInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, if (MI.getOpcode() == VE::LDrii || // I64 MI.getOpcode() == VE::LDLSXrii || // I32 MI.getOpcode() == VE::LDUrii || // F32 - MI.getOpcode() == VE::LDQrii // F128 (pseudo) + MI.getOpcode() == VE::LDQrii || // F128 (pseudo) + MI.getOpcode() == VE::LDVMrii || // VM (pseudo) + MI.getOpcode() == VE::LDVM512rii // VM512 (pseudo) ) { if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0 && MI.getOperand(3).isImm() && @@ -437,10 +439,12 @@ unsigned VEInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, /// any side effects other than storing to the stack slot. 
unsigned VEInstrInfo::isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const { - if (MI.getOpcode() == VE::STrii || // I64 - MI.getOpcode() == VE::STLrii || // I32 - MI.getOpcode() == VE::STUrii || // F32 - MI.getOpcode() == VE::STQrii // F128 (pseudo) + if (MI.getOpcode() == VE::STrii || // I64 + MI.getOpcode() == VE::STLrii || // I32 + MI.getOpcode() == VE::STUrii || // F32 + MI.getOpcode() == VE::STQrii || // F128 (pseudo) + MI.getOpcode() == VE::STVMrii || // VM (pseudo) + MI.getOpcode() == VE::STVM512rii // VM512 (pseudo) ) { if (MI.getOperand(0).isFI() && MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0 && MI.getOperand(2).isImm() && @@ -496,6 +500,20 @@ void VEInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, .addImm(0) .addReg(SrcReg, getKillRegState(isKill)) .addMemOperand(MMO); + } else if (RC == &VE::VMRegClass) { + BuildMI(MBB, I, DL, get(VE::STVMrii)) + .addFrameIndex(FI) + .addImm(0) + .addImm(0) + .addReg(SrcReg, getKillRegState(isKill)) + .addMemOperand(MMO); + } else if (VE::VM512RegClass.hasSubClassEq(RC)) { + BuildMI(MBB, I, DL, get(VE::STVM512rii)) + .addFrameIndex(FI) + .addImm(0) + .addImm(0) + .addReg(SrcReg, getKillRegState(isKill)) + .addMemOperand(MMO); } else report_fatal_error("Can't store this register to stack slot"); } @@ -539,6 +557,18 @@ void VEInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, .addImm(0) .addImm(0) .addMemOperand(MMO); + } else if (RC == &VE::VMRegClass) { + BuildMI(MBB, I, DL, get(VE::LDVMrii), DestReg) + .addFrameIndex(FI) + .addImm(0) + .addImm(0) + .addMemOperand(MMO); + } else if (VE::VM512RegClass.hasSubClassEq(RC)) { + BuildMI(MBB, I, DL, get(VE::LDVM512rii), DestReg) + .addFrameIndex(FI) + .addImm(0) + .addImm(0) + .addMemOperand(MMO); } else report_fatal_error("Can't load this register from stack slot"); } diff --git a/llvm/lib/Target/VE/VEInstrPatternsVec.td b/llvm/lib/Target/VE/VEInstrPatternsVec.td index 71199717a3a2..0b2f5039e3f3 100644 --- a/llvm/lib/Target/VE/VEInstrPatternsVec.td +++ b/llvm/lib/Target/VE/VEInstrPatternsVec.td @@ -25,6 +25,20 @@ def: Pat<(i64 (repl_i32 i32:$val)), (zero_f32 (i2l $val)), (SLLri (i2l $val), 32))>; +///// Mask Load & Store ///// + +// Store for v256i1, v512i1 are implemented in 2 ways. These STVM/STVM512 +// pseudo instruction is used for frameindex related load/store instructions. +// Custom Lowering is used for other load/store instructions. + +def : Pat<(v256i1 (load ADDRrii:$addr)), + (LDVMrii ADDRrii:$addr)>; +def : Pat<(v512i1 (load ADDRrii:$addr)), + (LDVM512rii ADDRrii:$addr)>; +def : Pat<(store v256i1:$vx, ADDRrii:$addr), + (STVMrii ADDRrii:$addr, $vx)>; +def : Pat<(store v512i1:$vx, ADDRrii:$addr), + (STVM512rii ADDRrii:$addr, $vx)>; multiclass vbrd_elem32<ValueType v32, ValueType s32, SDPatternOperator ImmOp, SDNodeXForm ImmCast, OutPatFrag SuperRegCast> { diff --git a/llvm/lib/Target/VE/VEInstrVec.td b/llvm/lib/Target/VE/VEInstrVec.td index 4a8476f7288a..327ad9ceacc5 100644 --- a/llvm/lib/Target/VE/VEInstrVec.td +++ b/llvm/lib/Target/VE/VEInstrVec.td @@ -2,6 +2,33 @@ // Vector Instructions //===----------------------------------------------------------------------===// +// Pseudo instructions for VM/VM512 spill/restore +// +// These pseudo instructions are used for only spill/restore since +// InlineSpiller assumes storeRegToStackSlot/loadRegFromStackSlot +// functions emit only single instruction. Those functions emit a +// single store/load instruction or one of these pseudo store/load +// instructions. 
+// +// Specifies hasSideEffects = 0 to disable UnmodeledSideEffects. + +let mayLoad = 1, hasSideEffects = 0 in { +def LDVMrii : Pseudo< + (outs VM:$vmx), (ins MEMrii:$addr), + "# pseudo ldvm $vmx, $addr", []>; +def LDVM512rii : Pseudo< + (outs VM512:$vmx), (ins MEMrii:$addr), + "# pseudo ldvm512 $vmx, $addr", []>; +} +let mayStore = 1, hasSideEffects = 0 in { +def STVMrii : Pseudo< + (outs), (ins MEMrii:$addr, VM:$vmx), + "# pseudo stvm $addr, $vmx", []>; +def STVM512rii : Pseudo< + (outs), (ins MEMrii:$addr, VM512:$vmx), + "# pseudo stvm512 $addr, $vmx", []>; +} + //===----------------------------------------------------------------------===// // Pseudo instructions for VM512 modifications //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/VE/VERegisterInfo.cpp b/llvm/lib/Target/VE/VERegisterInfo.cpp index f334af128162..397ea09c9a02 100644 --- a/llvm/lib/Target/VE/VERegisterInfo.cpp +++ b/llvm/lib/Target/VE/VERegisterInfo.cpp @@ -180,6 +180,16 @@ class EliminateFrameIndex { int FIOperandNum); void processLDQ(MachineInstr &MI, Register FrameReg, int64_t Offset, int FIOperandNum); + // Expand and eliminate Frame Index of pseudo STVMrii and LDVMrii. + void processSTVM(MachineInstr &MI, Register FrameReg, int64_t Offset, + int FIOperandNum); + void processLDVM(MachineInstr &MI, Register FrameReg, int64_t Offset, + int FIOperandNum); + // Expand and eliminate Frame Index of pseudo STVM512rii and LDVM512rii. + void processSTVM512(MachineInstr &MI, Register FrameReg, int64_t Offset, + int FIOperandNum); + void processLDVM512(MachineInstr &MI, Register FrameReg, int64_t Offset, + int FIOperandNum); public: EliminateFrameIndex(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, @@ -271,6 +281,185 @@ void EliminateFrameIndex::processLDQ(MachineInstr &MI, Register FrameReg, replaceFI(MI, FrameReg, Offset, FIOperandNum); } +void EliminateFrameIndex::processSTVM(MachineInstr &MI, Register FrameReg, + int64_t Offset, int FIOperandNum) { + assert(MI.getOpcode() == VE::STVMrii); + LLVM_DEBUG(dbgs() << "processSTVM: "; MI.dump()); + + // Original MI is: + // STVMrii frame-index, 0, offset, reg (, memory operand) + // Convert it to: + // SVMi tmp-reg, reg, 0 + // STrii frame-reg, 0, offset, tmp-reg + // SVMi tmp-reg, reg, 1 + // STrii frame-reg, 0, offset+8, tmp-reg + // SVMi tmp-reg, reg, 2 + // STrii frame-reg, 0, offset+16, tmp-reg + // SVMi tmp-reg, reg, 3 + // STrii frame-reg, 0, offset+24, tmp-reg + + prepareReplaceFI(MI, FrameReg, Offset, 24); + + Register SrcReg = MI.getOperand(3).getReg(); + bool isKill = MI.getOperand(3).isKill(); + // FIXME: it would be better to scavenge a register here instead of + // reserving SX16 all of the time. 
+ Register TmpReg = VE::SX16; + for (int i = 0; i < 3; ++i) { + build(VE::SVMmr, TmpReg).addReg(SrcReg).addImm(i); + MachineInstr *StMI = + build(VE::STrii).addReg(FrameReg).addImm(0).addImm(0).addReg( + TmpReg, getKillRegState(true)); + replaceFI(*StMI, FrameReg, Offset, 0); + Offset += 8; + } + build(VE::SVMmr, TmpReg).addReg(SrcReg, getKillRegState(isKill)).addImm(3); + MI.setDesc(get(VE::STrii)); + MI.getOperand(3).ChangeToRegister(TmpReg, false, false, true); + replaceFI(MI, FrameReg, Offset, FIOperandNum); +} + +void EliminateFrameIndex::processLDVM(MachineInstr &MI, Register FrameReg, + int64_t Offset, int FIOperandNum) { + assert(MI.getOpcode() == VE::LDVMrii); + LLVM_DEBUG(dbgs() << "processLDVM: "; MI.dump()); + + // Original MI is: + // LDVMri reg, frame-index, 0, offset (, memory operand) + // Convert it to: + // LDrii tmp-reg, frame-reg, 0, offset + // LVMir vm, 0, tmp-reg + // LDrii tmp-reg, frame-reg, 0, offset+8 + // LVMir_m vm, 1, tmp-reg, vm + // LDrii tmp-reg, frame-reg, 0, offset+16 + // LVMir_m vm, 2, tmp-reg, vm + // LDrii tmp-reg, frame-reg, 0, offset+24 + // LVMir_m vm, 3, tmp-reg, vm + + prepareReplaceFI(MI, FrameReg, Offset, 24); + + Register DestReg = MI.getOperand(0).getReg(); + // FIXME: it would be better to scavenge a register here instead of + // reserving SX16 all of the time. + unsigned TmpReg = VE::SX16; + for (int i = 0; i < 4; ++i) { + if (i != 3) { + MachineInstr *StMI = + build(VE::LDrii, TmpReg).addReg(FrameReg).addImm(0).addImm(0); + replaceFI(*StMI, FrameReg, Offset, 1); + Offset += 8; + } else { + // Last LDrii replace the target instruction. + MI.setDesc(get(VE::LDrii)); + MI.getOperand(0).ChangeToRegister(TmpReg, true); + } + // First LVM is LVMir. Others are LVMir_m. Last LVM places at the + // next of the target instruction. + if (i == 0) + build(VE::LVMir, DestReg).addImm(i).addReg(TmpReg, getKillRegState(true)); + else if (i != 3) + build(VE::LVMir_m, DestReg) + .addImm(i) + .addReg(TmpReg, getKillRegState(true)) + .addReg(DestReg); + else + BuildMI(*MI.getParent(), std::next(II), DL, get(VE::LVMir_m), DestReg) + .addImm(3) + .addReg(TmpReg, getKillRegState(true)) + .addReg(DestReg); + } + replaceFI(MI, FrameReg, Offset, FIOperandNum); +} + +void EliminateFrameIndex::processSTVM512(MachineInstr &MI, Register FrameReg, + int64_t Offset, int FIOperandNum) { + assert(MI.getOpcode() == VE::STVM512rii); + LLVM_DEBUG(dbgs() << "processSTVM512: "; MI.dump()); + + prepareReplaceFI(MI, FrameReg, Offset, 56); + + Register SrcReg = MI.getOperand(3).getReg(); + Register SrcLoReg = getSubReg(SrcReg, VE::sub_vm_odd); + Register SrcHiReg = getSubReg(SrcReg, VE::sub_vm_even); + bool isKill = MI.getOperand(3).isKill(); + // FIXME: it would be better to scavenge a register here instead of + // reserving SX16 all of the time. 
+ Register TmpReg = VE::SX16; + // store low part of VMP + MachineInstr *LastMI = nullptr; + for (int i = 0; i < 4; ++i) { + LastMI = build(VE::SVMmr, TmpReg).addReg(SrcLoReg).addImm(i); + MachineInstr *StMI = + build(VE::STrii).addReg(FrameReg).addImm(0).addImm(0).addReg( + TmpReg, getKillRegState(true)); + replaceFI(*StMI, FrameReg, Offset, 0); + Offset += 8; + } + if (isKill) + LastMI->addRegisterKilled(SrcLoReg, &TRI, true); + // store high part of VMP + for (int i = 0; i < 3; ++i) { + build(VE::SVMmr, TmpReg).addReg(SrcHiReg).addImm(i); + MachineInstr *StMI = + build(VE::STrii).addReg(FrameReg).addImm(0).addImm(0).addReg( + TmpReg, getKillRegState(true)); + replaceFI(*StMI, FrameReg, Offset, 0); + Offset += 8; + } + LastMI = build(VE::SVMmr, TmpReg).addReg(SrcHiReg).addImm(3); + if (isKill) { + LastMI->addRegisterKilled(SrcHiReg, &TRI, true); + // Add implicit super-register kills to the particular MI. + LastMI->addRegisterKilled(SrcReg, &TRI, true); + } + MI.setDesc(get(VE::STrii)); + MI.getOperand(3).ChangeToRegister(TmpReg, false, false, true); + replaceFI(MI, FrameReg, Offset, FIOperandNum); +} + +void EliminateFrameIndex::processLDVM512(MachineInstr &MI, Register FrameReg, + int64_t Offset, int FIOperandNum) { + assert(MI.getOpcode() == VE::LDVM512rii); + LLVM_DEBUG(dbgs() << "processLDVM512: "; MI.dump()); + + prepareReplaceFI(MI, FrameReg, Offset, 56); + + Register DestReg = MI.getOperand(0).getReg(); + Register DestLoReg = getSubReg(DestReg, VE::sub_vm_odd); + Register DestHiReg = getSubReg(DestReg, VE::sub_vm_even); + // FIXME: it would be better to scavenge a register here instead of + // reserving SX16 all of the time. + Register TmpReg = VE::SX16; + build(VE::IMPLICIT_DEF, DestReg); + for (int i = 0; i < 4; ++i) { + MachineInstr *LdMI = + build(VE::LDrii, TmpReg).addReg(FrameReg).addImm(0).addImm(0); + replaceFI(*LdMI, FrameReg, Offset, 1); + build(VE::LVMir_m, DestLoReg) + .addImm(i) + .addReg(TmpReg, getKillRegState(true)) + .addReg(DestLoReg); + Offset += 8; + } + for (int i = 0; i < 3; ++i) { + MachineInstr *LdMI = + build(VE::LDrii, TmpReg).addReg(FrameReg).addImm(0).addImm(0); + replaceFI(*LdMI, FrameReg, Offset, 1); + build(VE::LVMir_m, DestHiReg) + .addImm(i) + .addReg(TmpReg, getKillRegState(true)) + .addReg(DestHiReg); + Offset += 8; + } + MI.setDesc(get(VE::LDrii)); + MI.getOperand(0).ChangeToRegister(TmpReg, true); + BuildMI(*MI.getParent(), std::next(II), DL, get(VE::LVMir_m), DestHiReg) + .addImm(3) + .addReg(TmpReg, getKillRegState(true)) + .addReg(DestHiReg); + replaceFI(MI, FrameReg, Offset, FIOperandNum); +} + void EliminateFrameIndex::processMI(MachineInstr &MI, Register FrameReg, int64_t Offset, int FIOperandNum) { switch (MI.getOpcode()) { @@ -280,6 +469,18 @@ void EliminateFrameIndex::processMI(MachineInstr &MI, Register FrameReg, case VE::LDQrii: processLDQ(MI, FrameReg, Offset, FIOperandNum); return; + case VE::STVMrii: + processSTVM(MI, FrameReg, Offset, FIOperandNum); + return; + case VE::LDVMrii: + processLDVM(MI, FrameReg, Offset, FIOperandNum); + return; + case VE::STVM512rii: + processSTVM512(MI, FrameReg, Offset, FIOperandNum); + return; + case VE::LDVM512rii: + processLDVM512(MI, FrameReg, Offset, FIOperandNum); + return; } prepareReplaceFI(MI, FrameReg, Offset); replaceFI(MI, FrameReg, Offset, FIOperandNum); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp index 9316826e3d92..d7720604d6dc 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp +++ 
b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp @@ -40,7 +40,7 @@ WebAssemblyInstrInfo::WebAssemblyInstrInfo(const WebAssemblySubtarget &STI) RI(STI.getTargetTriple()) {} bool WebAssemblyInstrInfo::isReallyTriviallyReMaterializable( - const MachineInstr &MI, AAResults *AA) const { + const MachineInstr &MI) const { switch (MI.getOpcode()) { case WebAssembly::CONST_I32: case WebAssembly::CONST_I64: diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h index f45a3792467a..29d700bdf83f 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h @@ -43,8 +43,7 @@ public: const WebAssemblyRegisterInfo &getRegisterInfo() const { return RI; } - bool isReallyTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA) const override; + bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override; void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp index d3ad47147ac8..f9ef45bfb41c 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -49,7 +49,6 @@ class WebAssemblyRegStackify final : public MachineFunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); - AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<MachineDominatorTree>(); AU.addRequired<LiveIntervals>(); AU.addPreserved<MachineBlockFrequencyInfo>(); @@ -164,15 +163,15 @@ static void queryCallee(const MachineInstr &MI, bool &Read, bool &Write, // Determine whether MI reads memory, writes memory, has side effects, // and/or uses the stack pointer value. -static void query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read, - bool &Write, bool &Effects, bool &StackPointer) { +static void query(const MachineInstr &MI, bool &Read, bool &Write, + bool &Effects, bool &StackPointer) { assert(!MI.isTerminator()); if (MI.isDebugInstr() || MI.isPosition()) return; // Check for loads. - if (MI.mayLoad() && !MI.isDereferenceableInvariantLoad(&AA)) + if (MI.mayLoad() && !MI.isDereferenceableInvariantLoad()) Read = true; // Check for stores. @@ -255,9 +254,9 @@ static void query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read, } // Test whether Def is safe and profitable to rematerialize. -static bool shouldRematerialize(const MachineInstr &Def, AliasAnalysis &AA, +static bool shouldRematerialize(const MachineInstr &Def, const WebAssemblyInstrInfo *TII) { - return Def.isAsCheapAsAMove() && TII->isTriviallyReMaterializable(Def, &AA); + return Def.isAsCheapAsAMove() && TII->isTriviallyReMaterializable(Def); } // Identify the definition for this register at this point. This is a @@ -311,7 +310,7 @@ static bool hasOneUse(unsigned Reg, MachineInstr *Def, MachineRegisterInfo &MRI, // TODO: Compute memory dependencies in a way that uses AliasAnalysis to be // more precise. 
static bool isSafeToMove(const MachineOperand *Def, const MachineOperand *Use, - const MachineInstr *Insert, AliasAnalysis &AA, + const MachineInstr *Insert, const WebAssemblyFunctionInfo &MFI, const MachineRegisterInfo &MRI) { const MachineInstr *DefI = Def->getParent(); @@ -391,7 +390,7 @@ static bool isSafeToMove(const MachineOperand *Def, const MachineOperand *Use, } bool Read = false, Write = false, Effects = false, StackPointer = false; - query(*DefI, AA, Read, Write, Effects, StackPointer); + query(*DefI, Read, Write, Effects, StackPointer); // If the instruction does not access memory and has no side effects, it has // no additional dependencies. @@ -406,7 +405,7 @@ static bool isSafeToMove(const MachineOperand *Def, const MachineOperand *Use, bool InterveningWrite = false; bool InterveningEffects = false; bool InterveningStackPointer = false; - query(*I, AA, InterveningRead, InterveningWrite, InterveningEffects, + query(*I, InterveningRead, InterveningWrite, InterveningEffects, InterveningStackPointer); if (Effects && InterveningEffects) return false; @@ -808,7 +807,6 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>(); const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); const auto *TRI = MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo(); - AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); auto &MDT = getAnalysis<MachineDominatorTree>(); auto &LIS = getAnalysis<LiveIntervals>(); @@ -872,8 +870,7 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { // supports intra-block moves) and it's MachineSink's job to catch all // the sinking opportunities anyway. bool SameBlock = DefI->getParent() == &MBB; - bool CanMove = SameBlock && - isSafeToMove(Def, &Use, Insert, AA, MFI, MRI) && + bool CanMove = SameBlock && isSafeToMove(Def, &Use, Insert, MFI, MRI) && !TreeWalker.isOnStack(Reg); if (CanMove && hasOneUse(Reg, DefI, MRI, MDT, LIS)) { Insert = moveForSingleUse(Reg, Use, DefI, MBB, Insert, LIS, MFI, MRI); @@ -883,7 +880,7 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { // TODO: Encode this properly as a stackified value. if (MFI.isFrameBaseVirtual() && MFI.getFrameBaseVreg() == Reg) MFI.clearFrameBaseVreg(); - } else if (shouldRematerialize(*DefI, AA, TII)) { + } else if (shouldRematerialize(*DefI, TII)) { Insert = rematerializeCheapDef(Reg, Use, *DefI, MBB, Insert->getIterator(), LIS, MFI, MRI, TII, TRI); diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index a859176220c7..fa0a6bd415dc 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -1277,7 +1277,7 @@ class ProcModel<string Name, SchedMachineModel Model, // enabled. It has no effect on code generation. // NOTE: As a default tuning, "generic" aims to produce code optimized for the // most common X86 processors. The tunings might be changed over time. It is -// recommended to use "x86-64" in lit tests for consistency. +// recommended to use "tune-cpu"="x86-64" in function attribute for consistency. 
def : ProcModel<"generic", SandyBridgeModel, [FeatureX87, FeatureCX8, FeatureX86_64], [TuningSlow3OpsLEA, diff --git a/llvm/lib/Target/X86/X86FixupBWInsts.cpp b/llvm/lib/Target/X86/X86FixupBWInsts.cpp index 16bff201dd03..db6923416177 100644 --- a/llvm/lib/Target/X86/X86FixupBWInsts.cpp +++ b/llvm/lib/Target/X86/X86FixupBWInsts.cpp @@ -393,12 +393,12 @@ MachineInstr *FixupBWInstPass::tryReplaceInstr(MachineInstr *MI, switch (MI->getOpcode()) { case X86::MOV8rm: - // Only replace 8 bit loads with the zero extending versions if - // in an inner most loop and not optimizing for size. This takes - // an extra byte to encode, and provides limited performance upside. - if (MachineLoop *ML = MLI->getLoopFor(&MBB)) - if (ML->begin() == ML->end() && !OptForSize) - return tryReplaceLoad(X86::MOVZX32rm8, MI); + // Replace 8-bit loads with the zero-extending version if not optimizing + // for size. The extending op is cheaper across a wide range of uarch and + // it avoids a potentially expensive partial register stall. It takes an + // extra byte to encode, however, so don't do this when optimizing for size. + if (!OptForSize) + return tryReplaceLoad(X86::MOVZX32rm8, MI); break; case X86::MOV16rm: diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 12af6087cb47..5a4533c4bac4 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -555,6 +555,39 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal); + auto setF16Action = [&] (MVT VT, LegalizeAction Action) { + setOperationAction(ISD::FABS, VT, Action); + setOperationAction(ISD::FNEG, VT, Action); + setOperationAction(ISD::FCOPYSIGN, VT, Expand); + setOperationAction(ISD::FREM, VT, Action); + setOperationAction(ISD::FMA, VT, Action); + setOperationAction(ISD::FMINNUM, VT, Action); + setOperationAction(ISD::FMAXNUM, VT, Action); + setOperationAction(ISD::FMINIMUM, VT, Action); + setOperationAction(ISD::FMAXIMUM, VT, Action); + setOperationAction(ISD::FSIN, VT, Action); + setOperationAction(ISD::FCOS, VT, Action); + setOperationAction(ISD::FSINCOS, VT, Action); + setOperationAction(ISD::FSQRT, VT, Action); + setOperationAction(ISD::FPOW, VT, Action); + setOperationAction(ISD::FLOG, VT, Action); + setOperationAction(ISD::FLOG2, VT, Action); + setOperationAction(ISD::FLOG10, VT, Action); + setOperationAction(ISD::FEXP, VT, Action); + setOperationAction(ISD::FEXP2, VT, Action); + setOperationAction(ISD::FCEIL, VT, Action); + setOperationAction(ISD::FFLOOR, VT, Action); + setOperationAction(ISD::FNEARBYINT, VT, Action); + setOperationAction(ISD::FRINT, VT, Action); + setOperationAction(ISD::BR_CC, VT, Action); + setOperationAction(ISD::SETCC, VT, Action); + setOperationAction(ISD::SELECT, VT, Custom); + setOperationAction(ISD::SELECT_CC, VT, Action); + setOperationAction(ISD::FROUND, VT, Action); + setOperationAction(ISD::FROUNDEVEN, VT, Action); + setOperationAction(ISD::FTRUNC, VT, Action); + }; + if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) { // f16, f32 and f64 use SSE. // Set up the FP register classes. @@ -592,40 +625,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } // Half type will be promoted by default. 
- setOperationAction(ISD::FABS, MVT::f16, Promote); - setOperationAction(ISD::FNEG, MVT::f16, Promote); - setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand); + setF16Action(MVT::f16, Promote); setOperationAction(ISD::FADD, MVT::f16, Promote); setOperationAction(ISD::FSUB, MVT::f16, Promote); setOperationAction(ISD::FMUL, MVT::f16, Promote); setOperationAction(ISD::FDIV, MVT::f16, Promote); - setOperationAction(ISD::FREM, MVT::f16, Promote); - setOperationAction(ISD::FMA, MVT::f16, Promote); - setOperationAction(ISD::FMINNUM, MVT::f16, Promote); - setOperationAction(ISD::FMAXNUM, MVT::f16, Promote); - setOperationAction(ISD::FMINIMUM, MVT::f16, Promote); - setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote); - setOperationAction(ISD::FSIN, MVT::f16, Promote); - setOperationAction(ISD::FCOS, MVT::f16, Promote); - setOperationAction(ISD::FSINCOS, MVT::f16, Promote); - setOperationAction(ISD::FSQRT, MVT::f16, Promote); - setOperationAction(ISD::FPOW, MVT::f16, Promote); - setOperationAction(ISD::FLOG, MVT::f16, Promote); - setOperationAction(ISD::FLOG2, MVT::f16, Promote); - setOperationAction(ISD::FLOG10, MVT::f16, Promote); - setOperationAction(ISD::FEXP, MVT::f16, Promote); - setOperationAction(ISD::FEXP2, MVT::f16, Promote); - setOperationAction(ISD::FCEIL, MVT::f16, Promote); - setOperationAction(ISD::FFLOOR, MVT::f16, Promote); - setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); - setOperationAction(ISD::FRINT, MVT::f16, Promote); - setOperationAction(ISD::BR_CC, MVT::f16, Promote); - setOperationAction(ISD::SETCC, MVT::f16, Promote); - setOperationAction(ISD::SELECT, MVT::f16, Custom); - setOperationAction(ISD::SELECT_CC, MVT::f16, Promote); - setOperationAction(ISD::FROUND, MVT::f16, Promote); - setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote); - setOperationAction(ISD::FTRUNC, MVT::f16, Promote); setOperationAction(ISD::FP_ROUND, MVT::f16, LibCall); setOperationAction(ISD::FP_EXTEND, MVT::f32, LibCall); setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom); @@ -1003,6 +1007,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, : &X86::VR128RegClass); addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass); + addRegisterClass(MVT::v8f16, Subtarget.hasVLX() ? &X86::VR128XRegClass + : &X86::VR128RegClass); addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass); addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass @@ -1084,7 +1090,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); } - for (auto VT : { MVT::v2f64, MVT::v2i64 }) { + for (auto VT : { MVT::v8f16, MVT::v2f64, MVT::v2i64 }) { setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); setOperationAction(ISD::VSELECT, VT, Custom); @@ -1095,19 +1101,25 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); } + setF16Action(MVT::v8f16, Expand); + setOperationAction(ISD::FADD, MVT::v8f16, Expand); + setOperationAction(ISD::FSUB, MVT::v8f16, Expand); + setOperationAction(ISD::FMUL, MVT::v8f16, Expand); + setOperationAction(ISD::FDIV, MVT::v8f16, Expand); // Custom lower v2i64 and v2f64 selects. 
setOperationAction(ISD::SELECT, MVT::v2f64, Custom); setOperationAction(ISD::SELECT, MVT::v2i64, Custom); setOperationAction(ISD::SELECT, MVT::v4i32, Custom); setOperationAction(ISD::SELECT, MVT::v8i16, Custom); + setOperationAction(ISD::SELECT, MVT::v8f16, Custom); setOperationAction(ISD::SELECT, MVT::v16i8, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom); - setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal); + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Custom); setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i32, Custom); // Custom legalize these to avoid over promotion or custom promotion. @@ -1118,8 +1130,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom); } - setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); - setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom); + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i32, Custom); @@ -1304,6 +1316,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, : &X86::VR256RegClass); addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass); + addRegisterClass(MVT::v16f16, Subtarget.hasVLX() ? &X86::VR256XRegClass + : &X86::VR256RegClass); addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass); addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? 
&X86::VR256XRegClass @@ -1340,12 +1354,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32); setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i16, MVT::v8i32); setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i16, MVT::v8i32); - setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Custom); - setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i32, Legal); + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i32, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal); - setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Custom); + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i32, Custom); + setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Custom); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Custom); setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FADD, MVT::v8f32, Legal); @@ -1356,7 +1372,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STRICT_FMUL, MVT::v4f64, Legal); setOperationAction(ISD::STRICT_FDIV, MVT::v8f32, Legal); setOperationAction(ISD::STRICT_FDIV, MVT::v4f64, Legal); - setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Legal); setOperationAction(ISD::STRICT_FSQRT, MVT::v8f32, Legal); setOperationAction(ISD::STRICT_FSQRT, MVT::v4f64, Legal); @@ -1386,6 +1401,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SELECT, MVT::v4i64, Custom); setOperationAction(ISD::SELECT, MVT::v8i32, Custom); setOperationAction(ISD::SELECT, MVT::v16i16, Custom); + setOperationAction(ISD::SELECT, MVT::v16f16, Custom); setOperationAction(ISD::SELECT, MVT::v32i8, Custom); setOperationAction(ISD::SELECT, MVT::v8f32, Custom); @@ -1507,7 +1523,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // Custom lower several nodes for 256-bit types. 
for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, - MVT::v8f32, MVT::v4f64 }) { + MVT::v16f16, MVT::v8f32, MVT::v4f64 }) { setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); setOperationAction(ISD::VSELECT, VT, Custom); @@ -1518,6 +1534,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); setOperationAction(ISD::STORE, VT, Custom); } + setF16Action(MVT::v16f16, Expand); + setOperationAction(ISD::FADD, MVT::v16f16, Expand); + setOperationAction(ISD::FSUB, MVT::v16f16, Expand); + setOperationAction(ISD::FMUL, MVT::v16f16, Expand); + setOperationAction(ISD::FDIV, MVT::v16f16, Expand); if (HasInt256) { setOperationAction(ISD::VSELECT, MVT::v32i8, Legal); @@ -1532,11 +1553,23 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } } - if (!Subtarget.useSoftFloat() && Subtarget.hasF16C()) { - setOperationAction(ISD::FP_ROUND, MVT::f16, Custom); - setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom); - setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom); - setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom); + if (!Subtarget.useSoftFloat() && !Subtarget.hasFP16() && + Subtarget.hasF16C()) { + for (MVT VT : { MVT::f16, MVT::v2f16, MVT::v4f16, MVT::v8f16 }) { + setOperationAction(ISD::FP_ROUND, VT, Custom); + setOperationAction(ISD::STRICT_FP_ROUND, VT, Custom); + } + for (MVT VT : { MVT::f32, MVT::v2f32, MVT::v4f32 }) { + setOperationAction(ISD::FP_EXTEND, VT, Custom); + setOperationAction(ISD::STRICT_FP_EXTEND, VT, Custom); + } + for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV}) { + setOperationPromotedToType(Opc, MVT::v8f16, MVT::v8f32); + setOperationPromotedToType(Opc, MVT::v16f16, MVT::v16f32); + } + + setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f32, Legal); } // This block controls legalization of the mask vector sizes that are @@ -1619,6 +1652,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, addRegisterClass(MVT::v8i64, &X86::VR512RegClass); addRegisterClass(MVT::v8f64, &X86::VR512RegClass); addRegisterClass(MVT::v32i16, &X86::VR512RegClass); + addRegisterClass(MVT::v32f16, &X86::VR512RegClass); addRegisterClass(MVT::v64i8, &X86::VR512RegClass); for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) { @@ -1645,14 +1679,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, VT, MVT::v16i32); setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, VT, MVT::v16i32); } - setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal); - setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal); - setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v16i32, Legal); - setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v16i32, Legal); - setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal); - setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal); - setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i32, Legal); - setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i32, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Custom); + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v16i32, Custom); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v16i32, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Custom); + setOperationAction(ISD::STRICT_SINT_TO_FP, 
MVT::v16i32, Custom); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i32, Custom); + setOperationAction(ISD::FP_EXTEND, MVT::v8f64, Custom); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Custom); setOperationAction(ISD::STRICT_FADD, MVT::v16f32, Legal); setOperationAction(ISD::STRICT_FADD, MVT::v8f64, Legal); @@ -1664,7 +1700,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STRICT_FDIV, MVT::v8f64, Legal); setOperationAction(ISD::STRICT_FSQRT, MVT::v16f32, Legal); setOperationAction(ISD::STRICT_FSQRT, MVT::v8f64, Legal); - setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Legal); setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Legal); setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal); @@ -1799,15 +1834,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FSHR, MVT::v16i32, Custom); if (Subtarget.hasDQI()) { - setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal); - setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal); - setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i64, Legal); - setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i64, Legal); - setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal); - setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal); - setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i64, Legal); - setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i64, Legal); - + for (auto Opc : {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::STRICT_SINT_TO_FP, + ISD::STRICT_UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT, + ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT}) + setOperationAction(Opc, MVT::v8i64, Custom); setOperationAction(ISD::MUL, MVT::v8i64, Legal); } @@ -1831,7 +1861,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64, - MVT::v16f32, MVT::v8f64 }) { + MVT::v32f16, MVT::v16f32, MVT::v8f64 }) { setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal); setOperationAction(ISD::SELECT, VT, Custom); @@ -1842,6 +1872,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); } + setF16Action(MVT::v32f16, Expand); + setOperationAction(ISD::FP_ROUND, MVT::v16f16, Custom); + setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f16, Custom); + setOperationAction(ISD::FP_EXTEND, MVT::v16f32, Legal); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v16f32, Legal); + for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV}) { + setOperationPromotedToType(Opc, MVT::v16f16, MVT::v16f32); + setOperationPromotedToType(Opc, MVT::v32f16, MVT::v32f32); + } for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) { setOperationAction(ISD::MLOAD, VT, Legal); @@ -1881,23 +1920,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // These operations are handled on non-VLX by artificially widening in // isel patterns. - setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, - Subtarget.hasVLX() ? Legal : Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, - Subtarget.hasVLX() ? Legal : Custom); - setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32, - Subtarget.hasVLX() ? Legal : Custom); - setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, - Subtarget.hasVLX() ? 
Legal : Custom); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32, Custom); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Custom); setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, - Subtarget.hasVLX() ? Legal : Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, - Subtarget.hasVLX() ? Legal : Custom); - setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32, - Subtarget.hasVLX() ? Legal : Custom); - setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, - Subtarget.hasVLX() ? Legal : Custom); if (Subtarget.hasDQI()) { // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion. @@ -1934,25 +1959,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::MSCATTER, VT, Custom); if (Subtarget.hasDQI()) { - for (auto VT : { MVT::v2i64, MVT::v4i64 }) { - setOperationAction(ISD::SINT_TO_FP, VT, - Subtarget.hasVLX() ? Legal : Custom); - setOperationAction(ISD::UINT_TO_FP, VT, - Subtarget.hasVLX() ? Legal : Custom); - setOperationAction(ISD::STRICT_SINT_TO_FP, VT, - Subtarget.hasVLX() ? Legal : Custom); - setOperationAction(ISD::STRICT_UINT_TO_FP, VT, - Subtarget.hasVLX() ? Legal : Custom); - setOperationAction(ISD::FP_TO_SINT, VT, - Subtarget.hasVLX() ? Legal : Custom); - setOperationAction(ISD::FP_TO_UINT, VT, - Subtarget.hasVLX() ? Legal : Custom); - setOperationAction(ISD::STRICT_FP_TO_SINT, VT, - Subtarget.hasVLX() ? Legal : Custom); - setOperationAction(ISD::STRICT_FP_TO_UINT, VT, - Subtarget.hasVLX() ? Legal : Custom); - setOperationAction(ISD::MUL, VT, Legal); + for (auto Opc : {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::STRICT_SINT_TO_FP, + ISD::STRICT_UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT, + ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT}) { + setOperationAction(Opc, MVT::v2i64, Custom); + setOperationAction(Opc, MVT::v4i64, Custom); } + setOperationAction(ISD::MUL, MVT::v2i64, Legal); + setOperationAction(ISD::MUL, MVT::v4i64, Legal); } if (Subtarget.hasCDI()) { @@ -2052,7 +2066,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // AVX512_FP16 scalar operations setGroup(MVT::f16); - addRegisterClass(MVT::f16, &X86::FR16XRegClass); setOperationAction(ISD::FREM, MVT::f16, Promote); setOperationAction(ISD::STRICT_FREM, MVT::f16, Promote); setOperationAction(ISD::SELECT_CC, MVT::f16, Expand); @@ -2066,6 +2079,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Legal); setOperationAction(ISD::FP_ROUND, MVT::f16, Custom); setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom); + setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal); setCondCodeAction(ISD::SETOEQ, MVT::f16, Expand); @@ -2073,14 +2087,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, if (Subtarget.useAVX512Regs()) { setGroup(MVT::v32f16); - addRegisterClass(MVT::v32f16, &X86::VR512RegClass); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32f16, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::v32i16, Legal); setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v32i16, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v32i16, Legal); setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v32i16, Legal); + setOperationAction(ISD::FP_ROUND, MVT::v16f16, Legal); setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f16, Legal); + setOperationAction(ISD::FP_EXTEND, MVT::v16f32, Legal); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v16f32, Legal); + 
setOperationAction(ISD::FP_EXTEND, MVT::v8f64, Legal); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Legal); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32f16, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::v32i16, Custom); @@ -2112,8 +2129,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } if (Subtarget.hasVLX()) { - addRegisterClass(MVT::v8f16, &X86::VR128XRegClass); - addRegisterClass(MVT::v16f16, &X86::VR256XRegClass); setGroup(MVT::v8f16); setGroup(MVT::v16f16); @@ -2132,8 +2147,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i16, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom); setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i16, Custom); + setOperationAction(ISD::FP_ROUND, MVT::v8f16, Legal); setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f16, Legal); + setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f32, Legal); + setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Legal); // INSERT_VECTOR_ELT v8f16 extended to VECTOR_SHUFFLE setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8f16, Custom); @@ -2347,7 +2366,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, ISD::FP16_TO_FP, ISD::FP_EXTEND, ISD::STRICT_FP_EXTEND, - ISD::FP_ROUND}); + ISD::FP_ROUND, + ISD::STRICT_FP_ROUND}); computeRegisterProperties(Subtarget.getRegisterInfo()); @@ -2404,6 +2424,10 @@ X86TargetLowering::getPreferredVectorAction(MVT VT) const { return TypeSplitVector; if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 && + !Subtarget.hasF16C() && VT.getVectorElementType() == MVT::f16) + return TypeSplitVector; + + if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 && VT.getVectorElementType() != MVT::i1) return TypeWidenVector; @@ -2447,22 +2471,21 @@ handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC, MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const { - if (VT.isVector() && VT.getVectorElementType() == MVT::i1 && - Subtarget.hasAVX512()) { - unsigned NumElts = VT.getVectorNumElements(); + if (VT.isVector()) { + if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) { + unsigned NumElts = VT.getVectorNumElements(); - MVT RegisterVT; - unsigned NumRegisters; - std::tie(RegisterVT, NumRegisters) = - handleMaskRegisterForCallingConv(NumElts, CC, Subtarget); - if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE) - return RegisterVT; - } + MVT RegisterVT; + unsigned NumRegisters; + std::tie(RegisterVT, NumRegisters) = + handleMaskRegisterForCallingConv(NumElts, CC, Subtarget); + if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE) + return RegisterVT; + } - // v3f16 will be widen to v4f16. But we don't assign register class for v4f16. - // So its default register type is f16. We override the type to v8f16 here. - if (VT == MVT::v3f16 && Subtarget.hasFP16()) - return MVT::v8f16; + if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8) + return MVT::v8f16; + } // We will use more GPRs for f64 and f80 on 32 bits when x87 is disabled. 
if ((VT == MVT::f64 || VT == MVT::f80) && !Subtarget.is64Bit() && @@ -2475,22 +2498,21 @@ MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const { - if (VT.isVector() && VT.getVectorElementType() == MVT::i1 && - Subtarget.hasAVX512()) { - unsigned NumElts = VT.getVectorNumElements(); + if (VT.isVector()) { + if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) { + unsigned NumElts = VT.getVectorNumElements(); - MVT RegisterVT; - unsigned NumRegisters; - std::tie(RegisterVT, NumRegisters) = - handleMaskRegisterForCallingConv(NumElts, CC, Subtarget); - if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE) - return NumRegisters; - } + MVT RegisterVT; + unsigned NumRegisters; + std::tie(RegisterVT, NumRegisters) = + handleMaskRegisterForCallingConv(NumElts, CC, Subtarget); + if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE) + return NumRegisters; + } - // v3f16 will be widen to v4f16. But we don't assign register class for v4f16. - // So its default register number is 3. We override the number to 1 here. - if (VT == MVT::v3f16 && Subtarget.hasFP16()) - return 1; + if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8) + return 1; + } // We have to split f64 to 2 registers and f80 to 3 registers on 32 bits if // x87 is disabled. @@ -9646,13 +9668,13 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp, EVT CVT = Ld.getValueType(); assert(!CVT.isVector() && "Must not broadcast a vector type"); - // Splat f32, i32, v4f64, v4i64 in all cases with AVX2. + // Splat f16, f32, i32, v4f64, v4i64 in all cases with AVX2. // For size optimization, also splat v2f64 and v2i64, and for size opt // with AVX2, also splat i8 and i16. // With pattern matching, the VBROADCAST node may become a VMOVDDUP. if (ScalarSize == 32 || (ScalarSize == 64 && (IsGE256 || Subtarget.hasVLX())) || - (ScalarSize == 16 && Subtarget.hasFP16() && CVT.isFloatingPoint()) || + CVT == MVT::f16 || (OptForSize && (ScalarSize == 64 || Subtarget.hasAVX2()))) { const Constant *C = nullptr; if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld)) @@ -14129,6 +14151,16 @@ static bool isShuffleFoldableLoad(SDValue V) { ISD::isNON_EXTLoad(peekThroughOneUseBitcasts(V).getNode()); } +template<typename T> +static bool isSoftFP16(T VT, const X86Subtarget &Subtarget) { + return VT.getScalarType() == MVT::f16 && !Subtarget.hasFP16(); +} + +template<typename T> +bool X86TargetLowering::isSoftFP16(T VT) const { + return ::isSoftFP16(VT, Subtarget); +} + /// Try to lower insertion of a single element into a zero vector. 
/// /// This is a common pattern that we have especially efficient patterns to lower @@ -14140,6 +14172,9 @@ static SDValue lowerShuffleAsElementInsertion( MVT ExtVT = VT; MVT EltVT = VT.getVectorElementType(); + if (isSoftFP16(EltVT, Subtarget)) + return SDValue(); + int V2Index = find_if(Mask, [&Mask](int M) { return M >= (int)Mask.size(); }) - Mask.begin(); @@ -19444,6 +19479,15 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(1); SDValue RHS = Op.getOperand(2); + SDLoc dl(Op); + MVT VT = Op.getSimpleValueType(); + if (isSoftFP16(VT)) { + MVT NVT = VT.changeVectorElementTypeToInteger(); + return DAG.getBitcast(VT, DAG.getNode(ISD::VSELECT, dl, NVT, Cond, + DAG.getBitcast(NVT, LHS), + DAG.getBitcast(NVT, RHS))); + } + // A vselect where all conditions and data are constants can be optimized into // a single vector load by SelectionDAGLegalize::ExpandBUILD_VECTOR(). if (ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()) && @@ -19467,8 +19511,6 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const { if (!Subtarget.hasSSE41()) return SDValue(); - SDLoc dl(Op); - MVT VT = Op.getSimpleValueType(); unsigned EltSize = VT.getScalarSizeInBits(); unsigned NumElts = VT.getVectorNumElements(); @@ -20856,16 +20898,6 @@ static SDValue lowerINT_TO_FP_vXi64(SDValue Op, SelectionDAG &DAG, return Cvt; } -template<typename T> -static bool isSoftFP16(T VT, const X86Subtarget &Subtarget) { - return VT == MVT::f16 && !Subtarget.hasFP16(); -} - -template<typename T> -bool X86TargetLowering::isSoftFP16(T VT) const { - return ::isSoftFP16(VT, Subtarget); -} - static SDValue promoteXINT_TO_FP(SDValue Op, SelectionDAG &DAG) { bool IsStrict = Op->isStrictFPOpcode(); SDValue Src = Op.getOperand(IsStrict ? 
1 : 0); @@ -20885,6 +20917,26 @@ static SDValue promoteXINT_TO_FP(SDValue Op, SelectionDAG &DAG) { DAG.getNode(Op.getOpcode(), dl, NVT, Src), Rnd); } +static bool isLegalConversion(MVT VT, bool IsSigned, + const X86Subtarget &Subtarget) { + if (VT == MVT::v4i32 && Subtarget.hasSSE2() && IsSigned) + return true; + if (VT == MVT::v8i32 && Subtarget.hasAVX() && IsSigned) + return true; + if (Subtarget.hasVLX() && (VT == MVT::v4i32 || VT == MVT::v8i32)) + return true; + if (Subtarget.useAVX512Regs()) { + if (VT == MVT::v16i32) + return true; + if (VT == MVT::v8i64 && Subtarget.hasDQI()) + return true; + } + if (Subtarget.hasDQI() && Subtarget.hasVLX() && + (VT == MVT::v2i64 || VT == MVT::v4i64)) + return true; + return false; +} + SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { bool IsStrict = Op->isStrictFPOpcode(); @@ -20897,6 +20949,8 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, if (isSoftFP16(VT)) return promoteXINT_TO_FP(Op, DAG); + else if (isLegalConversion(SrcVT, true, Subtarget)) + return Op; if (Subtarget.isTargetWin64() && SrcVT == MVT::i128) return LowerWin64_INT128_TO_FP(Op, DAG); @@ -21400,6 +21454,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, if (isSoftFP16(DstVT)) return promoteXINT_TO_FP(Op, DAG); + else if (isLegalConversion(SrcVT, false, Subtarget)) + return Op; if (DstVT.isVector()) return lowerUINT_TO_FP_vec(Op, DAG, Subtarget); @@ -22229,6 +22285,8 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { {NVT, MVT::Other}, {Chain, Src})}); return DAG.getNode(Op.getOpcode(), dl, VT, DAG.getNode(ISD::FP_EXTEND, dl, NVT, Src)); + } else if (isTypeLegal(SrcVT) && isLegalConversion(VT, IsSigned, Subtarget)) { + return Op; } if (VT.isVector()) { @@ -22826,7 +22884,7 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { return Op; if (SVT.getVectorElementType() == MVT::f16) { - assert(Subtarget.hasFP16() && Subtarget.hasVLX() && "Unexpected features!"); + assert(Subtarget.hasF16C() && "Unexpected features!"); if (SVT == MVT::v2f16) In = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f16, In, DAG.getUNDEF(MVT::v2f16)); @@ -22836,6 +22894,8 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(X86ISD::STRICT_VFPEXT, DL, {VT, MVT::Other}, {Op->getOperand(0), Res}); return DAG.getNode(X86ISD::VFPEXT, DL, VT, Res); + } else if (VT == MVT::v4f64 || VT == MVT::v8f64) { + return Op; } assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!"); @@ -22854,34 +22914,19 @@ SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue(); SDValue In = Op.getOperand(IsStrict ? 1 : 0); - SDValue Op2 = Op.getOperand(IsStrict ? 
2 : 1); MVT VT = Op.getSimpleValueType(); MVT SVT = In.getSimpleValueType(); if (SVT == MVT::f128 || (VT == MVT::f16 && SVT == MVT::f80)) return SDValue(); - if (VT == MVT::f16) { - if (Subtarget.hasFP16()) - return Op; - - if (SVT != MVT::f32) { - if (IsStrict) - return DAG.getNode( - ISD::STRICT_FP_ROUND, DL, {VT, MVT::Other}, - {Chain, - DAG.getNode(ISD::STRICT_FP_ROUND, DL, {MVT::f32, MVT::Other}, - {Chain, In, Op2}), - Op2}); - - return DAG.getNode(ISD::FP_ROUND, DL, VT, - DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, In, Op2), - Op2); - } - - if (!Subtarget.hasF16C()) + if (VT.getScalarType() == MVT::f16 && !Subtarget.hasFP16()) { + if (!Subtarget.hasF16C() || SVT.getScalarType() != MVT::f32) return SDValue(); + if (VT.isVector()) + return Op; + SDValue Res; SDValue Rnd = DAG.getTargetConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, DL, MVT::i32); @@ -24176,10 +24221,10 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, SDLoc dl(Op); if (isFP) { -#ifndef NDEBUG MVT EltVT = Op0.getSimpleValueType().getVectorElementType(); assert(EltVT == MVT::f16 || EltVT == MVT::f32 || EltVT == MVT::f64); -#endif + if (isSoftFP16(EltVT, Subtarget)) + return SDValue(); bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS; SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue(); @@ -24741,6 +24786,9 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get(); + if (isSoftFP16(Op0.getValueType())) + return SDValue(); + // Handle f128 first, since one possible outcome is a normal integer // comparison which gets handled by emitFlagsForSetcc. if (Op0.getValueType() == MVT::f128) { @@ -24931,10 +24979,12 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { MVT VT = Op1.getSimpleValueType(); SDValue CC; - if (isSoftFP16(VT)) - return DAG.getBitcast(MVT::f16, DAG.getNode(ISD::SELECT, DL, MVT::i16, Cond, - DAG.getBitcast(MVT::i16, Op1), - DAG.getBitcast(MVT::i16, Op2))); + if (isSoftFP16(VT)) { + MVT NVT = VT.changeTypeToInteger(); + return DAG.getBitcast(VT, DAG.getNode(ISD::SELECT, DL, NVT, Cond, + DAG.getBitcast(NVT, Op1), + DAG.getBitcast(NVT, Op2))); + } // Lower FP selects into a CMP/AND/ANDN/OR sequence when the necessary SSE ops // are available or VBLENDV if AVX is available. @@ -27268,27 +27318,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, } return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); } - case Intrinsic::swift_async_context_addr: { - auto &MF = DAG.getMachineFunction(); - auto X86FI = MF.getInfo<X86MachineFunctionInfo>(); - if (Subtarget.is64Bit()) { - MF.getFrameInfo().setFrameAddressIsTaken(true); - X86FI->setHasSwiftAsyncContext(true); - return SDValue( - DAG.getMachineNode( - X86::SUB64ri8, dl, MVT::i64, - DAG.getCopyFromReg(DAG.getEntryNode(), dl, X86::RBP, MVT::i64), - DAG.getTargetConstant(8, dl, MVT::i32)), - 0); - } else { - // 32-bit so no special extended frame, create or reuse an existing stack - // slot. 
- if (!X86FI->getSwiftAsyncContextFrameIdx()) - X86FI->setSwiftAsyncContextFrameIdx( - MF.getFrameInfo().CreateStackObject(4, Align(4), false)); - return DAG.getFrameIndex(*X86FI->getSwiftAsyncContextFrameIdx(), MVT::i32); - } - } case Intrinsic::x86_avx512_vp2intersect_q_512: case Intrinsic::x86_avx512_vp2intersect_q_256: case Intrinsic::x86_avx512_vp2intersect_q_128: @@ -27668,6 +27697,37 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, const IntrinsicData *IntrData = getIntrinsicWithChain(IntNo); if (!IntrData) { switch (IntNo) { + + case Intrinsic::swift_async_context_addr: { + SDLoc dl(Op); + auto &MF = DAG.getMachineFunction(); + auto X86FI = MF.getInfo<X86MachineFunctionInfo>(); + if (Subtarget.is64Bit()) { + MF.getFrameInfo().setFrameAddressIsTaken(true); + X86FI->setHasSwiftAsyncContext(true); + SDValue Chain = Op->getOperand(0); + SDValue CopyRBP = DAG.getCopyFromReg(Chain, dl, X86::RBP, MVT::i64); + SDValue Result = + SDValue(DAG.getMachineNode(X86::SUB64ri8, dl, MVT::i64, CopyRBP, + DAG.getTargetConstant(8, dl, MVT::i32)), + 0); + // Return { result, chain }. + return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, + CopyRBP.getValue(1)); + } else { + // 32-bit so no special extended frame, create or reuse an existing + // stack slot. + if (!X86FI->getSwiftAsyncContextFrameIdx()) + X86FI->setSwiftAsyncContextFrameIdx( + MF.getFrameInfo().CreateStackObject(4, Align(4), false)); + SDValue Result = + DAG.getFrameIndex(*X86FI->getSwiftAsyncContextFrameIdx(), MVT::i32); + // Return { result, chain }. + return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, + Op->getOperand(0)); + } + } + case llvm::Intrinsic::x86_seh_ehregnode: return MarkEHRegistrationNode(Op, DAG); case llvm::Intrinsic::x86_seh_ehguard: @@ -32901,20 +32961,39 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, case ISD::STRICT_FP_ROUND: case ISD::FP_ROUND: { bool IsStrict = N->isStrictFPOpcode(); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); SDValue Src = N->getOperand(IsStrict ? 1 : 0); + SDValue Rnd = N->getOperand(IsStrict ? 2 : 1); + EVT SrcVT = Src.getValueType(); EVT VT = N->getValueType(0); - EVT NewVT = VT.getVectorElementType() == MVT::f16 ? MVT::v8f16 : MVT::v4f32; + SDValue V; if (VT == MVT::v2f16 && Src.getValueType() == MVT::v2f32) { SDValue Ext = IsStrict ? DAG.getConstantFP(0.0, dl, MVT::v2f32) : DAG.getUNDEF(MVT::v2f32); Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src, Ext); } + if (!Subtarget.hasFP16() && VT.getVectorElementType() == MVT::f16) { + assert(Subtarget.hasF16C() && "Cannot widen f16 without F16C"); + if (SrcVT.getVectorElementType() != MVT::f32) + return; + + if (IsStrict) + V = DAG.getNode(X86ISD::STRICT_CVTPS2PH, dl, {MVT::v8i16, MVT::Other}, + {Chain, Src, Rnd}); + else + V = DAG.getNode(X86ISD::CVTPS2PH, dl, MVT::v8i16, Src, Rnd); + + Results.push_back(DAG.getBitcast(MVT::v8f16, V)); + if (IsStrict) + Results.push_back(V.getValue(1)); + return; + } if (!isTypeLegal(Src.getValueType())) return; - SDValue V; + EVT NewVT = VT.getVectorElementType() == MVT::f16 ? 
MVT::v8f16 : MVT::v4f32; if (IsStrict) V = DAG.getNode(X86ISD::STRICT_VFPROUND, dl, {NewVT, MVT::Other}, - {N->getOperand(0), Src}); + {Chain, Src}); else V = DAG.getNode(X86ISD::VFPROUND, dl, NewVT, Src); Results.push_back(V); @@ -37342,6 +37421,7 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask, bool IsUnary) { unsigned NumMaskElts = Mask.size(); unsigned EltSizeInBits = MaskVT.getScalarSizeInBits(); + unsigned SizeInBits = MaskVT.getSizeInBits(); if (MaskVT.is128BitVector()) { if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0}, DAG) && @@ -37409,7 +37489,10 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask, // Attempt to match against a OR if we're performing a blend shuffle and the // non-blended source element is zero in each case. - if ((EltSizeInBits % V1.getScalarValueSizeInBits()) == 0 && + // TODO: Handle cases where V1/V2 sizes doesn't match SizeInBits. + if (SizeInBits == V1.getValueSizeInBits() && + SizeInBits == V2.getValueSizeInBits() && + (EltSizeInBits % V1.getScalarValueSizeInBits()) == 0 && (EltSizeInBits % V2.getScalarValueSizeInBits()) == 0) { bool IsBlend = true; unsigned NumV1Elts = V1.getValueType().getVectorNumElements(); @@ -39652,11 +39735,6 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, SmallVector<int, 4> Mask; unsigned Opcode = N.getOpcode(); - // FIXME: Remove this after we support vector FP16 - if (isSoftFP16(peekThroughBitcasts(N.getOperand(0)).getSimpleValueType(), - Subtarget)) - return SDValue(); - if (SDValue R = combineCommutableSHUFP(N, VT, DL, DAG)) return R; @@ -40947,12 +41025,20 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( EltBits)) { OpBits.clearAllBits(); OpElts.clearAllBits(); - for (int I = 0; I != NumElts; ++I) - if (DemandedElts[I] && ((Invert && !EltBits[I].isAllOnes()) || - (!Invert && !EltBits[I].isZero()))) { + for (int I = 0; I != NumElts; ++I) { + if (!DemandedElts[I]) + continue; + if (UndefElts[I]) { + // We can't assume an undef src element gives an undef dst - the + // other src might be zero. + OpBits.setAllBits(); + OpElts.setBit(I); + } else if ((Invert && !EltBits[I].isAllOnes()) || + (!Invert && !EltBits[I].isZero())) { OpBits |= Invert ? ~EltBits[I] : EltBits[I]; OpElts.setBit(I); } + } } return std::make_pair(OpBits, OpElts); }; @@ -44715,7 +44801,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, } // Early exit check - if (!TLI.isTypeLegal(VT)) + if (!TLI.isTypeLegal(VT) || isSoftFP16(VT, Subtarget)) return SDValue(); if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG, DCI, Subtarget)) @@ -47798,11 +47884,17 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG, EltBits)) { DemandedBits.clearAllBits(); DemandedElts.clearAllBits(); - for (int I = 0; I != NumElts; ++I) - if (!EltBits[I].isZero()) { + for (int I = 0; I != NumElts; ++I) { + if (UndefElts[I]) { + // We can't assume an undef src element gives an undef dst - the + // other src might be zero. 
+ DemandedBits.setAllBits(); + DemandedElts.setBit(I); + } else if (!EltBits[I].isZero()) { DemandedBits |= EltBits[I]; DemandedElts.setBit(I); } + } } return std::make_pair(DemandedBits, DemandedElts); }; @@ -51042,6 +51134,8 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG, SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); MVT VT = N->getSimpleValueType(0); + int NumElts = VT.getVectorNumElements(); + unsigned EltSizeInBits = VT.getScalarSizeInBits(); // ANDNP(undef, x) -> 0 // ANDNP(x, undef) -> 0 @@ -51060,6 +51154,19 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG, if (SDValue Not = IsNOT(N0, DAG)) return DAG.getNode(ISD::AND, SDLoc(N), VT, DAG.getBitcast(VT, Not), N1); + // Constant Folding + APInt Undefs0, Undefs1; + SmallVector<APInt> EltBits0, EltBits1; + if (getTargetConstantBitsFromNode(N0, EltSizeInBits, Undefs0, EltBits0) && + getTargetConstantBitsFromNode(N1, EltSizeInBits, Undefs1, EltBits1)) { + SDLoc DL(N); + SmallVector<APInt> ResultBits; + for (int I = 0; I != NumElts; ++I) + ResultBits.push_back(~EltBits0[I] & EltBits1[I]); + APInt ResultUndefs = APInt::getZero(NumElts); + return getConstVector(ResultBits, ResultUndefs, VT, DAG, DL); + } + // TODO: Constant fold NOT(N0) to allow us to use AND. // TODO: Do this in IsNOT with suitable oneuse checks? @@ -51074,20 +51181,24 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG, auto GetDemandedMasks = [&](SDValue Op, bool Invert = false) { APInt UndefElts; SmallVector<APInt> EltBits; - int NumElts = VT.getVectorNumElements(); - int EltSizeInBits = VT.getScalarSizeInBits(); APInt DemandedBits = APInt::getAllOnes(EltSizeInBits); APInt DemandedElts = APInt::getAllOnes(NumElts); if (getTargetConstantBitsFromNode(Op, EltSizeInBits, UndefElts, EltBits)) { DemandedBits.clearAllBits(); DemandedElts.clearAllBits(); - for (int I = 0; I != NumElts; ++I) - if ((Invert && !EltBits[I].isAllOnes()) || - (!Invert && !EltBits[I].isZero())) { + for (int I = 0; I != NumElts; ++I) { + if (UndefElts[I]) { + // We can't assume an undef src element gives an undef dst - the + // other src might be zero. + DemandedBits.setAllBits(); + DemandedElts.setBit(I); + } else if ((Invert && !EltBits[I].isAllOnes()) || + (!Invert && !EltBits[I].isZero())) { DemandedBits |= Invert ? ~EltBits[I] : EltBits[I]; DemandedElts.setBit(I); } + } } return std::make_pair(DemandedBits, DemandedElts); }; @@ -54714,8 +54825,9 @@ static SDValue combineFP_ROUND(SDNode *N, SelectionDAG &DAG, if (Subtarget.hasFP16()) return SDValue(); + bool IsStrict = N->isStrictFPOpcode(); EVT VT = N->getValueType(0); - SDValue Src = N->getOperand(0); + SDValue Src = N->getOperand(IsStrict ? 1 : 0); EVT SrcVT = Src.getValueType(); if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 || @@ -54736,8 +54848,15 @@ static SDValue combineFP_ROUND(SDNode *N, SelectionDAG &DAG, // Destination is v8i16 with at least 8 elements. EVT CvtVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, std::max(8U, NumElts)); - SDValue Cvt = DAG.getNode(X86ISD::CVTPS2PH, dl, CvtVT, Src, - DAG.getTargetConstant(4, dl, MVT::i32)); + SDValue Cvt, Chain; + SDValue Rnd = DAG.getTargetConstant(4, dl, MVT::i32); + if (IsStrict) { + Cvt = DAG.getNode(X86ISD::STRICT_CVTPS2PH, dl, {CvtVT, MVT::Other}, + {N->getOperand(0), Src, Rnd}); + Chain = Cvt.getValue(1); + } else { + Cvt = DAG.getNode(X86ISD::CVTPS2PH, dl, CvtVT, Src, Rnd); + } // Extract down to real number of elements. 
if (NumElts < 8) { @@ -54746,7 +54865,12 @@ static SDValue combineFP_ROUND(SDNode *N, SelectionDAG &DAG, DAG.getIntPtrConstant(0, dl)); } - return DAG.getBitcast(VT, Cvt); + Cvt = DAG.getBitcast(VT, Cvt); + + if (IsStrict) + return DAG.getMergeValues({Cvt, Chain}, dl); + + return Cvt; } static SDValue combineMOVDQ2Q(SDNode *N, SelectionDAG &DAG) { @@ -54954,6 +55078,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::FP16_TO_FP: return combineFP16_TO_FP(N, DAG, Subtarget); case ISD::STRICT_FP_EXTEND: case ISD::FP_EXTEND: return combineFP_EXTEND(N, DAG, Subtarget); + case ISD::STRICT_FP_ROUND: case ISD::FP_ROUND: return combineFP_ROUND(N, DAG, Subtarget); case X86ISD::VBROADCAST_LOAD: case X86ISD::SUBV_BROADCAST_LOAD: return combineBROADCAST_LOAD(N, DAG, DCI); diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 48da7b3ac882..c105bde78ad1 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -3769,12 +3769,16 @@ let Predicates = [HasAVX512] in { (VMOVDQA64Zrm addr:$src)>; def : Pat<(alignedloadv32i16 addr:$src), (VMOVDQA64Zrm addr:$src)>; + def : Pat<(alignedloadv32f16 addr:$src), + (VMOVAPSZrm addr:$src)>; def : Pat<(alignedloadv64i8 addr:$src), (VMOVDQA64Zrm addr:$src)>; def : Pat<(loadv16i32 addr:$src), (VMOVDQU64Zrm addr:$src)>; def : Pat<(loadv32i16 addr:$src), (VMOVDQU64Zrm addr:$src)>; + def : Pat<(loadv32f16 addr:$src), + (VMOVUPSZrm addr:$src)>; def : Pat<(loadv64i8 addr:$src), (VMOVDQU64Zrm addr:$src)>; @@ -3783,12 +3787,16 @@ let Predicates = [HasAVX512] in { (VMOVDQA64Zmr addr:$dst, VR512:$src)>; def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst), (VMOVDQA64Zmr addr:$dst, VR512:$src)>; + def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst), + (VMOVAPSZmr addr:$dst, VR512:$src)>; def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst), (VMOVDQA64Zmr addr:$dst, VR512:$src)>; def : Pat<(store (v16i32 VR512:$src), addr:$dst), (VMOVDQU64Zmr addr:$dst, VR512:$src)>; def : Pat<(store (v32i16 VR512:$src), addr:$dst), (VMOVDQU64Zmr addr:$dst, VR512:$src)>; + def : Pat<(store (v32f16 VR512:$src), addr:$dst), + (VMOVUPSZmr addr:$dst, VR512:$src)>; def : Pat<(store (v64i8 VR512:$src), addr:$dst), (VMOVDQU64Zmr addr:$dst, VR512:$src)>; } @@ -3799,12 +3807,16 @@ let Predicates = [HasVLX] in { (VMOVDQA64Z128rm addr:$src)>; def : Pat<(alignedloadv8i16 addr:$src), (VMOVDQA64Z128rm addr:$src)>; + def : Pat<(alignedloadv8f16 addr:$src), + (VMOVAPSZ128rm addr:$src)>; def : Pat<(alignedloadv16i8 addr:$src), (VMOVDQA64Z128rm addr:$src)>; def : Pat<(loadv4i32 addr:$src), (VMOVDQU64Z128rm addr:$src)>; def : Pat<(loadv8i16 addr:$src), (VMOVDQU64Z128rm addr:$src)>; + def : Pat<(loadv8f16 addr:$src), + (VMOVUPSZ128rm addr:$src)>; def : Pat<(loadv16i8 addr:$src), (VMOVDQU64Z128rm addr:$src)>; @@ -3813,12 +3825,16 @@ let Predicates = [HasVLX] in { (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst), (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; + def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst), + (VMOVAPSZ128mr addr:$dst, VR128X:$src)>; def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst), (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; def : Pat<(store (v4i32 VR128X:$src), addr:$dst), (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; def : Pat<(store (v8i16 VR128X:$src), addr:$dst), (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; + def : Pat<(store (v8f16 VR128X:$src), addr:$dst), + (VMOVUPSZ128mr addr:$dst, VR128X:$src)>; def : Pat<(store (v16i8 
VR128X:$src), addr:$dst), (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; @@ -3827,12 +3843,16 @@ let Predicates = [HasVLX] in { (VMOVDQA64Z256rm addr:$src)>; def : Pat<(alignedloadv16i16 addr:$src), (VMOVDQA64Z256rm addr:$src)>; + def : Pat<(alignedloadv16f16 addr:$src), + (VMOVAPSZ256rm addr:$src)>; def : Pat<(alignedloadv32i8 addr:$src), (VMOVDQA64Z256rm addr:$src)>; def : Pat<(loadv8i32 addr:$src), (VMOVDQU64Z256rm addr:$src)>; def : Pat<(loadv16i16 addr:$src), (VMOVDQU64Z256rm addr:$src)>; + def : Pat<(loadv16f16 addr:$src), + (VMOVUPSZ256rm addr:$src)>; def : Pat<(loadv32i8 addr:$src), (VMOVDQU64Z256rm addr:$src)>; @@ -3841,12 +3861,16 @@ let Predicates = [HasVLX] in { (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>; def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst), (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>; + def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst), + (VMOVAPSZ256mr addr:$dst, VR256X:$src)>; def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst), (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>; def : Pat<(store (v8i32 VR256X:$src), addr:$dst), (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>; def : Pat<(store (v16i16 VR256X:$src), addr:$dst), (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>; + def : Pat<(store (v16f16 VR256X:$src), addr:$dst), + (VMOVUPSZ256mr addr:$dst, VR256X:$src)>; def : Pat<(store (v32i8 VR256X:$src), addr:$dst), (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>; } @@ -3855,16 +3879,12 @@ let Predicates = [HasBWI] in { (VMOVDQU16Zrrk VR512:$src0, VK32WM:$mask, VR512:$src1)>; def : Pat<(v32f16 (vselect VK32WM:$mask, (v32f16 VR512:$src1), v32f16_info.ImmAllZerosV)), (VMOVDQU16Zrrkz VK32WM:$mask, VR512:$src1)>; - def : Pat<(v32f16 (alignedloadv32f16 addr:$src)), - (VMOVAPSZrm addr:$src)>; def : Pat<(v32f16 (vselect VK32WM:$mask, (v32f16 (alignedloadv32f16 addr:$src)), (v32f16 VR512:$src0))), (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>; def : Pat<(v32f16 (vselect VK32WM:$mask, (v32f16 (alignedloadv32f16 addr:$src)), v32f16_info.ImmAllZerosV)), (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>; - def : Pat<(v32f16 (loadv32f16 addr:$src)), - (VMOVUPSZrm addr:$src)>; def : Pat<(v32f16 (vselect VK32WM:$mask, (v32f16 (loadv32f16 addr:$src)), (v32f16 VR512:$src0))), (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>; @@ -3878,10 +3898,6 @@ let Predicates = [HasBWI] in { def : Pat<(v32f16 (masked_load addr:$src, VK32WM:$mask, v32f16_info.ImmAllZerosV)), (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>; - def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst), - (VMOVAPSZmr addr:$dst, VR512:$src)>; - def : Pat<(store (v32f16 VR512:$src), addr:$dst), - (VMOVUPSZmr addr:$dst, VR512:$src)>; def : Pat<(masked_store (v32f16 VR512:$src), addr:$dst, VK32WM:$mask), (VMOVDQU16Zmrk addr:$dst, VK32WM:$mask, VR512:$src)>; } @@ -3890,16 +3906,12 @@ let Predicates = [HasBWI, HasVLX] in { (VMOVDQU16Z256rrk VR256X:$src0, VK16WM:$mask, VR256X:$src1)>; def : Pat<(v16f16 (vselect VK16WM:$mask, (v16f16 VR256X:$src1), v16f16x_info.ImmAllZerosV)), (VMOVDQU16Z256rrkz VK16WM:$mask, VR256X:$src1)>; - def : Pat<(v16f16 (alignedloadv16f16 addr:$src)), - (VMOVAPSZ256rm addr:$src)>; def : Pat<(v16f16 (vselect VK16WM:$mask, (v16f16 (alignedloadv16f16 addr:$src)), (v16f16 VR256X:$src0))), (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>; def : Pat<(v16f16 (vselect VK16WM:$mask, (v16f16 (alignedloadv16f16 addr:$src)), v16f16x_info.ImmAllZerosV)), (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>; - def : Pat<(v16f16 (loadv16f16 addr:$src)), - (VMOVUPSZ256rm addr:$src)>; def : Pat<(v16f16 (vselect VK16WM:$mask, 
(v16f16 (loadv16f16 addr:$src)), (v16f16 VR256X:$src0))), (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>; @@ -3913,10 +3925,6 @@ let Predicates = [HasBWI, HasVLX] in { def : Pat<(v16f16 (masked_load addr:$src, VK16WM:$mask, v16f16x_info.ImmAllZerosV)), (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>; - def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst), - (VMOVAPSZ256mr addr:$dst, VR256X:$src)>; - def : Pat<(store (v16f16 VR256X:$src), addr:$dst), - (VMOVUPSZ256mr addr:$dst, VR256X:$src)>; def : Pat<(masked_store (v16f16 VR256X:$src), addr:$dst, VK16WM:$mask), (VMOVDQU16Z256mrk addr:$dst, VK16WM:$mask, VR256X:$src)>; @@ -3924,16 +3932,12 @@ let Predicates = [HasBWI, HasVLX] in { (VMOVDQU16Z128rrk VR128X:$src0, VK8WM:$mask, VR128X:$src1)>; def : Pat<(v8f16 (vselect VK8WM:$mask, (v8f16 VR128X:$src1), v8f16x_info.ImmAllZerosV)), (VMOVDQU16Z128rrkz VK8WM:$mask, VR128X:$src1)>; - def : Pat<(v8f16 (alignedloadv8f16 addr:$src)), - (VMOVAPSZ128rm addr:$src)>; def : Pat<(v8f16 (vselect VK8WM:$mask, (v8f16 (alignedloadv8f16 addr:$src)), (v8f16 VR128X:$src0))), (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>; def : Pat<(v8f16 (vselect VK8WM:$mask, (v8f16 (alignedloadv8f16 addr:$src)), v8f16x_info.ImmAllZerosV)), (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>; - def : Pat<(v8f16 (loadv8f16 addr:$src)), - (VMOVUPSZ128rm addr:$src)>; def : Pat<(v8f16 (vselect VK8WM:$mask, (v8f16 (loadv8f16 addr:$src)), (v8f16 VR128X:$src0))), (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>; @@ -3947,10 +3951,6 @@ let Predicates = [HasBWI, HasVLX] in { def : Pat<(v8f16 (masked_load addr:$src, VK8WM:$mask, v8f16x_info.ImmAllZerosV)), (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>; - def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst), - (VMOVAPSZ128mr addr:$dst, VR128X:$src)>; - def : Pat<(store (v8f16 VR128X:$src), addr:$dst), - (VMOVUPSZ128mr addr:$dst, VR128X:$src)>; def : Pat<(masked_store (v8f16 VR128X:$src), addr:$dst, VK8WM:$mask), (VMOVDQU16Z128mrk addr:$dst, VK8WM:$mask, VR128X:$src)>; } diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index ec32ac2acad1..74ef831e1658 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -742,8 +742,8 @@ static bool regIsPICBase(Register BaseReg, const MachineRegisterInfo &MRI) { return isPICBase; } -bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA) const { +bool X86InstrInfo::isReallyTriviallyReMaterializable( + const MachineInstr &MI) const { switch (MI.getOpcode()) { default: // This function should only be called for opcodes with the ReMaterializable @@ -869,7 +869,7 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, MI.getOperand(1 + X86::AddrScaleAmt).isImm() && MI.getOperand(1 + X86::AddrIndexReg).isReg() && MI.getOperand(1 + X86::AddrIndexReg).getReg() == 0 && - MI.isDereferenceableInvariantLoad(AA)) { + MI.isDereferenceableInvariantLoad()) { Register BaseReg = MI.getOperand(1 + X86::AddrBaseReg).getReg(); if (BaseReg == 0 || BaseReg == X86::RIP) return true; @@ -3892,6 +3892,10 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, Register DestReg, int FrameIdx, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { + const MachineFunction &MF = *MBB.getParent(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); + assert(MFI.getObjectSize(FrameIdx) >= TRI->getSpillSize(*RC) && + "Load size exceeds stack slot"); if (RC->getID() == X86::TILERegClassID) { unsigned 
Opc = X86::TILELOADD; // tileloadd (%sp, %idx), %tmm @@ -3913,8 +3917,6 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc), DestReg), FrameIdx); } else { - const MachineFunction &MF = *MBB.getParent(); - const MachineFrameInfo &MFI = MF.getFrameInfo(); unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16); bool isAligned = (Subtarget.getFrameLowering()->getStackAlign() >= Alignment) || diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index 4943d2152fd2..98da00c39bdb 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -240,8 +240,7 @@ public: unsigned isStoreToStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override; - bool isReallyTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA) const override; + bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override; void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 06cb280e860a..c5557bd5df4e 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -140,6 +140,7 @@ def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "", let Predicates = [NoAVX512] in { def : Pat<(v16i8 immAllZerosV), (V_SET0)>; def : Pat<(v8i16 immAllZerosV), (V_SET0)>; +def : Pat<(v8f16 immAllZerosV), (V_SET0)>; def : Pat<(v4i32 immAllZerosV), (V_SET0)>; def : Pat<(v2i64 immAllZerosV), (V_SET0)>; def : Pat<(v2f64 immAllZerosV), (V_SET0)>; @@ -159,6 +160,7 @@ def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "", let Predicates = [NoAVX512] in { def : Pat<(v32i8 immAllZerosV), (AVX_SET0)>; def : Pat<(v16i16 immAllZerosV), (AVX_SET0)>; +def : Pat<(v16f16 immAllZerosV), (AVX_SET0)>; def : Pat<(v4i64 immAllZerosV), (AVX_SET0)>; def : Pat<(v8f32 immAllZerosV), (AVX_SET0)>; def : Pat<(v4f64 immAllZerosV), (AVX_SET0)>; @@ -572,6 +574,23 @@ let Predicates = [HasAVX, NoVLX] in { (VMOVUPSYmr addr:$dst, VR256:$src)>; def : Pat<(store (v32i8 VR256:$src), addr:$dst), (VMOVUPSYmr addr:$dst, VR256:$src)>; + + def : Pat<(alignedloadv8f16 addr:$src), + (VMOVAPSrm addr:$src)>; + def : Pat<(loadv8f16 addr:$src), + (VMOVUPSrm addr:$src)>; + def : Pat<(alignedstore (v8f16 VR128:$src), addr:$dst), + (VMOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v8f16 VR128:$src), addr:$dst), + (VMOVUPSmr addr:$dst, VR128:$src)>; + def : Pat<(alignedloadv16f16 addr:$src), + (VMOVAPSYrm addr:$src)>; + def : Pat<(loadv16f16 addr:$src), + (VMOVUPSYrm addr:$src)>; + def : Pat<(alignedstore (v16f16 VR256:$src), addr:$dst), + (VMOVAPSYmr addr:$dst, VR256:$src)>; + def : Pat<(store (v16f16 VR256:$src), addr:$dst), + (VMOVUPSYmr addr:$dst, VR256:$src)>; } // Use movaps / movups for SSE integer load / store (one byte shorter). 
@@ -613,6 +632,17 @@ let Predicates = [UseSSE1] in { (MOVUPSmr addr:$dst, VR128:$src)>; } +let Predicates = [UseSSE2] in { + def : Pat<(alignedloadv8f16 addr:$src), + (MOVAPSrm addr:$src)>; + def : Pat<(loadv8f16 addr:$src), + (MOVUPSrm addr:$src)>; + def : Pat<(alignedstore (v8f16 VR128:$src), addr:$dst), + (MOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v8f16 VR128:$src), addr:$dst), + (MOVUPSmr addr:$dst, VR128:$src)>; +} + //===----------------------------------------------------------------------===// // SSE 1 & 2 - Move Low packed FP Instructions //===----------------------------------------------------------------------===// @@ -3136,6 +3166,8 @@ let Predicates = [HasAVX, NoVLX] in { (VMOVNTDQYmr addr:$dst, VR256:$src)>; def : Pat<(alignednontemporalstore (v16i16 VR256:$src), addr:$dst), (VMOVNTDQYmr addr:$dst, VR256:$src)>; + def : Pat<(alignednontemporalstore (v16f16 VR256:$src), addr:$dst), + (VMOVNTDQYmr addr:$dst, VR256:$src)>; def : Pat<(alignednontemporalstore (v32i8 VR256:$src), addr:$dst), (VMOVNTDQYmr addr:$dst, VR256:$src)>; @@ -3143,6 +3175,8 @@ let Predicates = [HasAVX, NoVLX] in { (VMOVNTDQmr addr:$dst, VR128:$src)>; def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst), (VMOVNTDQmr addr:$dst, VR128:$src)>; + def : Pat<(alignednontemporalstore (v8f16 VR128:$src), addr:$dst), + (VMOVNTDQmr addr:$dst, VR128:$src)>; def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst), (VMOVNTDQmr addr:$dst, VR128:$src)>; } @@ -3152,6 +3186,8 @@ let Predicates = [UseSSE2] in { (MOVNTDQmr addr:$dst, VR128:$src)>; def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst), (MOVNTDQmr addr:$dst, VR128:$src)>; + def : Pat<(alignednontemporalstore (v8f16 VR128:$src), addr:$dst), + (MOVNTDQmr addr:$dst, VR128:$src)>; def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst), (MOVNTDQmr addr:$dst, VR128:$src)>; } @@ -3374,12 +3410,16 @@ let Predicates = [HasAVX, NoVLX] in { (VMOVDQArm addr:$src)>; def : Pat<(alignedloadv8i16 addr:$src), (VMOVDQArm addr:$src)>; + def : Pat<(alignedloadv8f16 addr:$src), + (VMOVDQArm addr:$src)>; def : Pat<(alignedloadv16i8 addr:$src), (VMOVDQArm addr:$src)>; def : Pat<(loadv4i32 addr:$src), (VMOVDQUrm addr:$src)>; def : Pat<(loadv8i16 addr:$src), (VMOVDQUrm addr:$src)>; + def : Pat<(loadv8f16 addr:$src), + (VMOVDQUrm addr:$src)>; def : Pat<(loadv16i8 addr:$src), (VMOVDQUrm addr:$src)>; @@ -3387,12 +3427,16 @@ let Predicates = [HasAVX, NoVLX] in { (VMOVDQAmr addr:$dst, VR128:$src)>; def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst), (VMOVDQAmr addr:$dst, VR128:$src)>; + def : Pat<(alignedstore (v8f16 VR128:$src), addr:$dst), + (VMOVDQAmr addr:$dst, VR128:$src)>; def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst), (VMOVDQAmr addr:$dst, VR128:$src)>; def : Pat<(store (v4i32 VR128:$src), addr:$dst), (VMOVDQUmr addr:$dst, VR128:$src)>; def : Pat<(store (v8i16 VR128:$src), addr:$dst), (VMOVDQUmr addr:$dst, VR128:$src)>; + def : Pat<(store (v8f16 VR128:$src), addr:$dst), + (VMOVDQUmr addr:$dst, VR128:$src)>; def : Pat<(store (v16i8 VR128:$src), addr:$dst), (VMOVDQUmr addr:$dst, VR128:$src)>; } @@ -6431,6 +6475,8 @@ let Predicates = [HasAVX2, NoVLX] in { (VMOVNTDQAYrm addr:$src)>; def : Pat<(v16i16 (alignednontemporalload addr:$src)), (VMOVNTDQAYrm addr:$src)>; + def : Pat<(v16f16 (alignednontemporalload addr:$src)), + (VMOVNTDQAYrm addr:$src)>; def : Pat<(v32i8 (alignednontemporalload addr:$src)), (VMOVNTDQAYrm addr:$src)>; } @@ -6446,6 +6492,8 @@ let Predicates = [HasAVX, NoVLX] in { (VMOVNTDQArm addr:$src)>; def : 
Pat<(v8i16 (alignednontemporalload addr:$src)), (VMOVNTDQArm addr:$src)>; + def : Pat<(v8f16 (alignednontemporalload addr:$src)), + (VMOVNTDQArm addr:$src)>; def : Pat<(v16i8 (alignednontemporalload addr:$src)), (VMOVNTDQArm addr:$src)>; } @@ -6461,6 +6509,8 @@ let Predicates = [UseSSE41] in { (MOVNTDQArm addr:$src)>; def : Pat<(v8i16 (alignednontemporalload addr:$src)), (MOVNTDQArm addr:$src)>; + def : Pat<(v8f16 (alignednontemporalload addr:$src)), + (MOVNTDQArm addr:$src)>; def : Pat<(v16i8 (alignednontemporalload addr:$src)), (MOVNTDQArm addr:$src)>; } @@ -7050,6 +7100,8 @@ def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)), (VBROADCASTF128 addr:$src)>; def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)), (VBROADCASTF128 addr:$src)>; +def : Pat<(v16f16 (X86SubVBroadcastld128 addr:$src)), + (VBROADCASTF128 addr:$src)>; def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)), (VBROADCASTF128 addr:$src)>; } @@ -7095,6 +7147,7 @@ let Predicates = [HasAVX1Only] in { defm : vperm2x128_lowering<"VPERM2F128", v4i64, loadv4i64>; defm : vperm2x128_lowering<"VPERM2F128", v8i32, loadv8i32>; defm : vperm2x128_lowering<"VPERM2F128", v16i16, loadv16i16>; + defm : vperm2x128_lowering<"VPERM2F128", v16f16, loadv16f16>; defm : vperm2x128_lowering<"VPERM2F128", v32i8, loadv32i8>; } @@ -7150,6 +7203,8 @@ let Predicates = [HasAVX1Only] in { defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v2i64, v4i64, loadv2i64, loadv4i64>; defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v4i32, v8i32, loadv4i32, loadv8i32>; defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v8i16, v16i16, loadv8i16, loadv16i16>; + defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v8f16, v16f16, loadv8f16, loadv16f16>; + defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v16i8, v32i8, loadv16i8, loadv32i8>; defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v16i8, v32i8, loadv16i8, loadv32i8>; } @@ -7189,6 +7244,8 @@ let Predicates = [HasAVX1Only] in { defm : vextract_lowering<"VEXTRACTF128", v4i64, v2i64>; defm : vextract_lowering<"VEXTRACTF128", v8i32, v4i32>; defm : vextract_lowering<"VEXTRACTF128", v16i16, v8i16>; + defm : vextract_lowering<"VEXTRACTF128", v16f16, v8f16>; + defm : vextract_lowering<"VEXTRACTF128", v32i8, v16i8>; defm : vextract_lowering<"VEXTRACTF128", v32i8, v16i8>; } @@ -7503,6 +7560,10 @@ def : Pat<(insert_subvector (v16i16 VR256:$src1), (v8i16 VR128:$src2), (iPTR 0)) (VBLENDPSYrri VR256:$src1, (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src2, sub_xmm), 0xf)>; +def : Pat<(insert_subvector (v16f16 VR256:$src1), (v8f16 VR128:$src2), (iPTR 0)), + (VBLENDPSYrri VR256:$src1, + (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), + VR128:$src2, sub_xmm), 0xf)>; def : Pat<(insert_subvector (v32i8 VR256:$src1), (v16i8 VR128:$src2), (iPTR 0)), (VBLENDPSYrri VR256:$src1, (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), @@ -7517,6 +7578,9 @@ def : Pat<(insert_subvector (loadv4i64 addr:$src2), (v2i64 VR128:$src1), (iPTR 0 def : Pat<(insert_subvector (loadv16i16 addr:$src2), (v8i16 VR128:$src1), (iPTR 0)), (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src1, sub_xmm), addr:$src2, 0xf0)>; +def : Pat<(insert_subvector (loadv16f16 addr:$src2), (v8f16 VR128:$src1), (iPTR 0)), + (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), + VR128:$src1, sub_xmm), addr:$src2, 0xf0)>; def : Pat<(insert_subvector (loadv32i8 addr:$src2), (v16i8 VR128:$src1), (iPTR 0)), (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src1, sub_xmm), addr:$src2, 0xf0)>; @@ -7759,6 +7823,8 @@ let Predicates = [HasAVX2] in { defm : 
vperm2x128_lowering<"VPERM2I128", v4i64, loadv4i64>; defm : vperm2x128_lowering<"VPERM2I128", v8i32, loadv8i32>; defm : vperm2x128_lowering<"VPERM2I128", v16i16, loadv16i16>; + defm : vperm2x128_lowering<"VPERM2I128", v16f16, loadv16f16>; + defm : vperm2x128_lowering<"VPERM2I128", v32i8, loadv32i8>; defm : vperm2x128_lowering<"VPERM2I128", v32i8, loadv32i8>; } @@ -7781,6 +7847,8 @@ let Predicates = [HasAVX2, NoVLX] in { defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v2i64, v4i64, loadv2i64, loadv4i64>; defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v4i32, v8i32, loadv4i32, loadv8i32>; defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v8i16, v16i16, loadv8i16, loadv16i16>; + defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v8f16, v16f16, loadv8f16, loadv16f16>; + defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v16i8, v32i8, loadv16i8, loadv32i8>; defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v16i8, v32i8, loadv16i8, loadv32i8>; } @@ -7801,6 +7869,8 @@ let Predicates = [HasAVX2, NoVLX] in { defm : vextract_lowering<"VEXTRACTI128", v4i64, v2i64>; defm : vextract_lowering<"VEXTRACTI128", v8i32, v4i32>; defm : vextract_lowering<"VEXTRACTI128", v16i16, v8i16>; + defm : vextract_lowering<"VEXTRACTI128", v16f16, v8f16>; + defm : vextract_lowering<"VEXTRACTI128", v32i8, v16i8>; defm : vextract_lowering<"VEXTRACTI128", v32i8, v16i8>; } diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp index f4e25e4194db..1de2a1725954 100644 --- a/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -254,8 +254,12 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const { StringRef CPU = CPUAttr.isValid() ? CPUAttr.getValueAsString() : (StringRef)TargetCPU; - StringRef TuneCPU = - TuneAttr.isValid() ? TuneAttr.getValueAsString() : (StringRef)CPU; + // "x86-64" is a default target setting for many front ends. In these cases, + // they actually request for "generic" tuning unless the "tune-cpu" was + // specified. + StringRef TuneCPU = TuneAttr.isValid() ? TuneAttr.getValueAsString() + : CPU == "x86-64" ? "generic" + : (StringRef)CPU; StringRef FS = FSAttr.isValid() ? 
FSAttr.getValueAsString() : (StringRef)TargetFS; diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index b36f8a3d06d0..b27aac9c4e93 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1297,29 +1297,6 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, LT.first = NumOfDests * NumOfShufflesPerDest; } - static const CostTblEntry AVX512FP16ShuffleTbl[] = { - {TTI::SK_Broadcast, MVT::v32f16, 1}, // vpbroadcastw - {TTI::SK_Broadcast, MVT::v16f16, 1}, // vpbroadcastw - {TTI::SK_Broadcast, MVT::v8f16, 1}, // vpbroadcastw - - {TTI::SK_Reverse, MVT::v32f16, 2}, // vpermw - {TTI::SK_Reverse, MVT::v16f16, 2}, // vpermw - {TTI::SK_Reverse, MVT::v8f16, 1}, // vpshufb - - {TTI::SK_PermuteSingleSrc, MVT::v32f16, 2}, // vpermw - {TTI::SK_PermuteSingleSrc, MVT::v16f16, 2}, // vpermw - {TTI::SK_PermuteSingleSrc, MVT::v8f16, 1}, // vpshufb - - {TTI::SK_PermuteTwoSrc, MVT::v32f16, 2}, // vpermt2w - {TTI::SK_PermuteTwoSrc, MVT::v16f16, 2}, // vpermt2w - {TTI::SK_PermuteTwoSrc, MVT::v8f16, 2} // vpermt2w - }; - - if (!ST->useSoftFloat() && ST->hasFP16()) - if (const auto *Entry = - CostTableLookup(AVX512FP16ShuffleTbl, Kind, LT.second)) - return LT.first * Entry->Cost; - static const CostTblEntry AVX512VBMIShuffleTbl[] = { {TTI::SK_Reverse, MVT::v64i8, 1}, // vpermb {TTI::SK_Reverse, MVT::v32i8, 1}, // vpermb @@ -1339,17 +1316,22 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, static const CostTblEntry AVX512BWShuffleTbl[] = { {TTI::SK_Broadcast, MVT::v32i16, 1}, // vpbroadcastw + {TTI::SK_Broadcast, MVT::v32f16, 1}, // vpbroadcastw {TTI::SK_Broadcast, MVT::v64i8, 1}, // vpbroadcastb {TTI::SK_Reverse, MVT::v32i16, 2}, // vpermw + {TTI::SK_Reverse, MVT::v32f16, 2}, // vpermw {TTI::SK_Reverse, MVT::v16i16, 2}, // vpermw {TTI::SK_Reverse, MVT::v64i8, 2}, // pshufb + vshufi64x2 {TTI::SK_PermuteSingleSrc, MVT::v32i16, 2}, // vpermw + {TTI::SK_PermuteSingleSrc, MVT::v32f16, 2}, // vpermw {TTI::SK_PermuteSingleSrc, MVT::v16i16, 2}, // vpermw + {TTI::SK_PermuteSingleSrc, MVT::v16f16, 2}, // vpermw {TTI::SK_PermuteSingleSrc, MVT::v64i8, 8}, // extend to v32i16 {TTI::SK_PermuteTwoSrc, MVT::v32i16, 2}, // vpermt2w + {TTI::SK_PermuteTwoSrc, MVT::v32f16, 2}, // vpermt2w {TTI::SK_PermuteTwoSrc, MVT::v16i16, 2}, // vpermt2w {TTI::SK_PermuteTwoSrc, MVT::v8i16, 2}, // vpermt2w {TTI::SK_PermuteTwoSrc, MVT::v64i8, 19}, // 6 * v32i8 + 1 @@ -1369,6 +1351,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, {TTI::SK_Broadcast, MVT::v8i64, 1}, // vpbroadcastq {TTI::SK_Broadcast, MVT::v16i32, 1}, // vpbroadcastd {TTI::SK_Broadcast, MVT::v32i16, 1}, // vpbroadcastw + {TTI::SK_Broadcast, MVT::v32f16, 1}, // vpbroadcastw {TTI::SK_Broadcast, MVT::v64i8, 1}, // vpbroadcastb {TTI::SK_Reverse, MVT::v8f64, 1}, // vpermpd @@ -1376,6 +1359,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, {TTI::SK_Reverse, MVT::v8i64, 1}, // vpermq {TTI::SK_Reverse, MVT::v16i32, 1}, // vpermd {TTI::SK_Reverse, MVT::v32i16, 7}, // per mca + {TTI::SK_Reverse, MVT::v32f16, 7}, // per mca {TTI::SK_Reverse, MVT::v64i8, 7}, // per mca {TTI::SK_PermuteSingleSrc, MVT::v8f64, 1}, // vpermpd @@ -1408,11 +1392,14 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, // FIXME: This just applies the type legalization cost rules above // assuming these completely split. 
{TTI::SK_PermuteSingleSrc, MVT::v32i16, 14}, + {TTI::SK_PermuteSingleSrc, MVT::v32f16, 14}, {TTI::SK_PermuteSingleSrc, MVT::v64i8, 14}, {TTI::SK_PermuteTwoSrc, MVT::v32i16, 42}, + {TTI::SK_PermuteTwoSrc, MVT::v32f16, 42}, {TTI::SK_PermuteTwoSrc, MVT::v64i8, 42}, {TTI::SK_Select, MVT::v32i16, 1}, // vpternlogq + {TTI::SK_Select, MVT::v32f16, 1}, // vpternlogq {TTI::SK_Select, MVT::v64i8, 1}, // vpternlogq {TTI::SK_Select, MVT::v8f64, 1}, // vblendmpd {TTI::SK_Select, MVT::v16f32, 1}, // vblendmps @@ -1430,6 +1417,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, {TTI::SK_Broadcast, MVT::v4i64, 1}, // vpbroadcastq {TTI::SK_Broadcast, MVT::v8i32, 1}, // vpbroadcastd {TTI::SK_Broadcast, MVT::v16i16, 1}, // vpbroadcastw + {TTI::SK_Broadcast, MVT::v16f16, 1}, // vpbroadcastw {TTI::SK_Broadcast, MVT::v32i8, 1}, // vpbroadcastb {TTI::SK_Reverse, MVT::v4f64, 1}, // vpermpd @@ -1437,9 +1425,11 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, {TTI::SK_Reverse, MVT::v4i64, 1}, // vpermq {TTI::SK_Reverse, MVT::v8i32, 1}, // vpermd {TTI::SK_Reverse, MVT::v16i16, 2}, // vperm2i128 + pshufb + {TTI::SK_Reverse, MVT::v16f16, 2}, // vperm2i128 + pshufb {TTI::SK_Reverse, MVT::v32i8, 2}, // vperm2i128 + pshufb {TTI::SK_Select, MVT::v16i16, 1}, // vpblendvb + {TTI::SK_Select, MVT::v16f16, 1}, // vpblendvb {TTI::SK_Select, MVT::v32i8, 1}, // vpblendvb {TTI::SK_PermuteSingleSrc, MVT::v4f64, 1}, // vpermpd @@ -1448,6 +1438,8 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, {TTI::SK_PermuteSingleSrc, MVT::v8i32, 1}, // vpermd {TTI::SK_PermuteSingleSrc, MVT::v16i16, 4}, // vperm2i128 + 2*vpshufb // + vpblendvb + {TTI::SK_PermuteSingleSrc, MVT::v16f16, 4}, // vperm2i128 + 2*vpshufb + // + vpblendvb {TTI::SK_PermuteSingleSrc, MVT::v32i8, 4}, // vperm2i128 + 2*vpshufb // + vpblendvb @@ -1457,6 +1449,8 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, {TTI::SK_PermuteTwoSrc, MVT::v8i32, 3}, // 2*vpermd + vpblendd {TTI::SK_PermuteTwoSrc, MVT::v16i16, 7}, // 2*vperm2i128 + 4*vpshufb // + vpblendvb + {TTI::SK_PermuteTwoSrc, MVT::v16f16, 7}, // 2*vperm2i128 + 4*vpshufb + // + vpblendvb {TTI::SK_PermuteTwoSrc, MVT::v32i8, 7}, // 2*vperm2i128 + 4*vpshufb // + vpblendvb }; @@ -1493,6 +1487,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, {TTI::SK_Broadcast, MVT::v4i64, 2}, // vperm2f128 + vpermilpd {TTI::SK_Broadcast, MVT::v8i32, 2}, // vperm2f128 + vpermilps {TTI::SK_Broadcast, MVT::v16i16, 3}, // vpshuflw + vpshufd + vinsertf128 + {TTI::SK_Broadcast, MVT::v16f16, 3}, // vpshuflw + vpshufd + vinsertf128 {TTI::SK_Broadcast, MVT::v32i8, 2}, // vpshufb + vinsertf128 {TTI::SK_Reverse, MVT::v4f64, 2}, // vperm2f128 + vpermilpd @@ -1501,6 +1496,8 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, {TTI::SK_Reverse, MVT::v8i32, 2}, // vperm2f128 + vpermilps {TTI::SK_Reverse, MVT::v16i16, 4}, // vextractf128 + 2*pshufb // + vinsertf128 + {TTI::SK_Reverse, MVT::v16f16, 4}, // vextractf128 + 2*pshufb + // + vinsertf128 {TTI::SK_Reverse, MVT::v32i8, 4}, // vextractf128 + 2*pshufb // + vinsertf128 @@ -1509,6 +1506,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, {TTI::SK_Select, MVT::v8i32, 1}, // vblendps {TTI::SK_Select, MVT::v8f32, 1}, // vblendps {TTI::SK_Select, MVT::v16i16, 3}, // vpand + vpandn + vpor + {TTI::SK_Select, MVT::v16f16, 3}, // vpand + vpandn + vpor {TTI::SK_Select, MVT::v32i8, 3}, // vpand + vpandn + vpor {TTI::SK_PermuteSingleSrc, MVT::v4f64, 2}, // vperm2f128 + vshufpd @@ 
-1517,6 +1515,8 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, {TTI::SK_PermuteSingleSrc, MVT::v8i32, 4}, // 2*vperm2f128 + 2*vshufps {TTI::SK_PermuteSingleSrc, MVT::v16i16, 8}, // vextractf128 + 4*pshufb // + 2*por + vinsertf128 + {TTI::SK_PermuteSingleSrc, MVT::v16f16, 8}, // vextractf128 + 4*pshufb + // + 2*por + vinsertf128 {TTI::SK_PermuteSingleSrc, MVT::v32i8, 8}, // vextractf128 + 4*pshufb // + 2*por + vinsertf128 @@ -1526,6 +1526,8 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, {TTI::SK_PermuteTwoSrc, MVT::v8i32, 4}, // 2*vperm2f128 + 2*vshufps {TTI::SK_PermuteTwoSrc, MVT::v16i16, 15}, // 2*vextractf128 + 8*pshufb // + 4*por + vinsertf128 + {TTI::SK_PermuteTwoSrc, MVT::v16f16, 15}, // 2*vextractf128 + 8*pshufb + // + 4*por + vinsertf128 {TTI::SK_PermuteTwoSrc, MVT::v32i8, 15}, // 2*vextractf128 + 8*pshufb // + 4*por + vinsertf128 }; @@ -1540,6 +1542,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, {TTI::SK_Select, MVT::v4i32, 1}, // pblendw {TTI::SK_Select, MVT::v4f32, 1}, // blendps {TTI::SK_Select, MVT::v8i16, 1}, // pblendw + {TTI::SK_Select, MVT::v8f16, 1}, // pblendw {TTI::SK_Select, MVT::v16i8, 1} // pblendvb }; @@ -1549,18 +1552,23 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, static const CostTblEntry SSSE3ShuffleTbl[] = { {TTI::SK_Broadcast, MVT::v8i16, 1}, // pshufb + {TTI::SK_Broadcast, MVT::v8f16, 1}, // pshufb {TTI::SK_Broadcast, MVT::v16i8, 1}, // pshufb {TTI::SK_Reverse, MVT::v8i16, 1}, // pshufb + {TTI::SK_Reverse, MVT::v8f16, 1}, // pshufb {TTI::SK_Reverse, MVT::v16i8, 1}, // pshufb {TTI::SK_Select, MVT::v8i16, 3}, // 2*pshufb + por + {TTI::SK_Select, MVT::v8f16, 3}, // 2*pshufb + por {TTI::SK_Select, MVT::v16i8, 3}, // 2*pshufb + por {TTI::SK_PermuteSingleSrc, MVT::v8i16, 1}, // pshufb + {TTI::SK_PermuteSingleSrc, MVT::v8f16, 1}, // pshufb {TTI::SK_PermuteSingleSrc, MVT::v16i8, 1}, // pshufb {TTI::SK_PermuteTwoSrc, MVT::v8i16, 3}, // 2*pshufb + por + {TTI::SK_PermuteTwoSrc, MVT::v8f16, 3}, // 2*pshufb + por {TTI::SK_PermuteTwoSrc, MVT::v16i8, 3}, // 2*pshufb + por }; @@ -1573,12 +1581,14 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, {TTI::SK_Broadcast, MVT::v2i64, 1}, // pshufd {TTI::SK_Broadcast, MVT::v4i32, 1}, // pshufd {TTI::SK_Broadcast, MVT::v8i16, 2}, // pshuflw + pshufd + {TTI::SK_Broadcast, MVT::v8f16, 2}, // pshuflw + pshufd {TTI::SK_Broadcast, MVT::v16i8, 3}, // unpck + pshuflw + pshufd {TTI::SK_Reverse, MVT::v2f64, 1}, // shufpd {TTI::SK_Reverse, MVT::v2i64, 1}, // pshufd {TTI::SK_Reverse, MVT::v4i32, 1}, // pshufd {TTI::SK_Reverse, MVT::v8i16, 3}, // pshuflw + pshufhw + pshufd + {TTI::SK_Reverse, MVT::v8f16, 3}, // pshuflw + pshufhw + pshufd {TTI::SK_Reverse, MVT::v16i8, 9}, // 2*pshuflw + 2*pshufhw // + 2*pshufd + 2*unpck + packus @@ -1586,6 +1596,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, {TTI::SK_Select, MVT::v2f64, 1}, // movsd {TTI::SK_Select, MVT::v4i32, 2}, // 2*shufps {TTI::SK_Select, MVT::v8i16, 3}, // pand + pandn + por + {TTI::SK_Select, MVT::v8f16, 3}, // pand + pandn + por {TTI::SK_Select, MVT::v16i8, 3}, // pand + pandn + por {TTI::SK_PermuteSingleSrc, MVT::v2f64, 1}, // shufpd @@ -1593,6 +1604,8 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, {TTI::SK_PermuteSingleSrc, MVT::v4i32, 1}, // pshufd {TTI::SK_PermuteSingleSrc, MVT::v8i16, 5}, // 2*pshuflw + 2*pshufhw // + pshufd/unpck + {TTI::SK_PermuteSingleSrc, MVT::v8f16, 5}, // 2*pshuflw + 2*pshufhw + // + pshufd/unpck { 
TTI::SK_PermuteSingleSrc, MVT::v16i8, 10 }, // 2*pshuflw + 2*pshufhw // + 2*pshufd + 2*unpck + 2*packus @@ -1600,6 +1613,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, { TTI::SK_PermuteTwoSrc, MVT::v2i64, 1 }, // shufpd { TTI::SK_PermuteTwoSrc, MVT::v4i32, 2 }, // 2*{unpck,movsd,pshufd} { TTI::SK_PermuteTwoSrc, MVT::v8i16, 8 }, // blend+permute + { TTI::SK_PermuteTwoSrc, MVT::v8f16, 8 }, // blend+permute { TTI::SK_PermuteTwoSrc, MVT::v16i8, 13 }, // blend+permute }; @@ -5219,7 +5233,7 @@ bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy, Align Alignment) { if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy()) return true; - if (ScalarTy->isHalfTy() && ST->hasBWI() && ST->hasFP16()) + if (ScalarTy->isHalfTy() && ST->hasBWI()) return true; if (!ScalarTy->isIntegerTy()) @@ -5674,8 +5688,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCost( if (EltTy->isFloatTy() || EltTy->isDoubleTy() || EltTy->isIntegerTy(64) || EltTy->isIntegerTy(32) || EltTy->isPointerTy()) return true; - if (EltTy->isIntegerTy(16) || EltTy->isIntegerTy(8) || - (!ST->useSoftFloat() && ST->hasFP16() && EltTy->isHalfTy())) + if (EltTy->isIntegerTy(16) || EltTy->isIntegerTy(8) || EltTy->isHalfTy()) return HasBW; return false; }; diff --git a/llvm/lib/Target/XCore/XCoreFrameLowering.cpp b/llvm/lib/Target/XCore/XCoreFrameLowering.cpp index 19ebcb3ea3e8..2fb06e29bf3b 100644 --- a/llvm/lib/Target/XCore/XCoreFrameLowering.cpp +++ b/llvm/lib/Target/XCore/XCoreFrameLowering.cpp @@ -27,7 +27,7 @@ #include "llvm/IR/Function.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetOptions.h" -#include <algorithm> // std::sort +#include <algorithm> using namespace llvm; diff --git a/llvm/lib/ToolDrivers/llvm-lib/Options.td b/llvm/lib/ToolDrivers/llvm-lib/Options.td index 9d969b040ef2..4af250e8ad73 100644 --- a/llvm/lib/ToolDrivers/llvm-lib/Options.td +++ b/llvm/lib/ToolDrivers/llvm-lib/Options.td @@ -48,3 +48,4 @@ def nodefaultlib: P<"nodefaultlib", "">; def nodefaultlib_all: F<"nodefaultlib">; def nologo : F<"nologo">; def subsystem : P<"subsystem", "">; +def verbose : F<"verbose">; diff --git a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp index f7bbdcffd2ec..81b43a2ab2c2 100644 --- a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp @@ -112,8 +112,8 @@ static bool declaresCoroCleanupIntrinsics(const Module &M) { return coro::declaresIntrinsics( M, {"llvm.coro.alloc", "llvm.coro.begin", "llvm.coro.subfn.addr", "llvm.coro.free", "llvm.coro.id", "llvm.coro.id.retcon", - "llvm.coro.id.retcon.once", "llvm.coro.async.size.replace", - "llvm.coro.async.resume"}); + "llvm.coro.id.async", "llvm.coro.id.retcon.once", + "llvm.coro.async.size.replace", "llvm.coro.async.resume"}); } PreservedAnalyses CoroCleanupPass::run(Module &M, diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h index 5557370c82ba..af35b45c2eaf 100644 --- a/llvm/lib/Transforms/Coroutines/CoroInternal.h +++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h @@ -17,8 +17,6 @@ namespace llvm { class CallGraph; -class CallGraphSCC; -class PassRegistry; namespace coro { diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp index 9c1b247cdb39..722a1c6ec0ce 100644 --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -1961,6 +1961,13 @@ static coro::Shape splitCoroutine(Function &F, return 
Shape; } +/// Remove calls to llvm.coro.end in the original function. +static void removeCoroEnds(const coro::Shape &Shape) { + for (auto End : Shape.CoroEnds) { + replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, nullptr); + } +} + static void updateCallGraphAfterCoroutineSplit( LazyCallGraph::Node &N, const coro::Shape &Shape, const SmallVectorImpl<Function *> &Clones, LazyCallGraph::SCC &C, @@ -1969,10 +1976,14 @@ static void updateCallGraphAfterCoroutineSplit( if (!Shape.CoroBegin) return; - for (llvm::AnyCoroEndInst *End : Shape.CoroEnds) { - auto &Context = End->getContext(); - End->replaceAllUsesWith(ConstantInt::getFalse(Context)); - End->eraseFromParent(); + if (Shape.ABI != coro::ABI::Switch) + removeCoroEnds(Shape); + else { + for (llvm::AnyCoroEndInst *End : Shape.CoroEnds) { + auto &Context = End->getContext(); + End->replaceAllUsesWith(ConstantInt::getFalse(Context)); + End->eraseFromParent(); + } } if (!Clones.empty()) { diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index e5ff98e4f73f..37c773bd47d6 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -326,7 +326,7 @@ static bool getPotentialCopiesOfMemoryValue( << " (only exact: " << OnlyExact << ")\n";); Value &Ptr = *I.getPointerOperand(); - SmallVector<Value *, 8> Objects; + SmallSetVector<Value *, 8> Objects; if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, QueryingAA, &I, UsedAssumedInformation)) { LLVM_DEBUG( @@ -343,6 +343,7 @@ static bool getPotentialCopiesOfMemoryValue( const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*I.getFunction()); + LLVM_DEBUG(dbgs() << "Visit " << Objects.size() << " objects:\n"); for (Value *Obj : Objects) { LLVM_DEBUG(dbgs() << "Visit underlying object " << *Obj << "\n"); if (isa<UndefValue>(Obj)) @@ -352,8 +353,8 @@ static bool getPotentialCopiesOfMemoryValue( // be OK. We do not try to optimize the latter. 
if (!NullPointerIsDefined(I.getFunction(), Ptr.getType()->getPointerAddressSpace()) && - A.getAssumedSimplified(Ptr, QueryingAA, UsedAssumedInformation) == - Obj) + A.getAssumedSimplified(Ptr, QueryingAA, UsedAssumedInformation, + AA::Interprocedural) == Obj) continue; LLVM_DEBUG( dbgs() << "Underlying object is a valid nullptr, giving up.\n";); @@ -375,25 +376,37 @@ static bool getPotentialCopiesOfMemoryValue( return false; } - if (IsLoad) { - Value *InitialValue = AA::getInitialValueForObj(*Obj, *I.getType(), TLI); - if (!InitialValue) - return false; - NewCopies.push_back(InitialValue); - NewCopyOrigins.push_back(nullptr); - } + bool NullOnly = true; + bool NullRequired = false; + auto CheckForNullOnlyAndUndef = [&](Optional<Value *> V, bool IsExact) { + if (!V || *V == nullptr) + NullOnly = false; + else if (isa<UndefValue>(*V)) + /* No op */; + else if (isa<Constant>(*V) && cast<Constant>(*V)->isNullValue()) + NullRequired = !IsExact; + else + NullOnly = false; + }; auto CheckAccess = [&](const AAPointerInfo::Access &Acc, bool IsExact) { if ((IsLoad && !Acc.isWrite()) || (!IsLoad && !Acc.isRead())) return true; if (IsLoad && Acc.isWrittenValueYetUndetermined()) return true; - if (OnlyExact && !IsExact && + CheckForNullOnlyAndUndef(Acc.getContent(), IsExact); + if (OnlyExact && !IsExact && !NullOnly && !isa_and_nonnull<UndefValue>(Acc.getWrittenValue())) { LLVM_DEBUG(dbgs() << "Non exact access " << *Acc.getRemoteInst() << ", abort!\n"); return false; } + if (NullRequired && !NullOnly) { + LLVM_DEBUG(dbgs() << "Required all `null` accesses due to non exact " + "one, however found non-null one: " + << *Acc.getRemoteInst() << ", abort!\n"); + return false; + } if (IsLoad) { assert(isa<LoadInst>(I) && "Expected load or store instruction only!"); if (!Acc.isWrittenValueUnknown()) { @@ -424,15 +437,36 @@ static bool getPotentialCopiesOfMemoryValue( return true; }; + // If the value has been written to we don't need the initial value of the + // object. + bool HasBeenWrittenTo = false; + auto &PI = A.getAAFor<AAPointerInfo>(QueryingAA, IRPosition::value(*Obj), DepClassTy::NONE); - if (!PI.forallInterferingAccesses(A, QueryingAA, I, CheckAccess)) { + if (!PI.forallInterferingAccesses(A, QueryingAA, I, CheckAccess, + HasBeenWrittenTo)) { LLVM_DEBUG( dbgs() << "Failed to verify all interfering accesses for underlying object: " << *Obj << "\n"); return false; } + + if (IsLoad && !HasBeenWrittenTo) { + Value *InitialValue = AA::getInitialValueForObj(*Obj, *I.getType(), TLI); + if (!InitialValue) + return false; + CheckForNullOnlyAndUndef(InitialValue, /* IsExact */ true); + if (NullRequired && !NullOnly) { + LLVM_DEBUG(dbgs() << "Non exact access but initial value that is not " + "null or undef, abort!\n"); + return false; + } + + NewCopies.push_back(InitialValue); + NewCopyOrigins.push_back(nullptr); + } + PIs.push_back(&PI); } @@ -520,12 +554,21 @@ isPotentiallyReachable(Attributor &A, const Instruction &FromI, << " from " << FromI << " [GBCB: " << bool(GoBackwardsCB) << "]\n"); + // TODO: If we can go arbitrarily backwards we will eventually reach an + // entry point that can reach ToI. Only once this takes a set of blocks + // through which we cannot go, or once we track internal functions not + // accessible from the outside, it makes sense to perform backwards analysis + // in the absence of a GoBackwardsCB. 
+ if (!GoBackwardsCB) { + LLVM_DEBUG(dbgs() << "[AA] check @" << ToFn.getName() << " from " << FromI + << " is not checked backwards, abort\n"); + return true; + } + SmallPtrSet<const Instruction *, 8> Visited; SmallVector<const Instruction *> Worklist; Worklist.push_back(&FromI); - const auto &NoRecurseAA = A.getAAFor<AANoRecurse>( - QueryingAA, IRPosition::function(ToFn), DepClassTy::OPTIONAL); while (!Worklist.empty()) { const Instruction *CurFromI = Worklist.pop_back_val(); if (!Visited.insert(CurFromI).second) @@ -545,26 +588,13 @@ isPotentiallyReachable(Attributor &A, const Instruction &FromI, << *ToI << " [Intra]\n"); if (Result) return true; - if (NoRecurseAA.isAssumedNoRecurse()) - continue; - } - - // TODO: If we can go arbitrarily backwards we will eventually reach an - // entry point that can reach ToI. Only once this takes a set of blocks - // through which we cannot go, or once we track internal functions not - // accessible from the outside, it makes sense to perform backwards analysis - // in the absence of a GoBackwardsCB. - if (!GoBackwardsCB) { - LLVM_DEBUG(dbgs() << "[AA] check @" << ToFn.getName() << " from " - << *CurFromI << " is not checked backwards, abort\n"); - return true; } // Check if the current instruction is already known to reach the ToFn. const auto &FnReachabilityAA = A.getAAFor<AAFunctionReachability>( QueryingAA, IRPosition::function(*FromFn), DepClassTy::OPTIONAL); bool Result = FnReachabilityAA.instructionCanReach( - A, *CurFromI, ToFn, /* UseBackwards */ false); + A, *CurFromI, ToFn); LLVM_DEBUG(dbgs() << "[AA] " << *CurFromI << " in @" << FromFn->getName() << " " << (Result ? "can potentially " : "cannot ") << "reach @" << ToFn.getName() << " [FromFn]\n"); @@ -1038,60 +1068,74 @@ Attributor::getAssumedConstant(const IRPosition &IRP, } if (auto *C = dyn_cast<Constant>(&IRP.getAssociatedValue())) return C; - const auto &ValueSimplifyAA = - getAAFor<AAValueSimplify>(AA, IRP, DepClassTy::NONE); - Optional<Value *> SimplifiedV = - ValueSimplifyAA.getAssumedSimplifiedValue(*this); - bool IsKnown = ValueSimplifyAA.isAtFixpoint(); - UsedAssumedInformation |= !IsKnown; - if (!SimplifiedV) { - recordDependence(ValueSimplifyAA, AA, DepClassTy::OPTIONAL); - return llvm::None; - } - if (isa_and_nonnull<UndefValue>(SimplifiedV.value())) { - recordDependence(ValueSimplifyAA, AA, DepClassTy::OPTIONAL); - return UndefValue::get(IRP.getAssociatedType()); + SmallVector<AA::ValueAndContext> Values; + if (getAssumedSimplifiedValues(IRP, &AA, Values, + AA::ValueScope::Interprocedural, + UsedAssumedInformation)) { + if (Values.empty()) + return llvm::None; + if (auto *C = dyn_cast_or_null<Constant>( + AAPotentialValues::getSingleValue(*this, AA, IRP, Values))) + return C; } - Constant *CI = dyn_cast_or_null<Constant>(SimplifiedV.value()); - if (CI) - CI = dyn_cast_or_null<Constant>( - AA::getWithType(*CI, *IRP.getAssociatedType())); - if (CI) - recordDependence(ValueSimplifyAA, AA, DepClassTy::OPTIONAL); - return CI; + return nullptr; } -Optional<Value *> -Attributor::getAssumedSimplified(const IRPosition &IRP, - const AbstractAttribute *AA, - bool &UsedAssumedInformation) { +Optional<Value *> Attributor::getAssumedSimplified(const IRPosition &IRP, + const AbstractAttribute *AA, + bool &UsedAssumedInformation, + AA::ValueScope S) { // First check all callbacks provided by outside AAs. If any of them returns // a non-null value that is different from the associated value, or None, we // assume it's simplified. 
for (auto &CB : SimplificationCallbacks.lookup(IRP)) return CB(IRP, AA, UsedAssumedInformation); - // If no high-level/outside simplification occurred, use AAValueSimplify. - const auto &ValueSimplifyAA = - getOrCreateAAFor<AAValueSimplify>(IRP, AA, DepClassTy::NONE); - Optional<Value *> SimplifiedV = - ValueSimplifyAA.getAssumedSimplifiedValue(*this); - bool IsKnown = ValueSimplifyAA.isAtFixpoint(); - UsedAssumedInformation |= !IsKnown; - if (!SimplifiedV) { - if (AA) - recordDependence(ValueSimplifyAA, *AA, DepClassTy::OPTIONAL); + SmallVector<AA::ValueAndContext> Values; + if (!getAssumedSimplifiedValues(IRP, AA, Values, S, UsedAssumedInformation)) + return &IRP.getAssociatedValue(); + if (Values.empty()) return llvm::None; + if (AA) + if (Value *V = AAPotentialValues::getSingleValue(*this, *AA, IRP, Values)) + return V; + if (IRP.getPositionKind() == IRPosition::IRP_RETURNED || + IRP.getPositionKind() == IRPosition::IRP_CALL_SITE_RETURNED) + return nullptr; + return &IRP.getAssociatedValue(); +} + +bool Attributor::getAssumedSimplifiedValues( + const IRPosition &IRP, const AbstractAttribute *AA, + SmallVectorImpl<AA::ValueAndContext> &Values, AA::ValueScope S, + bool &UsedAssumedInformation) { + // First check all callbacks provided by outside AAs. If any of them returns + // a non-null value that is different from the associated value, or None, we + // assume it's simplified. + const auto &SimplificationCBs = SimplificationCallbacks.lookup(IRP); + for (auto &CB : SimplificationCBs) { + Optional<Value *> CBResult = CB(IRP, AA, UsedAssumedInformation); + if (!CBResult.has_value()) + continue; + Value *V = CBResult.value(); + if (!V) + return false; + if ((S & AA::ValueScope::Interprocedural) || + AA::isValidInScope(*V, IRP.getAnchorScope())) + Values.push_back(AA::ValueAndContext{*V, nullptr}); + else + return false; } - if (*SimplifiedV == nullptr) - return const_cast<Value *>(&IRP.getAssociatedValue()); - if (Value *SimpleV = - AA::getWithType(**SimplifiedV, *IRP.getAssociatedType())) { - if (AA) - recordDependence(ValueSimplifyAA, *AA, DepClassTy::OPTIONAL); - return SimpleV; - } - return const_cast<Value *>(&IRP.getAssociatedValue()); + if (!SimplificationCBs.empty()) + return true; + + // If no high-level/outside simplification occurred, use AAPotentialValues. 
+ const auto &PotentialValuesAA = + getOrCreateAAFor<AAPotentialValues>(IRP, AA, DepClassTy::OPTIONAL); + if (!PotentialValuesAA.getAssumedSimplifiedValues(*this, Values, S)) + return false; + UsedAssumedInformation |= !PotentialValuesAA.isAtFixpoint(); + return true; } Optional<Value *> Attributor::translateArgumentToCallSiteContent( @@ -1106,7 +1150,7 @@ Optional<Value *> Attributor::translateArgumentToCallSiteContent( if (!Arg->hasPointeeInMemoryValueAttr()) return getAssumedSimplified( IRPosition::callsite_argument(CB, Arg->getArgNo()), AA, - UsedAssumedInformation); + UsedAssumedInformation, AA::Intraprocedural); return nullptr; } @@ -1295,8 +1339,21 @@ bool Attributor::checkForAllUses( SmallVector<const Use *, 16> Worklist; SmallPtrSet<const Use *, 16> Visited; - for (const Use &U : V.uses()) - Worklist.push_back(&U); + auto AddUsers = [&](const Value &V, const Use *OldUse) { + for (const Use &UU : V.uses()) { + if (OldUse && EquivalentUseCB && !EquivalentUseCB(*OldUse, UU)) { + LLVM_DEBUG(dbgs() << "[Attributor] Potential copy was " + "rejected by the equivalence call back: " + << *UU << "!\n"); + return false; + } + + Worklist.push_back(&UU); + } + return true; + }; + + AddUsers(V, /* OldUse */ nullptr); LLVM_DEBUG(dbgs() << "[Attributor] Got " << Worklist.size() << " initial uses to check\n"); @@ -1342,15 +1399,8 @@ bool Attributor::checkForAllUses( << PotentialCopies.size() << " potential copies instead!\n"); for (Value *PotentialCopy : PotentialCopies) - for (const Use &CopyUse : PotentialCopy->uses()) { - if (EquivalentUseCB && !EquivalentUseCB(*U, CopyUse)) { - LLVM_DEBUG(dbgs() << "[Attributor] Potential copy was " - "rejected by the equivalence call back: " - << *CopyUse << "!\n"); - return false; - } - Worklist.push_back(&CopyUse); - } + if (!AddUsers(*PotentialCopy, U)) + return false; continue; } } @@ -1361,8 +1411,25 @@ bool Attributor::checkForAllUses( return false; if (!Follow) continue; - for (const Use &UU : U->getUser()->uses()) - Worklist.push_back(&UU); + + User &Usr = *U->getUser(); + AddUsers(Usr, /* OldUse */ nullptr); + + auto *RI = dyn_cast<ReturnInst>(&Usr); + if (!RI) + continue; + + Function &F = *RI->getFunction(); + auto CallSitePred = [&](AbstractCallSite ACS) { + return AddUsers(*ACS.getInstruction(), U); + }; + if (!checkForAllCallSites(CallSitePred, F, /* RequireAllCallSites */ true, + &QueryingAA, UsedAssumedInformation)) { + LLVM_DEBUG(dbgs() << "[Attributor] Could not follow return instruction " + "to all call sites: " + << *RI << "\n"); + return false; + } } return true; @@ -1918,7 +1985,8 @@ ChangeStatus Attributor::cleanupIR() { << ToBeDeletedInsts.size() << " instructions and " << ToBeChangedValues.size() << " values and " << ToBeChangedUses.size() << " uses. To insert " - << ToBeChangedToUnreachableInsts.size() << " unreachables." + << ToBeChangedToUnreachableInsts.size() + << " unreachables.\n" << "Preserve manifest added " << ManifestAddedBlocks.size() << " blocks\n"); @@ -2046,6 +2114,8 @@ ChangeStatus Attributor::cleanupIR() { } for (auto &V : ToBeChangedToUnreachableInsts) if (Instruction *I = dyn_cast_or_null<Instruction>(V)) { + LLVM_DEBUG(dbgs() << "[Attributor] Change to unreachable: " << *I + << "\n"); assert(isRunOn(*I->getFunction()) && "Cannot replace an instruction outside the current SCC!"); CGModifiedFunctions.insert(I->getFunction()); @@ -2877,7 +2947,8 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { // Every function might be simplified. 
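The surrounding Attributor.cpp hunks thread a new AA::ValueScope argument through Attributor::getAssumedSimplified and route single-value queries through the new Attributor::getAssumedSimplifiedValues. As a minimal, illustrative sketch of the resulting call shapes only (not part of this commit), assuming the usual `using namespace llvm;` of these files, the header "llvm/Transforms/IPO/Attributor.h", and a hypothetical helper name `querySimplifiedValues` with an Attributor `A`, a querying attribute `QueryingAA`, and a position `IRP` supplied by the caller:

    // Sketch only; signatures are the ones introduced in this diff.
    static void querySimplifiedValues(Attributor &A,
                                      const AbstractAttribute &QueryingAA,
                                      const IRPosition &IRP) {
      bool UsedAssumedInformation = false;

      // Single-value query, now restricted to an explicit scope.
      Optional<Value *> SimpleV = A.getAssumedSimplified(
          IRP, &QueryingAA, UsedAssumedInformation, AA::Intraprocedural);
      (void)SimpleV;

      // Multi-value query collecting value/context pairs.
      SmallVector<AA::ValueAndContext> Values;
      if (A.getAssumedSimplifiedValues(IRP, &QueryingAA, Values,
                                       AA::ValueScope::Interprocedural,
                                       UsedAssumedInformation))
        for (const AA::ValueAndContext &VAC : Values)
          (void)VAC.getCtxI(); // each candidate value with its context
    }

The scope argument (Intraprocedural, Interprocedural, or AnyScope) controls whether simplification may return values that are only valid outside the anchor function, matching the isValidInScope check in the hunk above.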
bool UsedAssumedInformation = false; - getAssumedSimplified(RetPos, nullptr, UsedAssumedInformation); + getAssumedSimplified(RetPos, nullptr, UsedAssumedInformation, + AA::Intraprocedural); // Every returned value might be marked noundef. getOrCreateAAFor<AANoUndef>(RetPos); @@ -2906,7 +2977,8 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { // interface though as outside AAs can register custom simplification // callbacks. bool UsedAssumedInformation = false; - getAssumedSimplified(ArgPos, /* AA */ nullptr, UsedAssumedInformation); + getAssumedSimplified(ArgPos, /* AA */ nullptr, UsedAssumedInformation, + AA::Intraprocedural); // Every argument might be dead. getOrCreateAAFor<AAIsDead>(ArgPos); @@ -2970,7 +3042,8 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { IRPosition CBRetPos = IRPosition::callsite_returned(CB); bool UsedAssumedInformation = false; - getAssumedSimplified(CBRetPos, nullptr, UsedAssumedInformation); + getAssumedSimplified(CBRetPos, nullptr, UsedAssumedInformation, + AA::Intraprocedural); } for (int I = 0, E = CB.arg_size(); I < E; ++I) { @@ -2984,7 +3057,8 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { // Attributor interface though as outside AAs can register custom // simplification callbacks. bool UsedAssumedInformation = false; - getAssumedSimplified(CBArgPos, /* AA */ nullptr, UsedAssumedInformation); + getAssumedSimplified(CBArgPos, /* AA */ nullptr, UsedAssumedInformation, + AA::Intraprocedural); // Every call site argument might be marked "noundef". getOrCreateAAFor<AANoUndef>(CBArgPos); @@ -3034,12 +3108,12 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { IRPosition::value(*cast<LoadInst>(I).getPointerOperand())); if (SimplifyAllLoads) getAssumedSimplified(IRPosition::value(I), nullptr, - UsedAssumedInformation); + UsedAssumedInformation, AA::Intraprocedural); } else { auto &SI = cast<StoreInst>(I); getOrCreateAAFor<AAIsDead>(IRPosition::inst(I)); getAssumedSimplified(IRPosition::value(*SI.getValueOperand()), nullptr, - UsedAssumedInformation); + UsedAssumedInformation, AA::Intraprocedural); getOrCreateAAFor<AAAlign>(IRPosition::value(*SI.getPointerOperand())); } return true; @@ -3126,6 +3200,26 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, return OS; } +raw_ostream &llvm::operator<<(raw_ostream &OS, + const PotentialLLVMValuesState &S) { + OS << "set-state(< {"; + if (!S.isValidState()) + OS << "full-set"; + else { + for (auto &It : S.getAssumedSet()) { + if (auto *F = dyn_cast<Function>(It.first.getValue())) + OS << "@" << F->getName() << "[" << int(It.second) << "], "; + else + OS << *It.first.getValue() << "[" << int(It.second) << "], "; + } + if (S.undefIsContained()) + OS << "undef "; + } + OS << "} >)"; + + return OS; +} + void AbstractAttribute::print(raw_ostream &OS) const { OS << "["; OS << getName(); diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 1ff54b78e27e..660ff3ee9563 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -14,12 +14,14 @@ #include "llvm/Transforms/IPO/Attributor.h" #include "llvm/ADT/APInt.h" +#include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include 
"llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumeBundleQueries.h" @@ -35,11 +37,13 @@ #include "llvm/IR/Argument.h" #include "llvm/IR/Assumptions.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" @@ -72,6 +76,8 @@ static cl::opt<int> MaxHeapToStackSize("max-heap-to-stack-size", cl::init(128), template <> unsigned llvm::PotentialConstantIntValuesState::MaxPotentialValues = 0; +template <> unsigned llvm::PotentialLLVMValuesState::MaxPotentialValues = -1; + static cl::opt<unsigned, true> MaxPotentialValues( "attributor-max-potential-values", cl::Hidden, cl::desc("Maximum number of potential values to be " @@ -79,6 +85,12 @@ static cl::opt<unsigned, true> MaxPotentialValues( cl::location(llvm::PotentialConstantIntValuesState::MaxPotentialValues), cl::init(7)); +static cl::opt<int> MaxPotentialValuesIterations( + "attributor-max-potential-values-iterations", cl::Hidden, + cl::desc( + "Maximum number of iterations we keep dismantling potential values."), + cl::init(64)); + static cl::opt<unsigned> MaxInterferingAccesses( "attributor-max-interfering-accesses", cl::Hidden, cl::desc("Maximum number of interfering accesses to " @@ -162,6 +174,7 @@ PIPE_OPERATOR(AAValueConstantRange) PIPE_OPERATOR(AAPrivatizablePtr) PIPE_OPERATOR(AAUndefinedBehavior) PIPE_OPERATOR(AAPotentialConstantValues) +PIPE_OPERATOR(AAPotentialValues) PIPE_OPERATOR(AANoUndef) PIPE_OPERATOR(AACallEdges) PIPE_OPERATOR(AAFunctionReachability) @@ -293,228 +306,35 @@ static Value *constructPointer(Type *ResTy, Type *PtrElemTy, Value *Ptr, return Ptr; } -/// Recursively visit all values that might become \p IRP at some point. This -/// will be done by looking through cast instructions, selects, phis, and calls -/// with the "returned" attribute. Once we cannot look through the value any -/// further, the callback \p VisitValueCB is invoked and passed the current -/// value, the \p State, and a flag to indicate if we stripped anything. -/// Stripped means that we unpacked the value associated with \p IRP at least -/// once. Note that the value used for the callback may still be the value -/// associated with \p IRP (due to PHIs). To limit how much effort is invested, -/// we will never visit more values than specified by \p MaxValues. -/// If \p VS does not contain the Interprocedural bit, only values valid in the -/// scope of \p CtxI will be visited and simplification into other scopes is -/// prevented. 
-template <typename StateTy> -static bool genericValueTraversal( - Attributor &A, IRPosition IRP, const AbstractAttribute &QueryingAA, - StateTy &State, - function_ref<bool(Value &, const Instruction *, StateTy &, bool)> - VisitValueCB, - const Instruction *CtxI, bool &UsedAssumedInformation, - bool UseValueSimplify = true, int MaxValues = 16, - function_ref<Value *(Value *)> StripCB = nullptr, - AA::ValueScope VS = AA::Interprocedural) { - - struct LivenessInfo { - const AAIsDead *LivenessAA = nullptr; - bool AnyDead = false; - }; - SmallMapVector<const Function *, LivenessInfo, 4> LivenessAAs; - auto GetLivenessInfo = [&](const Function &F) -> LivenessInfo & { - LivenessInfo &LI = LivenessAAs[&F]; - if (!LI.LivenessAA) - LI.LivenessAA = &A.getAAFor<AAIsDead>(QueryingAA, IRPosition::function(F), - DepClassTy::NONE); - return LI; - }; - - Value *InitialV = &IRP.getAssociatedValue(); - using Item = std::pair<Value *, const Instruction *>; - SmallSet<Item, 16> Visited; - SmallVector<Item, 16> Worklist; - Worklist.push_back({InitialV, CtxI}); - - int Iteration = 0; - do { - Item I = Worklist.pop_back_val(); - Value *V = I.first; - CtxI = I.second; - if (StripCB) - V = StripCB(V); - - // Check if we should process the current value. To prevent endless - // recursion keep a record of the values we followed! - if (!Visited.insert(I).second) - continue; - - // Make sure we limit the compile time for complex expressions. - if (Iteration++ >= MaxValues) { - LLVM_DEBUG(dbgs() << "Generic value traversal reached iteration limit: " - << Iteration << "!\n"); - return false; - } - - // Explicitly look through calls with a "returned" attribute if we do - // not have a pointer as stripPointerCasts only works on them. - Value *NewV = nullptr; - if (V->getType()->isPointerTy()) { - NewV = V->stripPointerCasts(); - } else { - auto *CB = dyn_cast<CallBase>(V); - if (CB && CB->getCalledFunction()) { - for (Argument &Arg : CB->getCalledFunction()->args()) - if (Arg.hasReturnedAttr()) { - NewV = CB->getArgOperand(Arg.getArgNo()); - break; - } - } - } - if (NewV && NewV != V) { - Worklist.push_back({NewV, CtxI}); - continue; - } - - // Look through select instructions, visit assumed potential values. - if (auto *SI = dyn_cast<SelectInst>(V)) { - Optional<Constant *> C = A.getAssumedConstant( - *SI->getCondition(), QueryingAA, UsedAssumedInformation); - bool NoValueYet = !C; - if (NoValueYet || isa_and_nonnull<UndefValue>(*C)) - continue; - if (auto *CI = dyn_cast_or_null<ConstantInt>(*C)) { - if (CI->isZero()) - Worklist.push_back({SI->getFalseValue(), CtxI}); - else - Worklist.push_back({SI->getTrueValue(), CtxI}); - continue; - } - // We could not simplify the condition, assume both values.( - Worklist.push_back({SI->getTrueValue(), CtxI}); - Worklist.push_back({SI->getFalseValue(), CtxI}); - continue; - } - - // Look through phi nodes, visit all live operands. 
- if (auto *PHI = dyn_cast<PHINode>(V)) { - LivenessInfo &LI = GetLivenessInfo(*PHI->getFunction()); - for (unsigned u = 0, e = PHI->getNumIncomingValues(); u < e; u++) { - BasicBlock *IncomingBB = PHI->getIncomingBlock(u); - if (LI.LivenessAA->isEdgeDead(IncomingBB, PHI->getParent())) { - LI.AnyDead = true; - UsedAssumedInformation |= !LI.LivenessAA->isAtFixpoint(); - continue; - } - Worklist.push_back( - {PHI->getIncomingValue(u), IncomingBB->getTerminator()}); - } - continue; - } - - if (auto *Arg = dyn_cast<Argument>(V)) { - if ((VS & AA::Interprocedural) && !Arg->hasPassPointeeByValueCopyAttr()) { - SmallVector<Item> CallSiteValues; - bool UsedAssumedInformation = false; - if (A.checkForAllCallSites( - [&](AbstractCallSite ACS) { - // Callbacks might not have a corresponding call site operand, - // stick with the argument in that case. - Value *CSOp = ACS.getCallArgOperand(*Arg); - if (!CSOp) - return false; - CallSiteValues.push_back({CSOp, ACS.getInstruction()}); - return true; - }, - *Arg->getParent(), true, &QueryingAA, UsedAssumedInformation)) { - Worklist.append(CallSiteValues); - continue; - } - } - } - - if (UseValueSimplify && !isa<Constant>(V)) { - Optional<Value *> SimpleV = - A.getAssumedSimplified(*V, QueryingAA, UsedAssumedInformation); - if (!SimpleV) - continue; - Value *NewV = SimpleV.value(); - if (NewV && NewV != V) { - if ((VS & AA::Interprocedural) || !CtxI || - AA::isValidInScope(*NewV, CtxI->getFunction())) { - Worklist.push_back({NewV, CtxI}); - continue; - } - } - } - - if (auto *LI = dyn_cast<LoadInst>(V)) { - bool UsedAssumedInformation = false; - // If we ask for the potentially loaded values from the initial pointer we - // will simply end up here again. The load is as far as we can make it. - if (LI->getPointerOperand() != InitialV) { - SmallSetVector<Value *, 4> PotentialCopies; - SmallSetVector<Instruction *, 4> PotentialValueOrigins; - if (AA::getPotentiallyLoadedValues(A, *LI, PotentialCopies, - PotentialValueOrigins, QueryingAA, - UsedAssumedInformation, - /* OnlyExact */ true)) { - // Values have to be dynamically unique or we loose the fact that a - // single llvm::Value might represent two runtime values (e.g., stack - // locations in different recursive calls). - bool DynamicallyUnique = - llvm::all_of(PotentialCopies, [&A, &QueryingAA](Value *PC) { - return AA::isDynamicallyUnique(A, QueryingAA, *PC); - }); - if (DynamicallyUnique && - ((VS & AA::Interprocedural) || !CtxI || - llvm::all_of(PotentialCopies, [CtxI](Value *PC) { - return AA::isValidInScope(*PC, CtxI->getFunction()); - }))) { - for (auto *PotentialCopy : PotentialCopies) - Worklist.push_back({PotentialCopy, CtxI}); - continue; - } - } - } - } - - // Once a leaf is reached we inform the user through the callback. - if (!VisitValueCB(*V, CtxI, State, Iteration > 1)) { - LLVM_DEBUG(dbgs() << "Generic value traversal visit callback failed for: " - << *V << "!\n"); - return false; - } - } while (!Worklist.empty()); - - // If we actually used liveness information so we have to record a dependence. - for (auto &It : LivenessAAs) - if (It.second.AnyDead) - A.recordDependence(*It.second.LivenessAA, QueryingAA, - DepClassTy::OPTIONAL); - - // All values have been visited. 
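In the hunk that follows, the old genericValueTraversal helper is deleted and AA::getAssumedUnderlyingObjects is rewritten to collect into a SmallSetVector and recurse through getAssumedSimplifiedValues. A minimal sketch of the updated call shape, mirroring the getPotentialCopiesOfMemoryValue caller earlier in this diff; the wrapper name `collectUnderlyingObjects` and its parameters are placeholders, and the usual `using namespace llvm;` plus "llvm/Transforms/IPO/Attributor.h" are assumed:

    // Sketch only; the signature is the one introduced in this diff.
    static bool collectUnderlyingObjects(Attributor &A, const Value &Ptr,
                                         const AbstractAttribute &QueryingAA,
                                         const Instruction &I) {
      SmallSetVector<Value *, 8> Objects;
      bool UsedAssumedInformation = false;
      if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, QueryingAA, &I,
                                           UsedAssumedInformation))
        return false; // could not enumerate the assumed underlying objects
      for (Value *Obj : Objects)
        if (isa<UndefValue>(Obj))
          continue; // callers typically skip undef, as in the hunk above
      return true;
    }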
- return true; -} - bool AA::getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr, - SmallVectorImpl<Value *> &Objects, + SmallSetVector<Value *, 8> &Objects, const AbstractAttribute &QueryingAA, const Instruction *CtxI, bool &UsedAssumedInformation, - AA::ValueScope VS) { - auto StripCB = [&](Value *V) { return getUnderlyingObject(V); }; - SmallPtrSet<Value *, 8> SeenObjects; - auto VisitValueCB = [&SeenObjects](Value &Val, const Instruction *, - SmallVectorImpl<Value *> &Objects, - bool) -> bool { - if (SeenObjects.insert(&Val).second) - Objects.push_back(&Val); + AA::ValueScope S, + SmallPtrSetImpl<Value *> *SeenObjects) { + SmallPtrSet<Value *, 8> LocalSeenObjects; + if (!SeenObjects) + SeenObjects = &LocalSeenObjects; + + SmallVector<AA::ValueAndContext> Values; + if (!A.getAssumedSimplifiedValues(IRPosition::value(Ptr), &QueryingAA, Values, + S, UsedAssumedInformation)) { + Objects.insert(const_cast<Value *>(&Ptr)); return true; - }; - if (!genericValueTraversal<decltype(Objects)>( - A, IRPosition::value(Ptr), QueryingAA, Objects, VisitValueCB, CtxI, - UsedAssumedInformation, true, 32, StripCB, VS)) - return false; + } + + for (auto &VAC : Values) { + Value *UO = getUnderlyingObject(VAC.getValue()); + if (UO && UO != VAC.getValue() && SeenObjects->insert(UO).second) { + if (!getAssumedUnderlyingObjects(A, *UO, Objects, QueryingAA, + VAC.getCtxI(), UsedAssumedInformation, S, + SeenObjects)) + return false; + continue; + } + Objects.insert(VAC.getValue()); + } return true; } @@ -1122,9 +942,6 @@ struct AAPointerInfoImpl using BaseTy = StateWrapper<AA::PointerInfo::State, AAPointerInfo>; AAPointerInfoImpl(const IRPosition &IRP, Attributor &A) : BaseTy(IRP) {} - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { AAPointerInfo::initialize(A); } - /// See AbstractAttribute::getAsStr(). const std::string getAsStr() const override { return std::string("PointerInfo ") + @@ -1144,9 +961,14 @@ struct AAPointerInfoImpl const override { return State::forallInterferingAccesses(OAS, CB); } - bool forallInterferingAccesses( - Attributor &A, const AbstractAttribute &QueryingAA, Instruction &I, - function_ref<bool(const Access &, bool)> UserCB) const override { + + bool + forallInterferingAccesses(Attributor &A, const AbstractAttribute &QueryingAA, + Instruction &I, + function_ref<bool(const Access &, bool)> UserCB, + bool &HasBeenWrittenTo) const override { + HasBeenWrittenTo = false; + SmallPtrSet<const Access *, 8> DominatingWrites; SmallVector<std::pair<const Access *, bool>, 8> InterferingAccesses; @@ -1182,14 +1004,12 @@ struct AAPointerInfoImpl const bool FindInterferingWrites = I.mayReadFromMemory(); const bool FindInterferingReads = I.mayWriteToMemory(); - const bool UseDominanceReasoning = FindInterferingWrites; + const bool UseDominanceReasoning = + FindInterferingWrites && NoRecurseAA.isKnownNoRecurse(); const bool CanUseCFGResoning = CanIgnoreThreading(I); InformationCache &InfoCache = A.getInfoCache(); const DominatorTree *DT = - NoRecurseAA.isKnownNoRecurse() && UseDominanceReasoning - ? 
InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>( - Scope) - : nullptr; + InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(Scope); enum GPUAddressSpace : unsigned { Generic = 0, @@ -1246,22 +1066,17 @@ struct AAPointerInfoImpl (!FindInterferingReads || !Acc.isRead())) return true; + bool Dominates = DT && Exact && Acc.isMustAccess() && + (Acc.getLocalInst()->getFunction() == &Scope) && + DT->dominates(Acc.getRemoteInst(), &I); + if (FindInterferingWrites && Dominates) + HasBeenWrittenTo = true; + // For now we only filter accesses based on CFG reasoning which does not // work yet if we have threading effects, or the access is complicated. - if (CanUseCFGResoning) { - if ((!Acc.isWrite() || - !AA::isPotentiallyReachable(A, *Acc.getLocalInst(), I, QueryingAA, - IsLiveInCalleeCB)) && - (!Acc.isRead() || - !AA::isPotentiallyReachable(A, I, *Acc.getLocalInst(), QueryingAA, - IsLiveInCalleeCB))) - return true; - if (DT && Exact && (Acc.getLocalInst()->getFunction() == &Scope) && - IsSameThreadAsLoad(Acc)) { - if (DT->dominates(Acc.getLocalInst(), &I)) - DominatingWrites.insert(&Acc); - } - } + if (CanUseCFGResoning && Dominates && UseDominanceReasoning && + IsSameThreadAsLoad(Acc)) + DominatingWrites.insert(&Acc); InterferingAccesses.push_back({&Acc, Exact}); return true; @@ -1269,19 +1084,27 @@ struct AAPointerInfoImpl if (!State::forallInterferingAccesses(I, AccessCB)) return false; - // If we cannot use CFG reasoning we only filter the non-write accesses - // and are done here. - if (!CanUseCFGResoning) { - for (auto &It : InterferingAccesses) - if (!UserCB(*It.first, It.second)) - return false; - return true; + if (HasBeenWrittenTo) { + const Function *ScopePtr = &Scope; + IsLiveInCalleeCB = [ScopePtr](const Function &Fn) { + return ScopePtr != &Fn; + }; } // Helper to determine if we can skip a specific write access. This is in // the worst case quadratic as we are looking for another write that will // hide the effect of this one. auto CanSkipAccess = [&](const Access &Acc, bool Exact) { + if ((!Acc.isWrite() || + !AA::isPotentiallyReachable(A, *Acc.getLocalInst(), I, QueryingAA, + IsLiveInCalleeCB)) && + (!Acc.isRead() || + !AA::isPotentiallyReachable(A, I, *Acc.getLocalInst(), QueryingAA, + IsLiveInCalleeCB))) + return true; + + if (!DT || !UseDominanceReasoning) + return false; if (!IsSameThreadAsLoad(Acc)) return false; if (!DominatingWrites.count(&Acc)) @@ -1303,7 +1126,7 @@ struct AAPointerInfoImpl // succeeded for all or not. unsigned NumInterferingAccesses = InterferingAccesses.size(); for (auto &It : InterferingAccesses) { - if (!DT || NumInterferingAccesses > MaxInterferingAccesses || + if (NumInterferingAccesses > MaxInterferingAccesses || !CanSkipAccess(*It.first, It.second)) { if (!UserCB(*It.first, It.second)) return false; @@ -1339,8 +1162,9 @@ struct AAPointerInfoImpl if (FromCallee) { Content = A.translateArgumentToCallSiteContent( RAcc.getContent(), CB, *this, UsedAssumedInformation); - AK = AccessKind( - AK & (IsByval ? AccessKind::AK_READ : AccessKind::AK_READ_WRITE)); + AK = + AccessKind(AK & (IsByval ? AccessKind::AK_R : AccessKind::AK_RW)); + AK = AccessKind(AK | (RAcc.isMayAccess() ? AK_MAY : AK_MUST)); } Changed = Changed | addAccess(A, OAS.getOffset(), OAS.getSize(), CB, Content, @@ -1353,6 +1177,27 @@ struct AAPointerInfoImpl /// Statistic tracking for all AAPointerInfo implementations. /// See AbstractAttribute::trackStatistics(). void trackPointerInfoStatistics(const IRPosition &IRP) const {} + + /// Dump the state into \p O. 
+ void dumpState(raw_ostream &O) { + for (auto &It : AccessBins) { + O << "[" << It.first.getOffset() << "-" + << It.first.getOffset() + It.first.getSize() + << "] : " << It.getSecond()->size() << "\n"; + for (auto &Acc : *It.getSecond()) { + O << " - " << Acc.getKind() << " - " << *Acc.getLocalInst() << "\n"; + if (Acc.getLocalInst() != Acc.getRemoteInst()) + O << " --> " << *Acc.getRemoteInst() + << "\n"; + if (!Acc.isWrittenValueYetUndetermined()) { + if (Acc.getWrittenValue()) + O << " - c: " << *Acc.getWrittenValue() << "\n"; + else + O << " - c: <unknown>\n"; + } + } + } + } }; struct AAPointerInfoFloating : public AAPointerInfoImpl { @@ -1360,9 +1205,6 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { AAPointerInfoFloating(const IRPosition &IRP, Attributor &A) : AAPointerInfoImpl(IRP, A) {} - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { AAPointerInfoImpl::initialize(A); } - /// Deal with an access and signal if it was handled successfully. bool handleAccess(Attributor &A, Instruction &I, Value &Ptr, Optional<Value *> Content, AccessKind Kind, int64_t Offset, @@ -1460,7 +1302,7 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { Follow = true; return true; } - if (isa<CastInst>(Usr) || isa<SelectInst>(Usr)) + if (isa<CastInst>(Usr) || isa<SelectInst>(Usr) || isa<ReturnInst>(Usr)) return HandlePassthroughUser(Usr, OffsetInfoMap[CurPtr], Follow); // For PHIs we need to take care of the recurrence explicitly as the value @@ -1469,6 +1311,7 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { if (isa<PHINode>(Usr)) { // Note the order here, the Usr access might change the map, CurPtr is // already in it though. + bool IsFirstPHIUser = !OffsetInfoMap.count(Usr); OffsetInfo &UsrOI = OffsetInfoMap[Usr]; OffsetInfo &PtrOI = OffsetInfoMap[CurPtr]; // Check if the PHI is invariant (so far). @@ -1484,52 +1327,69 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { } // Check if the PHI operand is not dependent on the PHI itself. - // TODO: This is not great as we look at the pointer type. However, it - // is unclear where the Offset size comes from with typeless pointers. APInt Offset( DL.getIndexSizeInBits(CurPtr->getType()->getPointerAddressSpace()), 0); - if (&AssociatedValue == CurPtr->stripAndAccumulateConstantOffsets( - DL, Offset, /* AllowNonInbounds */ true)) { - if (Offset != PtrOI.Offset) { - LLVM_DEBUG(dbgs() - << "[AAPointerInfo] PHI operand pointer offset mismatch " - << *CurPtr << " in " << *Usr << "\n"); - return false; - } - return HandlePassthroughUser(Usr, PtrOI, Follow); + Value *CurPtrBase = CurPtr->stripAndAccumulateConstantOffsets( + DL, Offset, /* AllowNonInbounds */ true); + auto It = OffsetInfoMap.find(CurPtrBase); + if (It != OffsetInfoMap.end()) { + Offset += It->getSecond().Offset; + if (IsFirstPHIUser || Offset == UsrOI.Offset) + return HandlePassthroughUser(Usr, PtrOI, Follow); + LLVM_DEBUG(dbgs() + << "[AAPointerInfo] PHI operand pointer offset mismatch " + << *CurPtr << " in " << *Usr << "\n"); + } else { + LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI operand is too complex " + << *CurPtr << " in " << *Usr << "\n"); } // TODO: Approximate in case we know the direction of the recurrence. 
- LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI operand is too complex " - << *CurPtr << " in " << *Usr << "\n"); UsrOI = PtrOI; UsrOI.Offset = OffsetAndSize::Unknown; Follow = true; return true; } - if (auto *LoadI = dyn_cast<LoadInst>(Usr)) - return handleAccess(A, *LoadI, *CurPtr, /* Content */ nullptr, - AccessKind::AK_READ, OffsetInfoMap[CurPtr].Offset, - Changed, LoadI->getType()); + if (auto *LoadI = dyn_cast<LoadInst>(Usr)) { + // If the access is to a pointer that may or may not be the associated + // value, e.g. due to a PHI, we cannot assume it will be read. + AccessKind AK = AccessKind::AK_R; + if (getUnderlyingObject(CurPtr) == &AssociatedValue) + AK = AccessKind(AK | AccessKind::AK_MUST); + else + AK = AccessKind(AK | AccessKind::AK_MAY); + return handleAccess(A, *LoadI, *CurPtr, /* Content */ nullptr, AK, + OffsetInfoMap[CurPtr].Offset, Changed, + LoadI->getType()); + } + if (auto *StoreI = dyn_cast<StoreInst>(Usr)) { if (StoreI->getValueOperand() == CurPtr) { LLVM_DEBUG(dbgs() << "[AAPointerInfo] Escaping use in store " << *StoreI << "\n"); return false; } + // If the access is to a pointer that may or may not be the associated + // value, e.g. due to a PHI, we cannot assume it will be written. + AccessKind AK = AccessKind::AK_W; + if (getUnderlyingObject(CurPtr) == &AssociatedValue) + AK = AccessKind(AK | AccessKind::AK_MUST); + else + AK = AccessKind(AK | AccessKind::AK_MAY); bool UsedAssumedInformation = false; - Optional<Value *> Content = A.getAssumedSimplified( - *StoreI->getValueOperand(), *this, UsedAssumedInformation); - return handleAccess(A, *StoreI, *CurPtr, Content, AccessKind::AK_WRITE, + Optional<Value *> Content = + A.getAssumedSimplified(*StoreI->getValueOperand(), *this, + UsedAssumedInformation, AA::Interprocedural); + return handleAccess(A, *StoreI, *CurPtr, Content, AK, OffsetInfoMap[CurPtr].Offset, Changed, StoreI->getValueOperand()->getType()); } if (auto *CB = dyn_cast<CallBase>(Usr)) { if (CB->isLifetimeStartOrEnd()) return true; - if (TLI && isFreeCall(CB, TLI)) + if (getFreedOperand(CB, TLI) == U) return true; if (CB->isArgOperand(&U)) { unsigned ArgNo = CB->getArgOperandNo(&U); @@ -1539,7 +1399,7 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { Changed = translateAndAddState(A, CSArgPI, OffsetInfoMap[CurPtr].Offset, *CB) | Changed; - return true; + return isValidState(); } LLVM_DEBUG(dbgs() << "[AAPointerInfo] Call user not handled " << *CB << "\n"); @@ -1551,36 +1411,30 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { return false; }; auto EquivalentUseCB = [&](const Use &OldU, const Use &NewU) { - if (OffsetInfoMap.count(NewU)) + if (OffsetInfoMap.count(NewU)) { + LLVM_DEBUG({ + if (!(OffsetInfoMap[NewU] == OffsetInfoMap[OldU])) { + dbgs() << "[AAPointerInfo] Equivalent use callback failed: " + << OffsetInfoMap[NewU].Offset << " vs " + << OffsetInfoMap[OldU].Offset << "\n"; + } + }); return OffsetInfoMap[NewU] == OffsetInfoMap[OldU]; + } OffsetInfoMap[NewU] = OffsetInfoMap[OldU]; return true; }; if (!A.checkForAllUses(UsePred, *this, AssociatedValue, /* CheckBBLivenessOnly */ true, DepClassTy::OPTIONAL, - /* IgnoreDroppableUses */ true, EquivalentUseCB)) + /* IgnoreDroppableUses */ true, EquivalentUseCB)) { + LLVM_DEBUG( + dbgs() << "[AAPointerInfo] Check for all uses failed, abort!\n"); return indicatePessimisticFixpoint(); + } LLVM_DEBUG({ dbgs() << "Accesses by bin after update:\n"; - for (auto &It : AccessBins) { - dbgs() << "[" << It.first.getOffset() << "-" - << It.first.getOffset() + It.first.getSize() - << "] : " << 
It.getSecond()->size() << "\n"; - for (auto &Acc : *It.getSecond()) { - dbgs() << " - " << Acc.getKind() << " - " << *Acc.getLocalInst() - << "\n"; - if (Acc.getLocalInst() != Acc.getRemoteInst()) - dbgs() << " --> " - << *Acc.getRemoteInst() << "\n"; - if (!Acc.isWrittenValueYetUndetermined()) { - if (Acc.getWrittenValue()) - dbgs() << " - c: " << *Acc.getWrittenValue() << "\n"; - else - dbgs() << " - c: <unknown>\n"; - } - } - } + dumpState(dbgs()); }); return Changed; @@ -1643,16 +1497,22 @@ struct AAPointerInfoCallSiteArgument final : AAPointerInfoFloating { unsigned ArgNo = getIRPosition().getCallSiteArgNo(); ChangeStatus Changed = ChangeStatus::UNCHANGED; if (ArgNo == 0) { - handleAccess(A, *MI, Ptr, nullptr, AccessKind::AK_WRITE, 0, Changed, - nullptr, LengthVal); + handleAccess(A, *MI, Ptr, nullptr, AccessKind::AK_MUST_WRITE, 0, + Changed, nullptr, LengthVal); } else if (ArgNo == 1) { - handleAccess(A, *MI, Ptr, nullptr, AccessKind::AK_READ, 0, Changed, + handleAccess(A, *MI, Ptr, nullptr, AccessKind::AK_MUST_READ, 0, Changed, nullptr, LengthVal); } else { LLVM_DEBUG(dbgs() << "[AAPointerInfo] Unhandled memory intrinsic " << *MI << "\n"); return indicatePessimisticFixpoint(); } + + LLVM_DEBUG({ + dbgs() << "Accesses by bin after update:\n"; + dumpState(dbgs()); + }); + return Changed; } @@ -1954,23 +1814,23 @@ bool AAReturnedValuesImpl::checkForAllReturnedValuesAndReturnInsts( ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) { ChangeStatus Changed = ChangeStatus::UNCHANGED; - auto ReturnValueCB = [&](Value &V, const Instruction *CtxI, ReturnInst &Ret, - bool) -> bool { - assert(AA::isValidInScope(V, Ret.getFunction()) && - "Assumed returned value should be valid in function scope!"); - if (ReturnedValues[&V].insert(&Ret)) - Changed = ChangeStatus::CHANGED; - return true; - }; - + SmallVector<AA::ValueAndContext> Values; bool UsedAssumedInformation = false; auto ReturnInstCB = [&](Instruction &I) { ReturnInst &Ret = cast<ReturnInst>(I); - return genericValueTraversal<ReturnInst>( - A, IRPosition::value(*Ret.getReturnValue()), *this, Ret, ReturnValueCB, - &I, UsedAssumedInformation, /* UseValueSimplify */ true, - /* MaxValues */ 16, - /* StripCB */ nullptr, AA::Intraprocedural); + Values.clear(); + if (!A.getAssumedSimplifiedValues(IRPosition::value(*Ret.getReturnValue()), + *this, Values, AA::Intraprocedural, + UsedAssumedInformation)) + Values.push_back({*Ret.getReturnValue(), Ret}); + + for (auto &VAC : Values) { + assert(AA::isValidInScope(*VAC.getValue(), Ret.getFunction()) && + "Assumed returned value should be valid in function scope!"); + if (ReturnedValues[VAC.getValue()].insert(&Ret)) + Changed = ChangeStatus::CHANGED; + } + return true; }; // Discover returned values from all live returned instructions in the @@ -2472,6 +2332,18 @@ struct AANonNullFloating : public AANonNullImpl { ChangeStatus updateImpl(Attributor &A) override { const DataLayout &DL = A.getDataLayout(); + bool Stripped; + bool UsedAssumedInformation = false; + SmallVector<AA::ValueAndContext> Values; + if (!A.getAssumedSimplifiedValues(getIRPosition(), *this, Values, + AA::AnyScope, UsedAssumedInformation)) { + Values.push_back({getAssociatedValue(), getCtxI()}); + Stripped = false; + } else { + Stripped = Values.size() != 1 || + Values.front().getValue() != &getAssociatedValue(); + } + DominatorTree *DT = nullptr; AssumptionCache *AC = nullptr; InformationCache &InfoCache = A.getInfoCache(); @@ -2480,8 +2352,8 @@ struct AANonNullFloating : public AANonNullImpl { AC = 
InfoCache.getAnalysisResultForFunction<AssumptionAnalysis>(*Fn); } - auto VisitValueCB = [&](Value &V, const Instruction *CtxI, - AANonNull::StateType &T, bool Stripped) -> bool { + AANonNull::StateType T; + auto VisitValueCB = [&](Value &V, const Instruction *CtxI) -> bool { const auto &AA = A.getAAFor<AANonNull>(*this, IRPosition::value(V), DepClassTy::REQUIRED); if (!Stripped && this == &AA) { @@ -2495,12 +2367,9 @@ struct AANonNullFloating : public AANonNullImpl { return T.isValidState(); }; - StateType T; - bool UsedAssumedInformation = false; - if (!genericValueTraversal<StateType>(A, getIRPosition(), *this, T, - VisitValueCB, getCtxI(), - UsedAssumedInformation)) - return indicatePessimisticFixpoint(); + for (const auto &VAC : Values) + if (!VisitValueCB(*VAC.getValue(), VAC.getCtxI())) + return indicatePessimisticFixpoint(); return clampStateAndIndicateChange(getState(), T); } @@ -2753,8 +2622,9 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior { if (!NoUndefAA.isKnownNoUndef()) continue; bool UsedAssumedInformation = false; - Optional<Value *> SimplifiedVal = A.getAssumedSimplified( - IRPosition::value(*ArgVal), *this, UsedAssumedInformation); + Optional<Value *> SimplifiedVal = + A.getAssumedSimplified(IRPosition::value(*ArgVal), *this, + UsedAssumedInformation, AA::Interprocedural); if (UsedAssumedInformation) continue; if (SimplifiedVal && !SimplifiedVal.value()) @@ -2925,8 +2795,9 @@ private: Optional<Value *> stopOnUndefOrAssumed(Attributor &A, Value *V, Instruction *I) { bool UsedAssumedInformation = false; - Optional<Value *> SimplifiedV = A.getAssumedSimplified( - IRPosition::value(*V), *this, UsedAssumedInformation); + Optional<Value *> SimplifiedV = + A.getAssumedSimplified(IRPosition::value(*V), *this, + UsedAssumedInformation, AA::Interprocedural); if (!UsedAssumedInformation) { // Don't depend on assumed values. if (!SimplifiedV) { @@ -3369,7 +3240,9 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl { } } - if (!AA::isPotentiallyReachable(A, *UserI, *getCtxI(), *this)) + if (!AA::isPotentiallyReachable( + A, *UserI, *getCtxI(), *this, + [ScopeFn](const Function &Fn) { return &Fn != ScopeFn; })) return true; } @@ -4364,10 +4237,23 @@ struct AADereferenceableFloating : AADereferenceableImpl { /// See AbstractAttribute::updateImpl(...). 
ChangeStatus updateImpl(Attributor &A) override { + + bool Stripped; + bool UsedAssumedInformation = false; + SmallVector<AA::ValueAndContext> Values; + if (!A.getAssumedSimplifiedValues(getIRPosition(), *this, Values, + AA::AnyScope, UsedAssumedInformation)) { + Values.push_back({getAssociatedValue(), getCtxI()}); + Stripped = false; + } else { + Stripped = Values.size() != 1 || + Values.front().getValue() != &getAssociatedValue(); + } + const DataLayout &DL = A.getDataLayout(); + DerefState T; - auto VisitValueCB = [&](const Value &V, const Instruction *, DerefState &T, - bool Stripped) -> bool { + auto VisitValueCB = [&](const Value &V) -> bool { unsigned IdxWidth = DL.getIndexSizeInBits(V.getType()->getPointerAddressSpace()); APInt Offset(IdxWidth, 0); @@ -4420,12 +4306,9 @@ struct AADereferenceableFloating : AADereferenceableImpl { return T.isValidState(); }; - DerefState T; - bool UsedAssumedInformation = false; - if (!genericValueTraversal<DerefState>(A, getIRPosition(), *this, T, - VisitValueCB, getCtxI(), - UsedAssumedInformation)) - return indicatePessimisticFixpoint(); + for (const auto &VAC : Values) + if (!VisitValueCB(*VAC.getValue())) + return indicatePessimisticFixpoint(); return clampStateAndIndicateChange(getState(), T); } @@ -4652,8 +4535,20 @@ struct AAAlignFloating : AAAlignImpl { ChangeStatus updateImpl(Attributor &A) override { const DataLayout &DL = A.getDataLayout(); - auto VisitValueCB = [&](Value &V, const Instruction *, - AAAlign::StateType &T, bool Stripped) -> bool { + bool Stripped; + bool UsedAssumedInformation = false; + SmallVector<AA::ValueAndContext> Values; + if (!A.getAssumedSimplifiedValues(getIRPosition(), *this, Values, + AA::AnyScope, UsedAssumedInformation)) { + Values.push_back({getAssociatedValue(), getCtxI()}); + Stripped = false; + } else { + Stripped = Values.size() != 1 || + Values.front().getValue() != &getAssociatedValue(); + } + + StateType T; + auto VisitValueCB = [&](Value &V) -> bool { if (isa<UndefValue>(V) || isa<ConstantPointerNull>(V)) return true; const auto &AA = A.getAAFor<AAAlign>(*this, IRPosition::value(V), @@ -4686,15 +4581,13 @@ struct AAAlignFloating : AAAlignImpl { return T.isValidState(); }; - StateType T; - bool UsedAssumedInformation = false; - if (!genericValueTraversal<StateType>(A, getIRPosition(), *this, T, - VisitValueCB, getCtxI(), - UsedAssumedInformation)) - return indicatePessimisticFixpoint(); + for (const auto &VAC : Values) { + if (!VisitValueCB(*VAC.getValue())) + return indicatePessimisticFixpoint(); + } - // TODO: If we know we visited all incoming values, thus no are assumed - // dead, we can take the known information from the state T. + // TODO: If we know we visited all incoming values, thus no are assumed + // dead, we can take the known information from the state T. return clampStateAndIndicateChange(getState(), T); } @@ -4941,7 +4834,9 @@ struct AAInstanceInfoImpl : public AAInstanceInfo { return false; // If this call base might reach the scope again we might forward the // argument back here. This is very conservative. 
- if (AA::isPotentiallyReachable(A, *CB, *Scope, *this, nullptr)) + if (AA::isPotentiallyReachable( + A, *CB, *Scope, *this, + [Scope](const Function &Fn) { return &Fn != Scope; })) return false; return true; } @@ -5518,9 +5413,9 @@ struct AAValueSimplifyImpl : AAValueSimplify { if (const auto &NewV = VMap.lookup(&V)) return NewV; bool UsedAssumedInformation = false; - Optional<Value *> SimpleV = - A.getAssumedSimplified(V, QueryingAA, UsedAssumedInformation); - if (!SimpleV) + Optional<Value *> SimpleV = A.getAssumedSimplified( + V, QueryingAA, UsedAssumedInformation, AA::Interprocedural); + if (!SimpleV.has_value()) return PoisonValue::get(&Ty); Value *EffectiveV = &V; if (SimpleV.value()) @@ -5561,8 +5456,8 @@ struct AAValueSimplifyImpl : AAValueSimplify { bool UsedAssumedInformation = false; Optional<Value *> QueryingValueSimplified = &IRP.getAssociatedValue(); if (Simplify) - QueryingValueSimplified = - A.getAssumedSimplified(IRP, QueryingAA, UsedAssumedInformation); + QueryingValueSimplified = A.getAssumedSimplified( + IRP, QueryingAA, UsedAssumedInformation, AA::Interprocedural); return unionAssumed(QueryingValueSimplified); } @@ -5763,209 +5658,11 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl { indicatePessimisticFixpoint(); } - /// Check if \p Cmp is a comparison we can simplify. - /// - /// We handle multiple cases, one in which at least one operand is an - /// (assumed) nullptr. If so, try to simplify it using AANonNull on the other - /// operand. Return true if successful, in that case SimplifiedAssociatedValue - /// will be updated. - bool handleCmp(Attributor &A, CmpInst &Cmp) { - auto Union = [&](Value &V) { - SimplifiedAssociatedValue = AA::combineOptionalValuesInAAValueLatice( - SimplifiedAssociatedValue, &V, V.getType()); - return SimplifiedAssociatedValue != Optional<Value *>(nullptr); - }; - - Value *LHS = Cmp.getOperand(0); - Value *RHS = Cmp.getOperand(1); - - // Simplify the operands first. - bool UsedAssumedInformation = false; - const auto &SimplifiedLHS = - A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedLHS) - return true; - if (!SimplifiedLHS.value()) - return false; - LHS = *SimplifiedLHS; - - const auto &SimplifiedRHS = - A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedRHS) - return true; - if (!SimplifiedRHS.value()) - return false; - RHS = *SimplifiedRHS; - - LLVMContext &Ctx = Cmp.getContext(); - // Handle the trivial case first in which we don't even need to think about - // null or non-null. - if (LHS == RHS && (Cmp.isTrueWhenEqual() || Cmp.isFalseWhenEqual())) { - Constant *NewVal = - ConstantInt::get(Type::getInt1Ty(Ctx), Cmp.isTrueWhenEqual()); - if (!Union(*NewVal)) - return false; - if (!UsedAssumedInformation) - indicateOptimisticFixpoint(); - return true; - } - - // From now on we only handle equalities (==, !=). - ICmpInst *ICmp = dyn_cast<ICmpInst>(&Cmp); - if (!ICmp || !ICmp->isEquality()) - return false; - - bool LHSIsNull = isa<ConstantPointerNull>(LHS); - bool RHSIsNull = isa<ConstantPointerNull>(RHS); - if (!LHSIsNull && !RHSIsNull) - return false; - - // Left is the nullptr ==/!= non-nullptr case. We'll use AANonNull on the - // non-nullptr operand and if we assume it's non-null we can conclude the - // result of the comparison. 
- assert((LHSIsNull || RHSIsNull) && - "Expected nullptr versus non-nullptr comparison at this point"); - - // The index is the operand that we assume is not null. - unsigned PtrIdx = LHSIsNull; - auto &PtrNonNullAA = A.getAAFor<AANonNull>( - *this, IRPosition::value(*ICmp->getOperand(PtrIdx)), - DepClassTy::REQUIRED); - if (!PtrNonNullAA.isAssumedNonNull()) - return false; - UsedAssumedInformation |= !PtrNonNullAA.isKnownNonNull(); - - // The new value depends on the predicate, true for != and false for ==. - Constant *NewVal = ConstantInt::get( - Type::getInt1Ty(Ctx), ICmp->getPredicate() == CmpInst::ICMP_NE); - if (!Union(*NewVal)) - return false; - - if (!UsedAssumedInformation) - indicateOptimisticFixpoint(); - - return true; - } - - /// Use the generic, non-optimistic InstSimplfy functionality if we managed to - /// simplify any operand of the instruction \p I. Return true if successful, - /// in that case SimplifiedAssociatedValue will be updated. - bool handleGenericInst(Attributor &A, Instruction &I) { - bool SomeSimplified = false; - bool UsedAssumedInformation = false; - - SmallVector<Value *, 8> NewOps(I.getNumOperands()); - int Idx = 0; - for (Value *Op : I.operands()) { - const auto &SimplifiedOp = - A.getAssumedSimplified(IRPosition::value(*Op, getCallBaseContext()), - *this, UsedAssumedInformation); - // If we are not sure about any operand we are not sure about the entire - // instruction, we'll wait. - if (!SimplifiedOp) - return true; - - if (SimplifiedOp.value()) - NewOps[Idx] = SimplifiedOp.value(); - else - NewOps[Idx] = Op; - - SomeSimplified |= (NewOps[Idx] != Op); - ++Idx; - } - - // We won't bother with the InstSimplify interface if we didn't simplify any - // operand ourselves. - if (!SomeSimplified) - return false; - - InformationCache &InfoCache = A.getInfoCache(); - Function *F = I.getFunction(); - const auto *DT = - InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(*F); - const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F); - auto *AC = InfoCache.getAnalysisResultForFunction<AssumptionAnalysis>(*F); - OptimizationRemarkEmitter *ORE = nullptr; - - const DataLayout &DL = I.getModule()->getDataLayout(); - SimplifyQuery Q(DL, TLI, DT, AC, &I); - if (Value *SimplifiedI = - simplifyInstructionWithOperands(&I, NewOps, Q, ORE)) { - SimplifiedAssociatedValue = AA::combineOptionalValuesInAAValueLatice( - SimplifiedAssociatedValue, SimplifiedI, I.getType()); - return SimplifiedAssociatedValue != Optional<Value *>(nullptr); - } - return false; - } - /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override { auto Before = SimplifiedAssociatedValue; - - // Do not simplify loads that are only used in llvm.assume if we cannot also - // remove all stores that may feed into the load. The reason is that the - // assume is probably worth something as long as the stores are around. 
- if (auto *LI = dyn_cast<LoadInst>(&getAssociatedValue())) { - InformationCache &InfoCache = A.getInfoCache(); - if (InfoCache.isOnlyUsedByAssume(*LI)) { - SmallSetVector<Value *, 4> PotentialCopies; - SmallSetVector<Instruction *, 4> PotentialValueOrigins; - bool UsedAssumedInformation = false; - if (AA::getPotentiallyLoadedValues(A, *LI, PotentialCopies, - PotentialValueOrigins, *this, - UsedAssumedInformation, - /* OnlyExact */ true)) { - if (!llvm::all_of(PotentialValueOrigins, [&](Instruction *I) { - if (!I) - return true; - if (auto *SI = dyn_cast<StoreInst>(I)) - return A.isAssumedDead(SI->getOperandUse(0), this, - /* LivenessAA */ nullptr, - UsedAssumedInformation, - /* CheckBBLivenessOnly */ false); - return A.isAssumedDead(*I, this, /* LivenessAA */ nullptr, - UsedAssumedInformation, - /* CheckBBLivenessOnly */ false); - })) - return indicatePessimisticFixpoint(); - } - } - } - - auto VisitValueCB = [&](Value &V, const Instruction *CtxI, bool &, - bool Stripped) -> bool { - auto &AA = A.getAAFor<AAValueSimplify>( - *this, IRPosition::value(V, getCallBaseContext()), - DepClassTy::REQUIRED); - if (!Stripped && this == &AA) { - - if (auto *I = dyn_cast<Instruction>(&V)) { - if (auto *Cmp = dyn_cast<CmpInst>(&V)) - if (handleCmp(A, *Cmp)) - return true; - if (handleGenericInst(A, *I)) - return true; - } - // TODO: Look the instruction and check recursively. - - LLVM_DEBUG(dbgs() << "[ValueSimplify] Can't be stripped more : " << V - << "\n"); - return false; - } - return checkAndUpdate(A, *this, - IRPosition::value(V, getCallBaseContext())); - }; - - bool Dummy = false; - bool UsedAssumedInformation = false; - if (!genericValueTraversal<bool>(A, getIRPosition(), *this, Dummy, - VisitValueCB, getCtxI(), - UsedAssumedInformation, - /* UseValueSimplify */ false)) - if (!askSimplifiedValueForOtherAAs(A)) - return indicatePessimisticFixpoint(); + if (!askSimplifiedValueForOtherAAs(A)) + return indicatePessimisticFixpoint(); // If a candicate was found in this update, return CHANGED. return Before == SimplifiedAssociatedValue ? ChangeStatus::UNCHANGED @@ -6122,6 +5819,8 @@ struct AAHeapToStackFunction final : public AAHeapToStack { struct DeallocationInfo { /// The call that deallocates the memory. CallBase *const CB; + /// The value freed by the call. + Value *FreedOp; /// Flag to indicate if we don't know all objects this deallocation might /// free. @@ -6153,14 +5852,14 @@ struct AAHeapToStackFunction final : public AAHeapToStack { CallBase *CB = dyn_cast<CallBase>(&I); if (!CB) return true; - if (isFreeCall(CB, TLI)) { - DeallocationInfos[CB] = new (A.Allocator) DeallocationInfo{CB}; + if (Value *FreedOp = getFreedOperand(CB, TLI)) { + DeallocationInfos[CB] = new (A.Allocator) DeallocationInfo{CB, FreedOp}; return true; } // To do heap to stack, we need to know that the allocation itself is // removable once uses are rewritten, and that we can initialize the // alloca to the same pattern as the original allocation result. - if (isAllocationFn(CB, TLI) && isAllocRemovable(CB, TLI)) { + if (isRemovableAlloc(CB, TLI)) { auto *I8Ty = Type::getInt8Ty(CB->getParent()->getContext()); if (nullptr != getInitialValueOfAllocation(CB, TLI, I8Ty)) { AllocationInfo *AI = new (A.Allocator) AllocationInfo{CB}; @@ -6427,44 +6126,36 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) { /* CheckBBLivenessOnly */ true)) continue; - // Use the optimistic version to get the freed objects, ignoring dead - // branches etc. 
- SmallVector<Value *, 8> Objects; - if (!AA::getAssumedUnderlyingObjects(A, *DI.CB->getArgOperand(0), Objects, - *this, DI.CB, - UsedAssumedInformation)) { - LLVM_DEBUG( - dbgs() - << "[H2S] Unexpected failure in getAssumedUnderlyingObjects!\n"); + // Use the non-optimistic version to get the freed object. + Value *Obj = getUnderlyingObject(DI.FreedOp); + if (!Obj) { + LLVM_DEBUG(dbgs() << "[H2S] Unknown underlying object for free!\n"); DI.MightFreeUnknownObjects = true; continue; } - // Check each object explicitly. - for (auto *Obj : Objects) { - // Free of null and undef can be ignored as no-ops (or UB in the latter - // case). - if (isa<ConstantPointerNull>(Obj) || isa<UndefValue>(Obj)) - continue; - - CallBase *ObjCB = dyn_cast<CallBase>(Obj); - if (!ObjCB) { - LLVM_DEBUG(dbgs() - << "[H2S] Free of a non-call object: " << *Obj << "\n"); - DI.MightFreeUnknownObjects = true; - continue; - } + // Free of null and undef can be ignored as no-ops (or UB in the latter + // case). + if (isa<ConstantPointerNull>(Obj) || isa<UndefValue>(Obj)) + continue; - AllocationInfo *AI = AllocationInfos.lookup(ObjCB); - if (!AI) { - LLVM_DEBUG(dbgs() << "[H2S] Free of a non-allocation object: " << *Obj - << "\n"); - DI.MightFreeUnknownObjects = true; - continue; - } + CallBase *ObjCB = dyn_cast<CallBase>(Obj); + if (!ObjCB) { + LLVM_DEBUG(dbgs() << "[H2S] Free of a non-call object: " << *Obj + << "\n"); + DI.MightFreeUnknownObjects = true; + continue; + } - DI.PotentialAllocationCalls.insert(ObjCB); + AllocationInfo *AI = AllocationInfos.lookup(ObjCB); + if (!AI) { + LLVM_DEBUG(dbgs() << "[H2S] Free of a non-allocation object: " << *Obj + << "\n"); + DI.MightFreeUnknownObjects = true; + continue; } + + DI.PotentialAllocationCalls.insert(ObjCB); } }; @@ -7692,7 +7383,7 @@ bool AAMemoryBehaviorFloating::followUsersOfUseIn(Attributor &A, const Use &U, const Instruction *UserI) { // The loaded value is unrelated to the pointer argument, no need to // follow the users of the load. - if (isa<LoadInst>(UserI)) + if (isa<LoadInst>(UserI) || isa<ReturnInst>(UserI)) return false; // By default we follow all uses assuming UserI might leak information on U, @@ -7822,16 +7513,15 @@ struct AAMemoryLocationImpl : public AAMemoryLocation { AAMemoryLocationImpl(const IRPosition &IRP, Attributor &A) : AAMemoryLocation(IRP, A), Allocator(A.Allocator) { - for (unsigned u = 0; u < llvm::CTLog2<VALID_STATE>(); ++u) - AccessKind2Accesses[u] = nullptr; + AccessKind2Accesses.fill(nullptr); } ~AAMemoryLocationImpl() { // The AccessSets are allocated via a BumpPtrAllocator, we call // the destructor manually. - for (unsigned u = 0; u < llvm::CTLog2<VALID_STATE>(); ++u) - if (AccessKind2Accesses[u]) - AccessKind2Accesses[u]->~AccessSet(); + for (AccessSet *AS : AccessKind2Accesses) + if (AS) + AS->~AccessSet(); } /// See AbstractAttribute::initialize(...). @@ -7999,7 +7689,7 @@ protected: /// Mapping from *single* memory location kinds, e.g., LOCAL_MEM with the /// value of NO_LOCAL_MEM, to the accesses encountered for this memory kind. using AccessSet = SmallSet<AccessInfo, 2, AccessInfo>; - AccessSet *AccessKind2Accesses[llvm::CTLog2<VALID_STATE>()]; + std::array<AccessSet *, llvm::CTLog2<VALID_STATE>()> AccessKind2Accesses; /// Categorize the pointer arguments of CB that might access memory in /// AccessedLoc and update the state and access map accordingly. 
@@ -8061,7 +7751,7 @@ void AAMemoryLocationImpl::categorizePtrValue( << Ptr << " [" << getMemoryLocationsAsStr(State.getAssumed()) << "]\n"); - SmallVector<Value *, 8> Objects; + SmallSetVector<Value *, 8> Objects; bool UsedAssumedInformation = false; if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, *this, &I, UsedAssumedInformation, @@ -8670,19 +8360,19 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl { // Simplify the operands first. bool UsedAssumedInformation = false; - const auto &SimplifiedLHS = - A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedLHS) + const auto &SimplifiedLHS = A.getAssumedSimplified( + IRPosition::value(*LHS, getCallBaseContext()), *this, + UsedAssumedInformation, AA::Interprocedural); + if (!SimplifiedLHS.has_value()) return true; if (!SimplifiedLHS.value()) return false; LHS = *SimplifiedLHS; - const auto &SimplifiedRHS = - A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedRHS) + const auto &SimplifiedRHS = A.getAssumedSimplified( + IRPosition::value(*RHS, getCallBaseContext()), *this, + UsedAssumedInformation, AA::Interprocedural); + if (!SimplifiedRHS.has_value()) return true; if (!SimplifiedRHS.value()) return false; @@ -8723,10 +8413,10 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl { // Simplify the operand first. bool UsedAssumedInformation = false; - const auto &SimplifiedOpV = - A.getAssumedSimplified(IRPosition::value(*OpV, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedOpV) + const auto &SimplifiedOpV = A.getAssumedSimplified( + IRPosition::value(*OpV, getCallBaseContext()), *this, + UsedAssumedInformation, AA::Interprocedural); + if (!SimplifiedOpV.has_value()) return true; if (!SimplifiedOpV.value()) return false; @@ -8753,19 +8443,19 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl { // Simplify the operands first. bool UsedAssumedInformation = false; - const auto &SimplifiedLHS = - A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedLHS) + const auto &SimplifiedLHS = A.getAssumedSimplified( + IRPosition::value(*LHS, getCallBaseContext()), *this, + UsedAssumedInformation, AA::Interprocedural); + if (!SimplifiedLHS.has_value()) return true; if (!SimplifiedLHS.value()) return false; LHS = *SimplifiedLHS; - const auto &SimplifiedRHS = - A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedRHS) + const auto &SimplifiedRHS = A.getAssumedSimplified( + IRPosition::value(*RHS, getCallBaseContext()), *this, + UsedAssumedInformation, AA::Interprocedural); + if (!SimplifiedRHS.has_value()) return true; if (!SimplifiedRHS.value()) return false; @@ -8820,17 +8510,18 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl { /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override { - auto VisitValueCB = [&](Value &V, const Instruction *CtxI, - IntegerRangeState &T, bool Stripped) -> bool { + + IntegerRangeState T(getBitWidth()); + auto VisitValueCB = [&](Value &V, const Instruction *CtxI) -> bool { Instruction *I = dyn_cast<Instruction>(&V); if (!I || isa<CallBase>(I)) { // Simplify the operand first. 
bool UsedAssumedInformation = false; - const auto &SimplifiedOpV = - A.getAssumedSimplified(IRPosition::value(V, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedOpV) + const auto &SimplifiedOpV = A.getAssumedSimplified( + IRPosition::value(V, getCallBaseContext()), *this, + UsedAssumedInformation, AA::Interprocedural); + if (!SimplifiedOpV.has_value()) return true; if (!SimplifiedOpV.value()) return false; @@ -8880,13 +8571,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl { return T.isValidState(); }; - IntegerRangeState T(getBitWidth()); - - bool UsedAssumedInformation = false; - if (!genericValueTraversal<IntegerRangeState>(A, getIRPosition(), *this, T, - VisitValueCB, getCtxI(), - UsedAssumedInformation, - /* UseValueSimplify */ false)) + if (!VisitValueCB(getAssociatedValue(), getCtxI())) return indicatePessimisticFixpoint(); // Ensure that long def-use chains can't cause circular reasoning either by @@ -8998,6 +8683,36 @@ struct AAPotentialConstantValuesImpl : AAPotentialConstantValues { AAPotentialConstantValues::initialize(A); } + bool fillSetWithConstantValues(Attributor &A, const IRPosition &IRP, SetTy &S, + bool &ContainsUndef) { + SmallVector<AA::ValueAndContext> Values; + bool UsedAssumedInformation = false; + if (!A.getAssumedSimplifiedValues(IRP, *this, Values, AA::Interprocedural, + UsedAssumedInformation)) { + if (!IRP.getAssociatedType()->isIntegerTy()) + return false; + auto &PotentialValuesAA = A.getAAFor<AAPotentialConstantValues>( + *this, IRP, DepClassTy::REQUIRED); + if (!PotentialValuesAA.getState().isValidState()) + return false; + ContainsUndef = PotentialValuesAA.getState().undefIsContained(); + S = PotentialValuesAA.getState().getAssumedSet(); + return true; + } + + for (auto &It : Values) { + if (isa<UndefValue>(It.getValue())) + continue; + auto *CI = dyn_cast<ConstantInt>(It.getValue()); + if (!CI) + return false; + S.insert(CI->getValue()); + } + ContainsUndef = S.empty(); + + return true; + } + /// See AbstractAttribute::getAsStr(). const std::string getAsStr() const override { std::string Str; @@ -9186,50 +8901,22 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl { Value *LHS = ICI->getOperand(0); Value *RHS = ICI->getOperand(1); - // Simplify the operands first. 
- bool UsedAssumedInformation = false; - const auto &SimplifiedLHS = - A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedLHS) - return ChangeStatus::UNCHANGED; - if (!SimplifiedLHS.value()) - return indicatePessimisticFixpoint(); - LHS = *SimplifiedLHS; - - const auto &SimplifiedRHS = - A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedRHS) - return ChangeStatus::UNCHANGED; - if (!SimplifiedRHS.value()) - return indicatePessimisticFixpoint(); - RHS = *SimplifiedRHS; - - if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy()) - return indicatePessimisticFixpoint(); - - auto &LHSAA = A.getAAFor<AAPotentialConstantValues>( - *this, IRPosition::value(*LHS), DepClassTy::REQUIRED); - if (!LHSAA.isValidState()) + bool LHSContainsUndef = false, RHSContainsUndef = false; + SetTy LHSAAPVS, RHSAAPVS; + if (!fillSetWithConstantValues(A, IRPosition::value(*LHS), LHSAAPVS, + LHSContainsUndef) || + !fillSetWithConstantValues(A, IRPosition::value(*RHS), RHSAAPVS, + RHSContainsUndef)) return indicatePessimisticFixpoint(); - auto &RHSAA = A.getAAFor<AAPotentialConstantValues>( - *this, IRPosition::value(*RHS), DepClassTy::REQUIRED); - if (!RHSAA.isValidState()) - return indicatePessimisticFixpoint(); - - const SetTy &LHSAAPVS = LHSAA.getAssumedSet(); - const SetTy &RHSAAPVS = RHSAA.getAssumedSet(); - // TODO: make use of undef flag to limit potential values aggressively. bool MaybeTrue = false, MaybeFalse = false; const APInt Zero(RHS->getType()->getIntegerBitWidth(), 0); - if (LHSAA.undefIsContained() && RHSAA.undefIsContained()) { + if (LHSContainsUndef && RHSContainsUndef) { // The result of any comparison between undefs can be soundly replaced // with undef. unionAssumedWithUndef(); - } else if (LHSAA.undefIsContained()) { + } else if (LHSContainsUndef) { for (const APInt &R : RHSAAPVS) { bool CmpResult = calculateICmpInst(ICI, Zero, R); MaybeTrue |= CmpResult; @@ -9237,7 +8924,7 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl { if (MaybeTrue & MaybeFalse) return indicatePessimisticFixpoint(); } - } else if (RHSAA.undefIsContained()) { + } else if (RHSContainsUndef) { for (const APInt &L : LHSAAPVS) { bool CmpResult = calculateICmpInst(ICI, L, Zero); MaybeTrue |= CmpResult; @@ -9269,29 +8956,7 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl { Value *LHS = SI->getTrueValue(); Value *RHS = SI->getFalseValue(); - // Simplify the operands first. 
bool UsedAssumedInformation = false; - const auto &SimplifiedLHS = - A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedLHS) - return ChangeStatus::UNCHANGED; - if (!SimplifiedLHS.value()) - return indicatePessimisticFixpoint(); - LHS = *SimplifiedLHS; - - const auto &SimplifiedRHS = - A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedRHS) - return ChangeStatus::UNCHANGED; - if (!SimplifiedRHS.value()) - return indicatePessimisticFixpoint(); - RHS = *SimplifiedRHS; - - if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy()) - return indicatePessimisticFixpoint(); - Optional<Constant *> C = A.getAssumedConstant(*SI->getCondition(), *this, UsedAssumedInformation); @@ -9302,35 +8967,36 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl { else if (C && *C && (*C)->isZeroValue()) OnlyRight = true; - const AAPotentialConstantValues *LHSAA = nullptr, *RHSAA = nullptr; - if (!OnlyRight) { - LHSAA = &A.getAAFor<AAPotentialConstantValues>( - *this, IRPosition::value(*LHS), DepClassTy::REQUIRED); - if (!LHSAA->isValidState()) - return indicatePessimisticFixpoint(); - } - if (!OnlyLeft) { - RHSAA = &A.getAAFor<AAPotentialConstantValues>( - *this, IRPosition::value(*RHS), DepClassTy::REQUIRED); - if (!RHSAA->isValidState()) - return indicatePessimisticFixpoint(); - } + bool LHSContainsUndef = false, RHSContainsUndef = false; + SetTy LHSAAPVS, RHSAAPVS; + if (!OnlyRight && !fillSetWithConstantValues(A, IRPosition::value(*LHS), + LHSAAPVS, LHSContainsUndef)) + return indicatePessimisticFixpoint(); + + if (!OnlyLeft && !fillSetWithConstantValues(A, IRPosition::value(*RHS), + RHSAAPVS, RHSContainsUndef)) + return indicatePessimisticFixpoint(); - if (!LHSAA || !RHSAA) { + if (OnlyLeft || OnlyRight) { // select (true/false), lhs, rhs - auto *OpAA = LHSAA ? LHSAA : RHSAA; + auto *OpAA = OnlyLeft ? &LHSAAPVS : &RHSAAPVS; + auto Undef = OnlyLeft ? LHSContainsUndef : RHSContainsUndef; - if (OpAA->undefIsContained()) + if (Undef) unionAssumedWithUndef(); - else - unionAssumed(*OpAA); + else { + for (auto &It : *OpAA) + unionAssumed(It); + } - } else if (LHSAA->undefIsContained() && RHSAA->undefIsContained()) { + } else if (LHSContainsUndef && RHSContainsUndef) { // select i1 *, undef , undef => undef unionAssumedWithUndef(); } else { - unionAssumed(*LHSAA); - unionAssumed(*RHSAA); + for (auto &It : LHSAAPVS) + unionAssumed(It); + for (auto &It : RHSAAPVS) + unionAssumed(It); } return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED; @@ -9344,26 +9010,16 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl { uint32_t ResultBitWidth = CI->getDestTy()->getIntegerBitWidth(); Value *Src = CI->getOperand(0); - // Simplify the operand first. 
- bool UsedAssumedInformation = false; - const auto &SimplifiedSrc = - A.getAssumedSimplified(IRPosition::value(*Src, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedSrc) - return ChangeStatus::UNCHANGED; - if (!SimplifiedSrc.value()) + bool SrcContainsUndef = false; + SetTy SrcPVS; + if (!fillSetWithConstantValues(A, IRPosition::value(*Src), SrcPVS, + SrcContainsUndef)) return indicatePessimisticFixpoint(); - Src = *SimplifiedSrc; - auto &SrcAA = A.getAAFor<AAPotentialConstantValues>( - *this, IRPosition::value(*Src), DepClassTy::REQUIRED); - if (!SrcAA.isValidState()) - return indicatePessimisticFixpoint(); - const SetTy &SrcAAPVS = SrcAA.getAssumedSet(); - if (SrcAA.undefIsContained()) + if (SrcContainsUndef) unionAssumedWithUndef(); else { - for (const APInt &S : SrcAAPVS) { + for (const APInt &S : SrcPVS) { APInt T = calculateCastInst(CI, S, ResultBitWidth); unionAssumed(T); } @@ -9377,53 +9033,26 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl { Value *LHS = BinOp->getOperand(0); Value *RHS = BinOp->getOperand(1); - // Simplify the operands first. - bool UsedAssumedInformation = false; - const auto &SimplifiedLHS = - A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedLHS) - return ChangeStatus::UNCHANGED; - if (!SimplifiedLHS.value()) + bool LHSContainsUndef = false, RHSContainsUndef = false; + SetTy LHSAAPVS, RHSAAPVS; + if (!fillSetWithConstantValues(A, IRPosition::value(*LHS), LHSAAPVS, + LHSContainsUndef) || + !fillSetWithConstantValues(A, IRPosition::value(*RHS), RHSAAPVS, + RHSContainsUndef)) return indicatePessimisticFixpoint(); - LHS = *SimplifiedLHS; - const auto &SimplifiedRHS = - A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedRHS) - return ChangeStatus::UNCHANGED; - if (!SimplifiedRHS.value()) - return indicatePessimisticFixpoint(); - RHS = *SimplifiedRHS; - - if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy()) - return indicatePessimisticFixpoint(); - - auto &LHSAA = A.getAAFor<AAPotentialConstantValues>( - *this, IRPosition::value(*LHS), DepClassTy::REQUIRED); - if (!LHSAA.isValidState()) - return indicatePessimisticFixpoint(); - - auto &RHSAA = A.getAAFor<AAPotentialConstantValues>( - *this, IRPosition::value(*RHS), DepClassTy::REQUIRED); - if (!RHSAA.isValidState()) - return indicatePessimisticFixpoint(); - - const SetTy &LHSAAPVS = LHSAA.getAssumedSet(); - const SetTy &RHSAAPVS = RHSAA.getAssumedSet(); const APInt Zero = APInt(LHS->getType()->getIntegerBitWidth(), 0); // TODO: make use of undef flag to limit potential values aggressively. 
- if (LHSAA.undefIsContained() && RHSAA.undefIsContained()) { + if (LHSContainsUndef && RHSContainsUndef) { if (!calculateBinaryOperatorAndTakeUnion(BinOp, Zero, Zero)) return indicatePessimisticFixpoint(); - } else if (LHSAA.undefIsContained()) { + } else if (LHSContainsUndef) { for (const APInt &R : RHSAAPVS) { if (!calculateBinaryOperatorAndTakeUnion(BinOp, Zero, R)) return indicatePessimisticFixpoint(); } - } else if (RHSAA.undefIsContained()) { + } else if (RHSContainsUndef) { for (const APInt &L : LHSAAPVS) { if (!calculateBinaryOperatorAndTakeUnion(BinOp, L, Zero)) return indicatePessimisticFixpoint(); @@ -9440,35 +9069,6 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl { : ChangeStatus::CHANGED; } - ChangeStatus updateWithPHINode(Attributor &A, PHINode *PHI) { - auto AssumedBefore = getAssumed(); - for (unsigned u = 0, e = PHI->getNumIncomingValues(); u < e; u++) { - Value *IncomingValue = PHI->getIncomingValue(u); - - // Simplify the operand first. - bool UsedAssumedInformation = false; - const auto &SimplifiedIncomingValue = A.getAssumedSimplified( - IRPosition::value(*IncomingValue, getCallBaseContext()), *this, - UsedAssumedInformation); - if (!SimplifiedIncomingValue) - continue; - if (!SimplifiedIncomingValue.value()) - return indicatePessimisticFixpoint(); - IncomingValue = *SimplifiedIncomingValue; - - auto &PotentialValuesAA = A.getAAFor<AAPotentialConstantValues>( - *this, IRPosition::value(*IncomingValue), DepClassTy::REQUIRED); - if (!PotentialValuesAA.isValidState()) - return indicatePessimisticFixpoint(); - if (PotentialValuesAA.undefIsContained()) - unionAssumedWithUndef(); - else - unionAssumed(PotentialValuesAA.getAssumed()); - } - return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED - : ChangeStatus::CHANGED; - } - /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override { Value &V = getAssociatedValue(); @@ -9486,9 +9086,6 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl { if (auto *BinOp = dyn_cast<BinaryOperator>(I)) return updateWithBinaryOperator(A, BinOp); - if (auto *PHI = dyn_cast<PHINode>(I)) - return updateWithPHINode(A, PHI); - return indicatePessimisticFixpoint(); } @@ -9642,7 +9239,8 @@ struct AANoUndefImpl : AANoUndef { // A position whose simplified value does not have any value is // considered to be dead. We don't manifest noundef in such positions for // the same reason above. - if (!A.getAssumedSimplified(getIRPosition(), *this, UsedAssumedInformation) + if (!A.getAssumedSimplified(getIRPosition(), *this, UsedAssumedInformation, + AA::Interprocedural) .has_value()) return ChangeStatus::UNCHANGED; return AANoUndef::manifest(A); @@ -9663,11 +9261,19 @@ struct AANoUndefFloating : public AANoUndefImpl { /// See AbstractAttribute::updateImpl(...). 
ChangeStatus updateImpl(Attributor &A) override { - auto VisitValueCB = [&](Value &V, const Instruction *CtxI, - AANoUndef::StateType &T, bool Stripped) -> bool { + + SmallVector<AA::ValueAndContext> Values; + bool UsedAssumedInformation = false; + if (!A.getAssumedSimplifiedValues(getIRPosition(), *this, Values, + AA::AnyScope, UsedAssumedInformation)) { + Values.push_back({getAssociatedValue(), getCtxI()}); + } + + StateType T; + auto VisitValueCB = [&](Value &V, const Instruction *CtxI) -> bool { const auto &AA = A.getAAFor<AANoUndef>(*this, IRPosition::value(V), DepClassTy::REQUIRED); - if (!Stripped && this == &AA) { + if (this == &AA) { T.indicatePessimisticFixpoint(); } else { const AANoUndef::StateType &S = @@ -9677,12 +9283,9 @@ struct AANoUndefFloating : public AANoUndefImpl { return T.isValidState(); }; - StateType T; - bool UsedAssumedInformation = false; - if (!genericValueTraversal<StateType>(A, getIRPosition(), *this, T, - VisitValueCB, getCtxI(), - UsedAssumedInformation)) - return indicatePessimisticFixpoint(); + for (const auto &VAC : Values) + if (!VisitValueCB(*VAC.getValue(), VAC.getCtxI())) + return indicatePessimisticFixpoint(); return clampStateAndIndicateChange(getState(), T); } @@ -9782,8 +9385,7 @@ struct AACallEdgesCallSite : public AACallEdgesImpl { ChangeStatus updateImpl(Attributor &A) override { ChangeStatus Change = ChangeStatus::UNCHANGED; - auto VisitValue = [&](Value &V, const Instruction *CtxI, bool &HasUnknown, - bool Stripped) -> bool { + auto VisitValue = [&](Value &V, const Instruction *CtxI) -> bool { if (Function *Fn = dyn_cast<Function>(&V)) { addCalledFunction(Fn, Change); } else { @@ -9795,17 +9397,17 @@ struct AACallEdgesCallSite : public AACallEdgesImpl { return true; }; + SmallVector<AA::ValueAndContext> Values; // Process any value that we might call. - auto ProcessCalledOperand = [&](Value *V) { - bool DummyValue = false; + auto ProcessCalledOperand = [&](Value *V, Instruction *CtxI) { bool UsedAssumedInformation = false; - if (!genericValueTraversal<bool>(A, IRPosition::value(*V), *this, - DummyValue, VisitValue, nullptr, - UsedAssumedInformation, false)) { - // If we haven't gone through all values, assume that there are unknown - // callees. - setHasUnknownCallee(true, Change); + Values.clear(); + if (!A.getAssumedSimplifiedValues(IRPosition::value(*V), *this, Values, + AA::AnyScope, UsedAssumedInformation)) { + Values.push_back({*V, CtxI}); } + for (auto &VAC : Values) + VisitValue(*VAC.getValue(), VAC.getCtxI()); }; CallBase *CB = cast<CallBase>(getCtxI()); @@ -9828,13 +9430,13 @@ struct AACallEdgesCallSite : public AACallEdgesImpl { } // The most simple case. - ProcessCalledOperand(CB->getCalledOperand()); + ProcessCalledOperand(CB->getCalledOperand(), CB); // Process callback functions. 
SmallVector<const Use *, 4u> CallbackUses; AbstractCallSite::getCallbackUses(*CB, CallbackUses); for (const Use *U : CallbackUses) - ProcessCalledOperand(U->get()); + ProcessCalledOperand(U->get(), CB); return Change; } @@ -9920,8 +9522,11 @@ private: for (auto *AAEdges : AAEdgesList) { if (AAEdges->hasUnknownCallee()) { - if (!CanReachUnknownCallee) + if (!CanReachUnknownCallee) { + LLVM_DEBUG(dbgs() + << "[QueryResolver] Edges include unknown callee!\n"); Change = ChangeStatus::CHANGED; + } CanReachUnknownCallee = true; return Change; } @@ -10065,14 +9670,10 @@ public: } bool instructionCanReach(Attributor &A, const Instruction &Inst, - const Function &Fn, - bool UseBackwards) const override { + const Function &Fn) const override { if (!isValidState()) return true; - if (UseBackwards) - return AA::isPotentiallyReachable(A, Inst, Fn, *this, nullptr); - const auto &Reachability = A.getAAFor<AAReachability>( *this, IRPosition::function(*getAssociatedFunction()), DepClassTy::REQUIRED); @@ -10085,8 +9686,11 @@ public: // This is a hack for us to be able to cache queries. auto *NonConstThis = const_cast<AAFunctionReachabilityFunction *>(this); QueryResolver &InstQSet = NonConstThis->InstQueries[&Inst]; - if (!AllKnown) + if (!AllKnown) { + LLVM_DEBUG(dbgs() << "[AAReachability] Not all reachable edges known, " + "may reach unknown callee!\n"); InstQSet.CanReachUnknownCallee = true; + } return InstQSet.isReachable(A, *NonConstThis, CallEdges, Fn); } @@ -10119,8 +9723,11 @@ public: bool AllKnown = getReachableCallEdges(A, *Reachability, *InstPair.first, CallEdges); // Update will return change if we this effects any queries. - if (!AllKnown) + if (!AllKnown) { + LLVM_DEBUG(dbgs() << "[AAReachability] Not all reachable edges " + "known, may reach unknown callee!\n"); InstPair.second.CanReachUnknownCallee = true; + } Change |= InstPair.second.update(A, *this, CallEdges); } } @@ -10133,8 +9740,11 @@ public: WholeFunction.Reachable.size() + WholeFunction.Unreachable.size(); return "FunctionReachability [" + - std::to_string(WholeFunction.Reachable.size()) + "," + - std::to_string(QueryCount) + "]"; + (canReachUnknownCallee() + ? "unknown" + : (std::to_string(WholeFunction.Reachable.size()) + "," + + std::to_string(QueryCount))) + + "]"; } void trackStatistics() const override {} @@ -10156,6 +9766,822 @@ private: }; } // namespace +template <typename AAType> +static Optional<Constant *> +askForAssumedConstant(Attributor &A, const AbstractAttribute &QueryingAA, + const IRPosition &IRP, Type &Ty) { + if (!Ty.isIntegerTy()) + return nullptr; + + // This will also pass the call base context. 
+ const auto &AA = A.getAAFor<AAType>(QueryingAA, IRP, DepClassTy::NONE); + + Optional<Constant *> COpt = AA.getAssumedConstant(A); + + if (!COpt.has_value()) { + A.recordDependence(AA, QueryingAA, DepClassTy::OPTIONAL); + return llvm::None; + } + if (auto *C = COpt.value()) { + A.recordDependence(AA, QueryingAA, DepClassTy::OPTIONAL); + return C; + } + return nullptr; +} + +Value *AAPotentialValues::getSingleValue( + Attributor &A, const AbstractAttribute &AA, const IRPosition &IRP, + SmallVectorImpl<AA::ValueAndContext> &Values) { + Type &Ty = *IRP.getAssociatedType(); + Optional<Value *> V; + for (auto &It : Values) { + V = AA::combineOptionalValuesInAAValueLatice(V, It.getValue(), &Ty); + if (V.has_value() && !V.value()) + break; + } + if (!V.has_value()) + return UndefValue::get(&Ty); + return V.value(); +} + +namespace { +struct AAPotentialValuesImpl : AAPotentialValues { + using StateType = PotentialLLVMValuesState; + + AAPotentialValuesImpl(const IRPosition &IRP, Attributor &A) + : AAPotentialValues(IRP, A) {} + + /// See AbstractAttribute::initialize(..). + void initialize(Attributor &A) override { + if (A.hasSimplificationCallback(getIRPosition())) { + indicatePessimisticFixpoint(); + return; + } + Value *Stripped = getAssociatedValue().stripPointerCasts(); + if (isa<Constant>(Stripped)) { + addValue(A, getState(), *Stripped, getCtxI(), AA::AnyScope, + getAnchorScope()); + indicateOptimisticFixpoint(); + return; + } + AAPotentialValues::initialize(A); + } + + /// See AbstractAttribute::getAsStr(). + const std::string getAsStr() const override { + std::string Str; + llvm::raw_string_ostream OS(Str); + OS << getState(); + return OS.str(); + } + + template <typename AAType> + static Optional<Value *> askOtherAA(Attributor &A, + const AbstractAttribute &AA, + const IRPosition &IRP, Type &Ty) { + if (isa<Constant>(IRP.getAssociatedValue())) + return &IRP.getAssociatedValue(); + Optional<Constant *> C = askForAssumedConstant<AAType>(A, AA, IRP, Ty); + if (!C) + return llvm::None; + if (C.value()) + if (auto *CC = AA::getWithType(**C, Ty)) + return CC; + return nullptr; + } + + void addValue(Attributor &A, StateType &State, Value &V, + const Instruction *CtxI, AA::ValueScope S, + Function *AnchorScope) const { + + IRPosition ValIRP = IRPosition::value(V); + if (auto *CB = dyn_cast_or_null<CallBase>(CtxI)) { + for (auto &U : CB->args()) { + if (U.get() != &V) + continue; + ValIRP = IRPosition::callsite_argument(*CB, CB->getArgOperandNo(&U)); + break; + } + } + + Value *VPtr = &V; + if (ValIRP.getAssociatedType()->isIntegerTy()) { + Type &Ty = *getAssociatedType(); + Optional<Value *> SimpleV = + askOtherAA<AAValueConstantRange>(A, *this, ValIRP, Ty); + if (SimpleV.has_value() && !SimpleV.value()) { + auto &PotentialConstantsAA = A.getAAFor<AAPotentialConstantValues>( + *this, ValIRP, DepClassTy::OPTIONAL); + if (PotentialConstantsAA.isValidState()) { + for (auto &It : PotentialConstantsAA.getAssumedSet()) { + State.unionAssumed({{*ConstantInt::get(&Ty, It), nullptr}, S}); + } + assert(!PotentialConstantsAA.undefIsContained() && + "Undef should be an explicit value!"); + return; + } + } + if (!SimpleV.has_value()) + return; + + if (SimpleV.value()) + VPtr = SimpleV.value(); + } + + if (isa<ConstantInt>(VPtr)) + CtxI = nullptr; + if (!AA::isValidInScope(*VPtr, AnchorScope)) + S = AA::ValueScope(S | AA::Interprocedural); + + State.unionAssumed({{*VPtr, CtxI}, S}); + } + + /// Helper struct to tie a value+context pair together with the scope for + /// which this is the simplified version. 
+ struct ItemInfo { + AA::ValueAndContext I; + AA::ValueScope S; + }; + + bool recurseForValue(Attributor &A, const IRPosition &IRP, AA::ValueScope S) { + SmallMapVector<AA::ValueAndContext, int, 8> ValueScopeMap; + for (auto CS : {AA::Intraprocedural, AA::Interprocedural}) { + if (!(CS & S)) + continue; + + bool UsedAssumedInformation = false; + SmallVector<AA::ValueAndContext> Values; + if (!A.getAssumedSimplifiedValues(IRP, this, Values, CS, + UsedAssumedInformation)) + return false; + + for (auto &It : Values) + ValueScopeMap[It] += CS; + } + for (auto &It : ValueScopeMap) + addValue(A, getState(), *It.first.getValue(), It.first.getCtxI(), + AA::ValueScope(It.second), getAnchorScope()); + + return true; + } + + void giveUpOnIntraprocedural(Attributor &A) { + auto NewS = StateType::getBestState(getState()); + for (auto &It : getAssumedSet()) { + if (It.second == AA::Intraprocedural) + continue; + addValue(A, NewS, *It.first.getValue(), It.first.getCtxI(), + AA::Interprocedural, getAnchorScope()); + } + assert(!undefIsContained() && "Undef should be an explicit value!"); + addValue(A, NewS, getAssociatedValue(), getCtxI(), AA::Intraprocedural, + getAnchorScope()); + getState() = NewS; + } + + /// See AbstractState::indicatePessimisticFixpoint(...). + ChangeStatus indicatePessimisticFixpoint() override { + getState() = StateType::getBestState(getState()); + getState().unionAssumed({{getAssociatedValue(), getCtxI()}, AA::AnyScope}); + AAPotentialValues::indicateOptimisticFixpoint(); + return ChangeStatus::CHANGED; + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + return indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override { + SmallVector<AA::ValueAndContext> Values; + for (AA::ValueScope S : {AA::Interprocedural, AA::Intraprocedural}) { + Values.clear(); + if (!getAssumedSimplifiedValues(A, Values, S)) + continue; + Value &OldV = getAssociatedValue(); + if (isa<UndefValue>(OldV)) + continue; + Value *NewV = getSingleValue(A, *this, getIRPosition(), Values); + if (!NewV || NewV == &OldV) + continue; + if (getCtxI() && + !AA::isValidAtPosition({*NewV, *getCtxI()}, A.getInfoCache())) + continue; + if (A.changeAfterManifest(getIRPosition(), *NewV)) + return ChangeStatus::CHANGED; + } + return ChangeStatus::UNCHANGED; + } + + bool getAssumedSimplifiedValues(Attributor &A, + SmallVectorImpl<AA::ValueAndContext> &Values, + AA::ValueScope S) const override { + if (!isValidState()) + return false; + for (auto &It : getAssumedSet()) + if (It.second & S) + Values.push_back(It.first); + assert(!undefIsContained() && "Undef should be an explicit value!"); + return true; + } +}; + +struct AAPotentialValuesFloating : AAPotentialValuesImpl { + AAPotentialValuesFloating(const IRPosition &IRP, Attributor &A) + : AAPotentialValuesImpl(IRP, A) {} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + auto AssumedBefore = getAssumed(); + + genericValueTraversal(A); + + return (AssumedBefore == getAssumed()) ? ChangeStatus::UNCHANGED + : ChangeStatus::CHANGED; + } + + /// Helper struct to remember which AAIsDead instances we actually used. + struct LivenessInfo { + const AAIsDead *LivenessAA = nullptr; + bool AnyDead = false; + }; + + /// Check if \p Cmp is a comparison we can simplify. + /// + /// We handle multiple cases, one in which at least one operand is an + /// (assumed) nullptr. 
If so, try to simplify it using AANonNull on the other + /// operand. Return true if successful, in that case Worklist will be updated. + bool handleCmp(Attributor &A, CmpInst &Cmp, ItemInfo II, + SmallVectorImpl<ItemInfo> &Worklist) { + Value *LHS = Cmp.getOperand(0); + Value *RHS = Cmp.getOperand(1); + + // Simplify the operands first. + bool UsedAssumedInformation = false; + const auto &SimplifiedLHS = A.getAssumedSimplified( + IRPosition::value(*LHS, getCallBaseContext()), *this, + UsedAssumedInformation, AA::Intraprocedural); + if (!SimplifiedLHS.has_value()) + return true; + if (!SimplifiedLHS.value()) + return false; + LHS = *SimplifiedLHS; + + const auto &SimplifiedRHS = A.getAssumedSimplified( + IRPosition::value(*RHS, getCallBaseContext()), *this, + UsedAssumedInformation, AA::Intraprocedural); + if (!SimplifiedRHS.has_value()) + return true; + if (!SimplifiedRHS.value()) + return false; + RHS = *SimplifiedRHS; + + LLVMContext &Ctx = Cmp.getContext(); + // Handle the trivial case first in which we don't even need to think about + // null or non-null. + if (LHS == RHS && (Cmp.isTrueWhenEqual() || Cmp.isFalseWhenEqual())) { + Constant *NewV = + ConstantInt::get(Type::getInt1Ty(Ctx), Cmp.isTrueWhenEqual()); + addValue(A, getState(), *NewV, /* CtxI */ nullptr, II.S, + getAnchorScope()); + return true; + } + + // From now on we only handle equalities (==, !=). + ICmpInst *ICmp = dyn_cast<ICmpInst>(&Cmp); + if (!ICmp || !ICmp->isEquality()) + return false; + + bool LHSIsNull = isa<ConstantPointerNull>(LHS); + bool RHSIsNull = isa<ConstantPointerNull>(RHS); + if (!LHSIsNull && !RHSIsNull) + return false; + + // Left is the nullptr ==/!= non-nullptr case. We'll use AANonNull on the + // non-nullptr operand and if we assume it's non-null we can conclude the + // result of the comparison. + assert((LHSIsNull || RHSIsNull) && + "Expected nullptr versus non-nullptr comparison at this point"); + + // The index is the operand that we assume is not null. + unsigned PtrIdx = LHSIsNull; + auto &PtrNonNullAA = A.getAAFor<AANonNull>( + *this, IRPosition::value(*ICmp->getOperand(PtrIdx)), + DepClassTy::REQUIRED); + if (!PtrNonNullAA.isAssumedNonNull()) + return false; + + // The new value depends on the predicate, true for != and false for ==. + Constant *NewV = ConstantInt::get(Type::getInt1Ty(Ctx), + ICmp->getPredicate() == CmpInst::ICMP_NE); + addValue(A, getState(), *NewV, /* CtxI */ nullptr, II.S, getAnchorScope()); + return true; + } + + bool handleSelectInst(Attributor &A, SelectInst &SI, ItemInfo II, + SmallVectorImpl<ItemInfo> &Worklist) { + const Instruction *CtxI = II.I.getCtxI(); + bool UsedAssumedInformation = false; + + Optional<Constant *> C = + A.getAssumedConstant(*SI.getCondition(), *this, UsedAssumedInformation); + bool NoValueYet = !C.has_value(); + if (NoValueYet || isa_and_nonnull<UndefValue>(*C)) + return true; + if (auto *CI = dyn_cast_or_null<ConstantInt>(*C)) { + if (CI->isZero()) + Worklist.push_back({{*SI.getFalseValue(), CtxI}, II.S}); + else + Worklist.push_back({{*SI.getTrueValue(), CtxI}, II.S}); + } else { + // We could not simplify the condition, assume both values. 
+ Worklist.push_back({{*SI.getTrueValue(), CtxI}, II.S}); + Worklist.push_back({{*SI.getFalseValue(), CtxI}, II.S}); + } + return true; + } + + bool handleLoadInst(Attributor &A, LoadInst &LI, ItemInfo II, + SmallVectorImpl<ItemInfo> &Worklist) { + SmallSetVector<Value *, 4> PotentialCopies; + SmallSetVector<Instruction *, 4> PotentialValueOrigins; + bool UsedAssumedInformation = false; + if (!AA::getPotentiallyLoadedValues(A, LI, PotentialCopies, + PotentialValueOrigins, *this, + UsedAssumedInformation, + /* OnlyExact */ true)) { + LLVM_DEBUG(dbgs() << "[AAPotentialValues] Failed to get potentially " + "loaded values for load instruction " + << LI << "\n"); + return false; + } + + // Do not simplify loads that are only used in llvm.assume if we cannot also + // remove all stores that may feed into the load. The reason is that the + // assume is probably worth something as long as the stores are around. + InformationCache &InfoCache = A.getInfoCache(); + if (InfoCache.isOnlyUsedByAssume(LI)) { + if (!llvm::all_of(PotentialValueOrigins, [&](Instruction *I) { + if (!I) + return true; + if (auto *SI = dyn_cast<StoreInst>(I)) + return A.isAssumedDead(SI->getOperandUse(0), this, + /* LivenessAA */ nullptr, + UsedAssumedInformation, + /* CheckBBLivenessOnly */ false); + return A.isAssumedDead(*I, this, /* LivenessAA */ nullptr, + UsedAssumedInformation, + /* CheckBBLivenessOnly */ false); + })) { + LLVM_DEBUG(dbgs() << "[AAPotentialValues] Load is only used by assumes " + "and we cannot delete all the stores: " + << LI << "\n"); + return false; + } + } + + // Values have to be dynamically unique or we lose the fact that a + // single llvm::Value might represent two runtime values (e.g., + // stack locations in different recursive calls). + const Instruction *CtxI = II.I.getCtxI(); + bool ScopeIsLocal = (II.S & AA::Intraprocedural); + bool AllLocal = ScopeIsLocal; + bool DynamicallyUnique = llvm::all_of(PotentialCopies, [&](Value *PC) { + AllLocal &= AA::isValidInScope(*PC, getAnchorScope()); + return AA::isDynamicallyUnique(A, *this, *PC); + }); + if (!DynamicallyUnique) { + LLVM_DEBUG(dbgs() << "[AAPotentialValues] Not all potentially loaded " + "values are dynamically unique: " + << LI << "\n"); + return false; + } + + for (auto *PotentialCopy : PotentialCopies) { + if (AllLocal) { + Worklist.push_back({{*PotentialCopy, CtxI}, II.S}); + } else { + Worklist.push_back({{*PotentialCopy, CtxI}, AA::Interprocedural}); + } + } + if (!AllLocal && ScopeIsLocal) + addValue(A, getState(), LI, CtxI, AA::Intraprocedural, getAnchorScope()); + return true; + } + + bool handlePHINode( + Attributor &A, PHINode &PHI, ItemInfo II, + SmallVectorImpl<ItemInfo> &Worklist, + SmallMapVector<const Function *, LivenessInfo, 4> &LivenessAAs) { + auto GetLivenessInfo = [&](const Function &F) -> LivenessInfo & { + LivenessInfo &LI = LivenessAAs[&F]; + if (!LI.LivenessAA) + LI.LivenessAA = &A.getAAFor<AAIsDead>(*this, IRPosition::function(F), + DepClassTy::NONE); + return LI; + }; + + LivenessInfo &LI = GetLivenessInfo(*PHI.getFunction()); + for (unsigned u = 0, e = PHI.getNumIncomingValues(); u < e; u++) { + BasicBlock *IncomingBB = PHI.getIncomingBlock(u); + if (LI.LivenessAA->isEdgeDead(IncomingBB, PHI.getParent())) { + LI.AnyDead = true; + continue; + } + Worklist.push_back( + {{*PHI.getIncomingValue(u), IncomingBB->getTerminator()}, II.S}); + } + return true; + } + + /// Use the generic, non-optimistic InstSimplify functionality if we managed to + /// simplify any operand of the instruction \p I.
Return true if successful, + /// in that case Worklist will be updated. + bool handleGenericInst(Attributor &A, Instruction &I, ItemInfo II, + SmallVectorImpl<ItemInfo> &Worklist) { + bool SomeSimplified = false; + bool UsedAssumedInformation = false; + + SmallVector<Value *, 8> NewOps(I.getNumOperands()); + int Idx = 0; + for (Value *Op : I.operands()) { + const auto &SimplifiedOp = A.getAssumedSimplified( + IRPosition::value(*Op, getCallBaseContext()), *this, + UsedAssumedInformation, AA::Intraprocedural); + // If we are not sure about any operand we are not sure about the entire + // instruction, we'll wait. + if (!SimplifiedOp.has_value()) + return true; + + if (SimplifiedOp.value()) + NewOps[Idx] = SimplifiedOp.value(); + else + NewOps[Idx] = Op; + + SomeSimplified |= (NewOps[Idx] != Op); + ++Idx; + } + + // We won't bother with the InstSimplify interface if we didn't simplify any + // operand ourselves. + if (!SomeSimplified) + return false; + + InformationCache &InfoCache = A.getInfoCache(); + Function *F = I.getFunction(); + const auto *DT = + InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(*F); + const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F); + auto *AC = InfoCache.getAnalysisResultForFunction<AssumptionAnalysis>(*F); + OptimizationRemarkEmitter *ORE = nullptr; + + const DataLayout &DL = I.getModule()->getDataLayout(); + SimplifyQuery Q(DL, TLI, DT, AC, &I); + Value *NewV = simplifyInstructionWithOperands(&I, NewOps, Q, ORE); + if (!NewV || NewV == &I) + return false; + + LLVM_DEBUG(dbgs() << "Generic inst " << I << " assumed simplified to " + << *NewV << "\n"); + Worklist.push_back({{*NewV, II.I.getCtxI()}, II.S}); + return true; + } + + bool simplifyInstruction( + Attributor &A, Instruction &I, ItemInfo II, + SmallVectorImpl<ItemInfo> &Worklist, + SmallMapVector<const Function *, LivenessInfo, 4> &LivenessAAs) { + if (auto *CI = dyn_cast<CmpInst>(&I)) + if (handleCmp(A, *CI, II, Worklist)) + return true; + + switch (I.getOpcode()) { + case Instruction::Select: + return handleSelectInst(A, cast<SelectInst>(I), II, Worklist); + case Instruction::PHI: + return handlePHINode(A, cast<PHINode>(I), II, Worklist, LivenessAAs); + case Instruction::Load: + return handleLoadInst(A, cast<LoadInst>(I), II, Worklist); + default: + return handleGenericInst(A, I, II, Worklist); + }; + return false; + } + + void genericValueTraversal(Attributor &A) { + SmallMapVector<const Function *, LivenessInfo, 4> LivenessAAs; + + Value *InitialV = &getAssociatedValue(); + SmallSet<AA::ValueAndContext, 16> Visited; + SmallVector<ItemInfo, 16> Worklist; + Worklist.push_back({{*InitialV, getCtxI()}, AA::AnyScope}); + + int Iteration = 0; + do { + ItemInfo II = Worklist.pop_back_val(); + Value *V = II.I.getValue(); + assert(V); + const Instruction *CtxI = II.I.getCtxI(); + AA::ValueScope S = II.S; + + // Check if we should process the current value. To prevent endless + // recursion keep a record of the values we followed! + if (!Visited.insert(II.I).second) + continue; + + // Make sure we limit the compile time for complex expressions. + if (Iteration++ >= MaxPotentialValuesIterations) { + LLVM_DEBUG(dbgs() << "Generic value traversal reached iteration limit: " + << Iteration << "!\n"); + addValue(A, getState(), *V, CtxI, S, getAnchorScope()); + continue; + } + + // Explicitly look through calls with a "returned" attribute if we do + // not have a pointer as stripPointerCasts only works on them. 
+ Value *NewV = nullptr; + if (V->getType()->isPointerTy()) { + NewV = AA::getWithType(*V->stripPointerCasts(), *V->getType()); + } else { + auto *CB = dyn_cast<CallBase>(V); + if (CB && CB->getCalledFunction()) { + for (Argument &Arg : CB->getCalledFunction()->args()) + if (Arg.hasReturnedAttr()) { + NewV = CB->getArgOperand(Arg.getArgNo()); + break; + } + } + } + if (NewV && NewV != V) { + Worklist.push_back({{*NewV, CtxI}, S}); + continue; + } + + if (auto *I = dyn_cast<Instruction>(V)) { + if (simplifyInstruction(A, *I, II, Worklist, LivenessAAs)) + continue; + } + + if (V != InitialV || isa<Argument>(V)) + if (recurseForValue(A, IRPosition::value(*V), II.S)) + continue; + + // If we haven't stripped anything we give up. + if (V == InitialV && CtxI == getCtxI()) { + indicatePessimisticFixpoint(); + return; + } + + addValue(A, getState(), *V, CtxI, S, getAnchorScope()); + } while (!Worklist.empty()); + + // If we actually used liveness information so we have to record a + // dependence. + for (auto &It : LivenessAAs) + if (It.second.AnyDead) + A.recordDependence(*It.second.LivenessAA, *this, DepClassTy::OPTIONAL); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FLOATING_ATTR(potential_values) + } +}; + +struct AAPotentialValuesArgument final : AAPotentialValuesImpl { + using Base = AAPotentialValuesImpl; + AAPotentialValuesArgument(const IRPosition &IRP, Attributor &A) + : Base(IRP, A) {} + + /// See AbstractAttribute::initialize(..). + void initialize(Attributor &A) override { + auto &Arg = cast<Argument>(getAssociatedValue()); + if (Arg.hasPointeeInMemoryValueAttr()) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + auto AssumedBefore = getAssumed(); + + unsigned CSArgNo = getCallSiteArgNo(); + + bool UsedAssumedInformation = false; + SmallVector<AA::ValueAndContext> Values; + auto CallSitePred = [&](AbstractCallSite ACS) { + const auto CSArgIRP = IRPosition::callsite_argument(ACS, CSArgNo); + if (CSArgIRP.getPositionKind() == IRP_INVALID) + return false; + + if (!A.getAssumedSimplifiedValues(CSArgIRP, this, Values, + AA::Interprocedural, + UsedAssumedInformation)) + return false; + + return isValidState(); + }; + + if (!A.checkForAllCallSites(CallSitePred, *this, + /* RequireAllCallSites */ true, + UsedAssumedInformation)) + return indicatePessimisticFixpoint(); + + Function *Fn = getAssociatedFunction(); + bool AnyNonLocal = false; + for (auto &It : Values) { + if (isa<Constant>(It.getValue())) { + addValue(A, getState(), *It.getValue(), It.getCtxI(), AA::AnyScope, + getAnchorScope()); + continue; + } + if (!AA::isDynamicallyUnique(A, *this, *It.getValue())) + return indicatePessimisticFixpoint(); + + if (auto *Arg = dyn_cast<Argument>(It.getValue())) + if (Arg->getParent() == Fn) { + addValue(A, getState(), *It.getValue(), It.getCtxI(), AA::AnyScope, + getAnchorScope()); + continue; + } + addValue(A, getState(), *It.getValue(), It.getCtxI(), AA::Interprocedural, + getAnchorScope()); + AnyNonLocal = true; + } + if (undefIsContained()) + unionAssumedWithUndef(); + if (AnyNonLocal) + giveUpOnIntraprocedural(A); + + return (AssumedBefore == getAssumed()) ? 
ChangeStatus::UNCHANGED + : ChangeStatus::CHANGED; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_ARG_ATTR(potential_values) + } +}; + +struct AAPotentialValuesReturned + : AAReturnedFromReturnedValues<AAPotentialValues, AAPotentialValuesImpl> { + using Base = + AAReturnedFromReturnedValues<AAPotentialValues, AAPotentialValuesImpl>; + AAPotentialValuesReturned(const IRPosition &IRP, Attributor &A) + : Base(IRP, A) {} + + /// See AbstractAttribute::initialize(..). + void initialize(Attributor &A) override { + if (A.hasSimplificationCallback(getIRPosition())) + indicatePessimisticFixpoint(); + else + AAPotentialValues::initialize(A); + } + + ChangeStatus manifest(Attributor &A) override { + // We queried AAValueSimplify for the returned values so they will be + // replaced if a simplified form was found. Nothing to do here. + return ChangeStatus::UNCHANGED; + } + + ChangeStatus indicatePessimisticFixpoint() override { + return AAPotentialValues::indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FNRET_ATTR(potential_values) + } +}; + +struct AAPotentialValuesFunction : AAPotentialValuesImpl { + AAPotentialValuesFunction(const IRPosition &IRP, Attributor &A) + : AAPotentialValuesImpl(IRP, A) {} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + llvm_unreachable("AAPotentialValues(Function|CallSite)::updateImpl will " + "not be called"); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FN_ATTR(potential_values) + } +}; + +struct AAPotentialValuesCallSite : AAPotentialValuesFunction { + AAPotentialValuesCallSite(const IRPosition &IRP, Attributor &A) + : AAPotentialValuesFunction(IRP, A) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_CS_ATTR(potential_values) + } +}; + +struct AAPotentialValuesCallSiteReturned : AAPotentialValuesImpl { + AAPotentialValuesCallSiteReturned(const IRPosition &IRP, Attributor &A) + : AAPotentialValuesImpl(IRP, A) {} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + auto AssumedBefore = getAssumed(); + + Function *Callee = getAssociatedFunction(); + if (!Callee) + return indicatePessimisticFixpoint(); + + bool UsedAssumedInformation = false; + auto *CB = cast<CallBase>(getCtxI()); + if (CB->isMustTailCall() && + !A.isAssumedDead(IRPosition::inst(*CB), this, nullptr, + UsedAssumedInformation)) + return indicatePessimisticFixpoint(); + + SmallVector<AA::ValueAndContext> Values; + if (!A.getAssumedSimplifiedValues(IRPosition::returned(*Callee), this, + Values, AA::Intraprocedural, + UsedAssumedInformation)) + return indicatePessimisticFixpoint(); + + Function *Caller = CB->getCaller(); + + bool AnyNonLocal = false; + for (auto &It : Values) { + Value *V = It.getValue(); + Optional<Value *> CallerV = A.translateArgumentToCallSiteContent( + V, *CB, *this, UsedAssumedInformation); + if (!CallerV.has_value()) { + // Nothing to do as long as no value was determined. + continue; + } + V = CallerV.value() ? 
CallerV.value() : V; + if (AA::isDynamicallyUnique(A, *this, *V) && + AA::isValidInScope(*V, Caller)) { + if (CallerV.value()) { + SmallVector<AA::ValueAndContext> ArgValues; + IRPosition IRP = IRPosition::value(*V); + if (auto *Arg = dyn_cast<Argument>(V)) + if (Arg->getParent() == CB->getCalledFunction()) + IRP = IRPosition::callsite_argument(*CB, Arg->getArgNo()); + if (recurseForValue(A, IRP, AA::AnyScope)) + continue; + } + addValue(A, getState(), *V, CB, AA::AnyScope, getAnchorScope()); + } else { + AnyNonLocal = true; + break; + } + } + if (AnyNonLocal) { + Values.clear(); + if (!A.getAssumedSimplifiedValues(IRPosition::returned(*Callee), this, + Values, AA::Interprocedural, + UsedAssumedInformation)) + return indicatePessimisticFixpoint(); + AnyNonLocal = false; + getState() = PotentialLLVMValuesState::getBestState(); + for (auto &It : Values) { + Value *V = It.getValue(); + if (!AA::isDynamicallyUnique(A, *this, *V)) + return indicatePessimisticFixpoint(); + if (AA::isValidInScope(*V, Caller)) { + addValue(A, getState(), *V, CB, AA::AnyScope, getAnchorScope()); + } else { + AnyNonLocal = true; + addValue(A, getState(), *V, CB, AA::Interprocedural, + getAnchorScope()); + } + } + if (AnyNonLocal) + giveUpOnIntraprocedural(A); + } + return (AssumedBefore == getAssumed()) ? ChangeStatus::UNCHANGED + : ChangeStatus::CHANGED; + } + + ChangeStatus indicatePessimisticFixpoint() override { + return AAPotentialValues::indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_CSRET_ATTR(potential_values) + } +}; + +struct AAPotentialValuesCallSiteArgument : AAPotentialValuesFloating { + AAPotentialValuesCallSiteArgument(const IRPosition &IRP, Attributor &A) + : AAPotentialValuesFloating(IRP, A) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_CSARG_ATTR(potential_values) + } +}; +} // namespace + /// ---------------------- Assumption Propagation ------------------------------ namespace { struct AAAssumptionInfoImpl : public AAAssumptionInfo { @@ -10323,6 +10749,7 @@ const char AAMemoryBehavior::ID = 0; const char AAMemoryLocation::ID = 0; const char AAValueConstantRange::ID = 0; const char AAPotentialConstantValues::ID = 0; +const char AAPotentialValues::ID = 0; const char AANoUndef::ID = 0; const char AACallEdges::ID = 0; const char AAFunctionReachability::ID = 0; @@ -10441,6 +10868,7 @@ CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAInstanceInfo) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoCapture) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueConstantRange) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPotentialConstantValues) +CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPotentialValues) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUndef) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPointerInfo) diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp index 56e2df14ff38..360ec24a0509 100644 --- a/llvm/lib/Transforms/IPO/FunctionImport.cpp +++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp @@ -1147,6 +1147,14 @@ void llvm::thinLTOInternalizeModule(Module &TheModule, // Declare a callback for the internalize pass that will ask for every // candidate GlobalValue if it can be internalized or not. auto MustPreserveGV = [&](const GlobalValue &GV) -> bool { + // It may be the case that GV is on a chain of an ifunc, its alias and + // subsequent aliases. 
In this case, the summary for the value is not + // available. + if (isa<GlobalIFunc>(&GV) || + (isa<GlobalAlias>(&GV) && + isa<GlobalIFunc>(cast<GlobalAlias>(&GV)->getAliaseeObject()))) + return true; + // Lookup the linkage recorded in the summaries during global analysis. auto GS = DefinedGlobals.find(GV.getGUID()); if (GS == DefinedGlobals.end()) { @@ -1277,7 +1285,7 @@ Expected<bool> FunctionImporter::importFunctions( } } for (GlobalAlias &GA : SrcModule->aliases()) { - if (!GA.hasName()) + if (!GA.hasName() || isa<GlobalIFunc>(GA.getAliaseeObject())) continue; auto GUID = GA.getGUID(); auto Import = ImportGUIDs.count(GUID); @@ -1413,29 +1421,6 @@ static bool doImportingForModule(Module &M) { return *Result; } -namespace { - -/// Pass that performs cross-module function import provided a summary file. -class FunctionImportLegacyPass : public ModulePass { -public: - /// Pass identification, replacement for typeid - static char ID; - - explicit FunctionImportLegacyPass() : ModulePass(ID) {} - - /// Specify pass name for debug output - StringRef getPassName() const override { return "Function Importing"; } - - bool runOnModule(Module &M) override { - if (skipModule(M)) - return false; - - return doImportingForModule(M); - } -}; - -} // end anonymous namespace - PreservedAnalyses FunctionImportPass::run(Module &M, ModuleAnalysisManager &AM) { if (!doImportingForModule(M)) @@ -1443,15 +1428,3 @@ PreservedAnalyses FunctionImportPass::run(Module &M, return PreservedAnalyses::none(); } - -char FunctionImportLegacyPass::ID = 0; -INITIALIZE_PASS(FunctionImportLegacyPass, "function-import", - "Summary Based Function Import", false, false) - -namespace llvm { - -Pass *createFunctionImportPass() { - return new FunctionImportLegacyPass(); -} - -} // end namespace llvm diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 1ad6e2b2a1d2..ec26db8bfc0b 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -1040,7 +1040,7 @@ static bool tryToOptimizeStoreOfAllocationToGlobal(GlobalVariable *GV, CallInst *CI, const DataLayout &DL, TargetLibraryInfo *TLI) { - if (!isAllocRemovable(CI, TLI)) + if (!isRemovableAlloc(CI, TLI)) // Must be able to remove the call when we get done.. 
return false; diff --git a/llvm/lib/Transforms/IPO/IPO.cpp b/llvm/lib/Transforms/IPO/IPO.cpp index ec2b80012ed6..dfd434e61d5b 100644 --- a/llvm/lib/Transforms/IPO/IPO.cpp +++ b/llvm/lib/Transforms/IPO/IPO.cpp @@ -44,7 +44,6 @@ void llvm::initializeIPO(PassRegistry &Registry) { initializeLoopExtractorLegacyPassPass(Registry); initializeBlockExtractorLegacyPassPass(Registry); initializeSingleLoopExtractorPass(Registry); - initializeLowerTypeTestsPass(Registry); initializeMergeFunctionsLegacyPassPass(Registry); initializePartialInlinerLegacyPassPass(Registry); initializeAttributorLegacyPassPass(Registry); @@ -60,9 +59,6 @@ void llvm::initializeIPO(PassRegistry &Registry) { initializeStripNonDebugSymbolsPass(Registry); initializeBarrierNoopPass(Registry); initializeEliminateAvailableExternallyLegacyPassPass(Registry); - initializeSampleProfileLoaderLegacyPassPass(Registry); - initializeFunctionImportLegacyPassPass(Registry); - initializeWholeProgramDevirtPass(Registry); } void LLVMInitializeIPO(LLVMPassRegistryRef R) { diff --git a/llvm/lib/Transforms/IPO/Internalize.cpp b/llvm/lib/Transforms/IPO/Internalize.cpp index 5aa5b905f06c..85b1a8303d33 100644 --- a/llvm/lib/Transforms/IPO/Internalize.cpp +++ b/llvm/lib/Transforms/IPO/Internalize.cpp @@ -28,6 +28,7 @@ #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/GlobPattern.h" #include "llvm/Support/LineIterator.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" @@ -40,13 +41,13 @@ STATISTIC(NumAliases, "Number of aliases internalized"); STATISTIC(NumFunctions, "Number of functions internalized"); STATISTIC(NumGlobals, "Number of global vars internalized"); -// APIFile - A file which contains a list of symbols that should not be marked -// external. +// APIFile - A file which contains a list of symbol glob patterns that should +// not be marked external. static cl::opt<std::string> APIFile("internalize-public-api-file", cl::value_desc("filename"), cl::desc("A file containing list of symbol names to preserve")); -// APIList - A list of symbols that should not be marked internal. +// APIList - A list of symbol glob patterns that should not be marked internal. static cl::list<std::string> APIList("internalize-public-api-list", cl::value_desc("list"), cl::desc("A list of symbol names to preserve"), cl::CommaSeparated); @@ -59,29 +60,44 @@ public: PreserveAPIList() { if (!APIFile.empty()) LoadFile(APIFile); - ExternalNames.insert(APIList.begin(), APIList.end()); + for (StringRef Pattern : APIList) + addGlob(Pattern); } bool operator()(const GlobalValue &GV) { - return ExternalNames.count(GV.getName()); + return llvm::any_of( + ExternalNames, [&](GlobPattern &GP) { return GP.match(GV.getName()); }); } private: // Contains the set of symbols loaded from file - StringSet<> ExternalNames; + SmallVector<GlobPattern> ExternalNames; + + void addGlob(StringRef Pattern) { + auto GlobOrErr = GlobPattern::create(Pattern); + if (!GlobOrErr) { + errs() << "WARNING: when loading pattern: '" + << toString(GlobOrErr.takeError()) << "' ignoring"; + return; + } + ExternalNames.emplace_back(std::move(*GlobOrErr)); + } void LoadFile(StringRef Filename) { // Load the APIFile... - ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = + ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr = MemoryBuffer::getFile(Filename); - if (!Buf) { + if (!BufOrErr) { errs() << "WARNING: Internalize couldn't load file '" << Filename << "'! 
Continuing as if it's empty.\n"; return; // Just continue as if the file were empty } - for (line_iterator I(*Buf->get(), true), E; I != E; ++I) - ExternalNames.insert(*I); + Buf = std::move(*BufOrErr); + for (line_iterator I(*Buf, true), E; I != E; ++I) + addGlob(*I); } + + std::shared_ptr<MemoryBuffer> Buf; }; } // end anonymous namespace diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp index d5f1d291f41f..6bf25df101fa 100644 --- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp @@ -528,50 +528,8 @@ public: // arguments. For testing purposes only. static bool runForTesting(Module &M); }; - -struct LowerTypeTests : public ModulePass { - static char ID; - - bool UseCommandLine = false; - - ModuleSummaryIndex *ExportSummary; - const ModuleSummaryIndex *ImportSummary; - bool DropTypeTests; - - LowerTypeTests() : ModulePass(ID), UseCommandLine(true) { - initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry()); - } - - LowerTypeTests(ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary, bool DropTypeTests) - : ModulePass(ID), ExportSummary(ExportSummary), - ImportSummary(ImportSummary), - DropTypeTests(DropTypeTests || ClDropTypeTests) { - initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry()); - } - - bool runOnModule(Module &M) override { - if (UseCommandLine) - return LowerTypeTestsModule::runForTesting(M); - return LowerTypeTestsModule(M, ExportSummary, ImportSummary, DropTypeTests) - .lower(); - } -}; - } // end anonymous namespace -char LowerTypeTests::ID = 0; - -INITIALIZE_PASS(LowerTypeTests, "lowertypetests", "Lower type metadata", false, - false) - -ModulePass * -llvm::createLowerTypeTestsPass(ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary, - bool DropTypeTests) { - return new LowerTypeTests(ExportSummary, ImportSummary, DropTypeTests); -} - /// Build a bit set for TypeId using the object layouts in /// GlobalLayout. 
BitSetInfo LowerTypeTestsModule::buildBitSet( diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 8e0ca8c6c997..0b42fc151991 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -4808,7 +4808,7 @@ void OpenMPOpt::registerAAs(bool IsModulePass) { if (auto *LI = dyn_cast<LoadInst>(&I)) { bool UsedAssumedInformation = false; A.getAssumedSimplified(IRPosition::value(*LI), /* AA */ nullptr, - UsedAssumedInformation); + UsedAssumedInformation, AA::Interprocedural); } else if (auto *SI = dyn_cast<StoreInst>(&I)) { A.getOrCreateAAFor<AAIsDead>(IRPosition::value(*SI)); } diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 8eef82675e86..f1b6f2bb7de4 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -91,14 +91,6 @@ cl::opt<bool> EnableDFAJumpThreading("enable-dfa-jump-thread", cl::desc("Enable DFA jump threading."), cl::init(false), cl::Hidden); -static cl::opt<bool> - EnablePrepareForThinLTO("prepare-for-thinlto", cl::init(false), cl::Hidden, - cl::desc("Enable preparation for ThinLTO.")); - -static cl::opt<bool> - EnablePerformThinLTO("perform-thinlto", cl::init(false), cl::Hidden, - cl::desc("Enable performing ThinLTO.")); - cl::opt<bool> EnableHotColdSplit("hot-cold-split", cl::desc("Enable hot-cold splitting pass")); @@ -192,15 +184,6 @@ PassManagerBuilder::PassManagerBuilder() { VerifyInput = false; VerifyOutput = false; MergeFunctions = false; - PrepareForLTO = false; - EnablePGOInstrGen = false; - EnablePGOCSInstrGen = false; - EnablePGOCSInstrUse = false; - PGOInstrGen = ""; - PGOInstrUse = ""; - PGOSampleUse = ""; - PrepareForThinLTO = EnablePrepareForThinLTO; - PerformThinLTO = EnablePerformThinLTO; DivergentTarget = false; CallGraphProfile = true; } @@ -390,7 +373,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses( MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, /*AllowSpeculation=*/false)); // Rotate Loop - disable header duplication at -Oz - MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO)); + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, false)); // TODO: Investigate promotion cap for O1. MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, /*AllowSpeculation=*/true)); @@ -470,10 +453,6 @@ void PassManagerBuilder::addFunctionSimplificationPasses( // Clean up after everything. MPM.add(createInstructionCombiningPass()); addExtensionsToPM(EP_Peephole, MPM); - - if (EnableCHR && OptLevel >= 3 && - (!PGOInstrUse.empty() || !PGOSampleUse.empty() || EnablePGOCSInstrGen)) - MPM.add(createControlHeightReductionLegacyPass()); } /// FIXME: Should LTO cause any differences to this set of passes? @@ -598,15 +577,6 @@ void PassManagerBuilder::populateModulePassManager( legacy::PassManagerBase &MPM) { MPM.add(createAnnotation2MetadataLegacyPass()); - if (!PGOSampleUse.empty()) { - MPM.add(createPruneEHPass()); - // In ThinLTO mode, when flattened profile is used, all the available - // profile information will be annotated in PreLink phase so there is - // no need to load the profile again in PostLink. - if (!(FlattenedProfileUsed && PerformThinLTO)) - MPM.add(createSampleProfileLoaderPass(PGOSampleUse)); - } - // Allow forcing function attributes as a debugging and tuning aid. 
MPM.add(createForceFunctionAttrsLegacyPass()); @@ -628,26 +598,8 @@ void PassManagerBuilder::populateModulePassManager( else if (GlobalExtensionsNotEmpty() || !Extensions.empty()) MPM.add(createBarrierNoopPass()); - if (PerformThinLTO) { - MPM.add(createLowerTypeTestsPass(nullptr, nullptr, true)); - // Drop available_externally and unreferenced globals. This is necessary - // with ThinLTO in order to avoid leaving undefined references to dead - // globals in the object file. - MPM.add(createEliminateAvailableExternallyPass()); - MPM.add(createGlobalDCEPass()); - } - addExtensionsToPM(EP_EnabledOnOptLevel0, MPM); - if (PrepareForLTO || PrepareForThinLTO) { - MPM.add(createCanonicalizeAliasesPass()); - // Rename anon globals to be able to export them in the summary. - // This has to be done after we add the extensions to the pass manager - // as there could be passes (e.g. Adddress sanitizer) which introduce - // new unnamed globals. - MPM.add(createNameAnonGlobalPass()); - } - MPM.add(createAnnotationRemarksLegacyPass()); return; } @@ -658,25 +610,6 @@ void PassManagerBuilder::populateModulePassManager( addInitialAliasAnalysisPasses(MPM); - // For ThinLTO there are two passes of indirect call promotion. The - // first is during the compile phase when PerformThinLTO=false and - // intra-module indirect call targets are promoted. The second is during - // the ThinLTO backend when PerformThinLTO=true, when we promote imported - // inter-module indirect calls. For that we perform indirect call promotion - // earlier in the pass pipeline, here before globalopt. Otherwise imported - // available_externally functions look unreferenced and are removed. - if (PerformThinLTO) { - MPM.add(createLowerTypeTestsPass(nullptr, nullptr, true)); - } - - // For SamplePGO in ThinLTO compile phase, we do not want to unroll loops - // as it will change the CFG too much to make the 2nd profile annotation - // in backend more difficult. - bool PrepareForThinLTOUsingPGOSampleProfile = - PrepareForThinLTO && !PGOSampleUse.empty(); - if (PrepareForThinLTOUsingPGOSampleProfile) - DisableUnrollLoops = true; - // Infer attributes about declarations if possible. MPM.add(createInferFunctionAttrsLegacyPass()); @@ -744,7 +677,7 @@ void PassManagerBuilder::populateModulePassManager( if (RunPartialInlining) MPM.add(createPartialInliningPass()); - if (OptLevel > 1 && !PrepareForLTO && !PrepareForThinLTO) + if (OptLevel > 1) // Remove avail extern fns and globals definitions if we aren't // compiling an object file for later LTO. For LTO we want to preserve // these so they are eligible for inlining at link-time. Note if they @@ -756,9 +689,6 @@ void PassManagerBuilder::populateModulePassManager( // and saves running remaining passes on the eliminated functions. MPM.add(createEliminateAvailableExternallyPass()); - if (EnableOrderFileInstrumentation) - MPM.add(createInstrOrderFilePass()); - MPM.add(createReversePostOrderFunctionAttrsPass()); // The inliner performs some kind of dead code elimination as it goes, @@ -772,24 +702,6 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createGlobalDCEPass()); } - // If we are planning to perform ThinLTO later, let's not bloat the code with - // unrolling/vectorization/... now. We'll first run the inliner + CGSCC passes - // during ThinLTO and perform the rest of the optimizations afterward. - if (PrepareForThinLTO) { - // Ensure we perform any last passes, but do so before renaming anonymous - // globals in case the passes add any. 
- addExtensionsToPM(EP_OptimizerLast, MPM); - MPM.add(createCanonicalizeAliasesPass()); - // Rename anon globals to be able to export them in the summary. - MPM.add(createNameAnonGlobalPass()); - return; - } - - if (PerformThinLTO) - // Optimize globals now when performing ThinLTO, this enables more - // optimizations later. - MPM.add(createGlobalOptimizerPass()); - // Scheduling LoopVersioningLICM when inlining is over, because after that // we may see more accurate aliasing. Reason to run this late is that too // early versioning may prevent further inlining due to increase of code @@ -834,7 +746,7 @@ void PassManagerBuilder::populateModulePassManager( // Re-rotate loops in all our loop nests. These may have fallout out of // rotated form due to GVN or other transformations, and the vectorizer relies // on the rotated form. Disable header duplication at -Oz. - MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO)); + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, false)); // Distribute loops to allow partial vectorization. I.e. isolate dependences // into separate loop that would otherwise inhibit vectorization. This is @@ -856,7 +768,7 @@ void PassManagerBuilder::populateModulePassManager( // See comment in the new PM for justification of scheduling splitting at // this stage (\ref buildModuleSimplificationPipeline). - if (EnableHotColdSplit && !(PrepareForLTO || PrepareForThinLTO)) + if (EnableHotColdSplit) MPM.add(createHotColdSplittingPass()); if (EnableIROutliner) @@ -865,10 +777,6 @@ void PassManagerBuilder::populateModulePassManager( if (MergeFunctions) MPM.add(createMergeFunctionsPass()); - // Add Module flag "CG Profile" based on Branch Frequency Information. - if (CallGraphProfile) - MPM.add(createCGProfileLegacyPass()); - // LoopSink pass sinks instructions hoisted by LICM, which serves as a // canonicalization pass that enables other optimizations. 
As a result, // LoopSink pass needs to be a very late IR pass to avoid undoing LICM @@ -889,12 +797,6 @@ void PassManagerBuilder::populateModulePassManager( addExtensionsToPM(EP_OptimizerLast, MPM); - if (PrepareForLTO) { - MPM.add(createCanonicalizeAliasesPass()); - // Rename anon globals to be able to handle them in the summary - MPM.add(createNameAnonGlobalPass()); - } - MPM.add(createAnnotationRemarksLegacyPass()); } diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 55fee213cd5f..f76b886e810a 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -546,53 +546,6 @@ private: return AnnotatedPassName.c_str(); } }; - -class SampleProfileLoaderLegacyPass : public ModulePass { -public: - // Class identification, replacement for typeinfo - static char ID; - - SampleProfileLoaderLegacyPass( - StringRef Name = SampleProfileFile, - ThinOrFullLTOPhase LTOPhase = ThinOrFullLTOPhase::None) - : ModulePass(ID), SampleLoader( - Name, SampleProfileRemappingFile, LTOPhase, - [&](Function &F) -> AssumptionCache & { - return ACT->getAssumptionCache(F); - }, - [&](Function &F) -> TargetTransformInfo & { - return TTIWP->getTTI(F); - }, - [&](Function &F) -> TargetLibraryInfo & { - return TLIWP->getTLI(F); - }) { - initializeSampleProfileLoaderLegacyPassPass( - *PassRegistry::getPassRegistry()); - } - - void dump() { SampleLoader.dump(); } - - bool doInitialization(Module &M) override { - return SampleLoader.doInitialization(M); - } - - StringRef getPassName() const override { return "Sample profile pass"; } - bool runOnModule(Module &M) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<AssumptionCacheTracker>(); - AU.addRequired<TargetTransformInfoWrapperPass>(); - AU.addRequired<TargetLibraryInfoWrapperPass>(); - AU.addRequired<ProfileSummaryInfoWrapperPass>(); - } - -private: - SampleProfileLoader SampleLoader; - AssumptionCacheTracker *ACT = nullptr; - TargetTransformInfoWrapperPass *TTIWP = nullptr; - TargetLibraryInfoWrapperPass *TLIWP = nullptr; -}; - } // end anonymous namespace ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) { @@ -734,8 +687,8 @@ SampleProfileLoader::findIndirectCallFunctionSamples( auto FSCompare = [](const FunctionSamples *L, const FunctionSamples *R) { assert(L && R && "Expect non-null FunctionSamples"); - if (L->getEntrySamples() != R->getEntrySamples()) - return L->getEntrySamples() > R->getEntrySamples(); + if (L->getHeadSamplesEstimate() != R->getHeadSamplesEstimate()) + return L->getHeadSamplesEstimate() > R->getHeadSamplesEstimate(); return FunctionSamples::getGUID(L->getName()) < FunctionSamples::getGUID(R->getName()); }; @@ -750,7 +703,7 @@ SampleProfileLoader::findIndirectCallFunctionSamples( // as that already includes both inlined callee and non-inlined ones.. 
Sum = 0; for (const auto *const FS : CalleeSamples) { - Sum += FS->getEntrySamples(); + Sum += FS->getHeadSamplesEstimate(); R.push_back(FS); } llvm::sort(R, FSCompare); @@ -771,7 +724,7 @@ SampleProfileLoader::findIndirectCallFunctionSamples( if (M->empty()) return R; for (const auto &NameFS : *M) { - Sum += NameFS.second.getEntrySamples(); + Sum += NameFS.second.getHeadSamplesEstimate(); R.push_back(&NameFS.second); } llvm::sort(R, FSCompare); @@ -1090,7 +1043,7 @@ void SampleProfileLoader::findExternalInlineCandidate( bool PreInline = UsePreInlinerDecision && CalleeSample->getContext().hasAttribute(ContextShouldBeInlined); - if (!PreInline && CalleeSample->getEntrySamples() < Threshold) + if (!PreInline && CalleeSample->getHeadSamplesEstimate() < Threshold) continue; StringRef Name = CalleeSample->getFuncName(); @@ -1171,7 +1124,8 @@ bool SampleProfileLoader::inlineHotFunctions( assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) && "GUIDToFuncNameMap has to be populated"); AllCandidates.push_back(CB); - if (FS->getEntrySamples() > 0 || FunctionSamples::ProfileIsCS) + if (FS->getHeadSamplesEstimate() > 0 || + FunctionSamples::ProfileIsCS) LocalNotInlinedCallSites.try_emplace(CB, FS); if (callsiteIsHot(FS, PSI, ProfAccForSymsInList)) Hot = true; @@ -1211,7 +1165,7 @@ bool SampleProfileLoader::inlineHotFunctions( if (!callsiteIsHot(FS, PSI, ProfAccForSymsInList)) continue; - Candidate = {I, FS, FS->getEntrySamples(), 1.0}; + Candidate = {I, FS, FS->getHeadSamplesEstimate(), 1.0}; if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum)) { LocalNotInlinedCallSites.erase(I); LocalChanged = true; @@ -1325,7 +1279,7 @@ bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate, Factor = Probe->Factor; uint64_t CallsiteCount = - CalleeSamples ? CalleeSamples->getEntrySamples() * Factor : 0; + CalleeSamples ? CalleeSamples->getHeadSamplesEstimate() * Factor : 0; *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor}; return true; } @@ -1481,7 +1435,7 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority( continue; } uint64_t EntryCountDistributed = - FS->getEntrySamples() * Candidate.CallsiteDistribution; + FS->getHeadSamplesEstimate() * Candidate.CallsiteDistribution; // In addition to regular inline cost check, we also need to make sure // ICP isn't introducing excessive speculative checks even if individual // target looks beneficial to promote and inline. That means we should @@ -1568,7 +1522,7 @@ void SampleProfileLoader::promoteMergeNotInlinedContextSamples( ++NumCSNotInlined; const FunctionSamples *FS = Pair.getSecond(); - if (FS->getTotalSamples() == 0 && FS->getEntrySamples() == 0) { + if (FS->getTotalSamples() == 0 && FS->getHeadSamplesEstimate() == 0) { continue; } @@ -1586,7 +1540,7 @@ void SampleProfileLoader::promoteMergeNotInlinedContextSamples( // Use entry samples as head samples during the merge, as inlinees // don't have head samples. const_cast<FunctionSamples *>(FS)->addHeadSamples( - FS->getEntrySamples()); + FS->getHeadSamplesEstimate()); // Note that we have to do the merge right after processing function. 
// This allows OutlineFS's profile to be used for annotation during @@ -1599,7 +1553,7 @@ void SampleProfileLoader::promoteMergeNotInlinedContextSamples( } else { auto pair = notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0}); - pair.first->second.entryCount += FS->getEntrySamples(); + pair.first->second.entryCount += FS->getHeadSamplesEstimate(); } } } @@ -1663,7 +1617,7 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) { if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(CallSite)) { for (const auto &NameFS : *M) - Sum += NameFS.second.getEntrySamples(); + Sum += NameFS.second.getHeadSamplesEstimate(); } } if (Sum) @@ -1825,17 +1779,6 @@ bool SampleProfileLoader::emitAnnotations(Function &F) { return Changed; } -char SampleProfileLoaderLegacyPass::ID = 0; - -INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile", - "Sample Profile loader", false, false) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) -INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile", - "Sample Profile loader", false, false) - std::unique_ptr<ProfiledCallGraph> SampleProfileLoader::buildProfiledCallGraph(CallGraph &CG) { std::unique_ptr<ProfiledCallGraph> ProfiledCG; @@ -2073,14 +2016,6 @@ bool SampleProfileLoader::doInitialization(Module &M, return true; } -ModulePass *llvm::createSampleProfileLoaderPass() { - return new SampleProfileLoaderLegacyPass(); -} - -ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) { - return new SampleProfileLoaderLegacyPass(Name); -} - bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM, ProfileSummaryInfo *_PSI, CallGraph *CG) { GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap); @@ -2141,15 +2076,6 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM, return retval; } -bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) { - ACT = &getAnalysis<AssumptionCacheTracker>(); - TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>(); - TLIWP = &getAnalysis<TargetLibraryInfoWrapperPass>(); - ProfileSummaryInfo *PSI = - &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); - return SampleLoader.runOnModule(M, nullptr, PSI, nullptr); -} - bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) { LLVM_DEBUG(dbgs() << "\n\nProcessing Function " << F.getName() << "\n"); DILocation2SampleMap.clear(); diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp index 898a213d0849..ad00c116ce0a 100644 --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -747,78 +747,8 @@ struct DevirtIndex { void run(); }; - -struct WholeProgramDevirt : public ModulePass { - static char ID; - - bool UseCommandLine = false; - - ModuleSummaryIndex *ExportSummary = nullptr; - const ModuleSummaryIndex *ImportSummary = nullptr; - - WholeProgramDevirt() : ModulePass(ID), UseCommandLine(true) { - initializeWholeProgramDevirtPass(*PassRegistry::getPassRegistry()); - } - - WholeProgramDevirt(ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary) - : ModulePass(ID), ExportSummary(ExportSummary), - ImportSummary(ImportSummary) { - initializeWholeProgramDevirtPass(*PassRegistry::getPassRegistry()); - } - - bool runOnModule(Module &M) override { - if 
(skipModule(M)) - return false; - - // In the new pass manager, we can request the optimization - // remark emitter pass on a per-function-basis, which the - // OREGetter will do for us. - // In the old pass manager, this is harder, so we just build - // an optimization remark emitter on the fly, when we need it. - std::unique_ptr<OptimizationRemarkEmitter> ORE; - auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & { - ORE = std::make_unique<OptimizationRemarkEmitter>(F); - return *ORE; - }; - - auto LookupDomTree = [this](Function &F) -> DominatorTree & { - return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree(); - }; - - if (UseCommandLine) - return DevirtModule::runForTesting(M, LegacyAARGetter(*this), OREGetter, - LookupDomTree); - - return DevirtModule(M, LegacyAARGetter(*this), OREGetter, LookupDomTree, - ExportSummary, ImportSummary) - .run(); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<AssumptionCacheTracker>(); - AU.addRequired<TargetLibraryInfoWrapperPass>(); - AU.addRequired<DominatorTreeWrapperPass>(); - } -}; - } // end anonymous namespace -INITIALIZE_PASS_BEGIN(WholeProgramDevirt, "wholeprogramdevirt", - "Whole program devirtualization", false, false) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_END(WholeProgramDevirt, "wholeprogramdevirt", - "Whole program devirtualization", false, false) -char WholeProgramDevirt::ID = 0; - -ModulePass * -llvm::createWholeProgramDevirtPass(ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary) { - return new WholeProgramDevirt(ExportSummary, ImportSummary); -} - PreservedAnalyses WholeProgramDevirtPass::run(Module &M, ModuleAnalysisManager &AM) { auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 535a7736454c..4a459ec6c550 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1966,12 +1966,14 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) { return BinaryOperator::CreateAdd(X, ConstantExpr::getSub(C, C2)); } - // If there's no chance any bit will need to borrow from an adjacent bit: - // sub C, X --> xor X, C const APInt *Op0C; - if (match(Op0, m_APInt(Op0C)) && - (~computeKnownBits(Op1, 0, &I).Zero).isSubsetOf(*Op0C)) - return BinaryOperator::CreateXor(Op1, Op0); + if (match(Op0, m_APInt(Op0C)) && Op0C->isMask()) { + // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known + // zero. 
+ KnownBits RHSKnown = computeKnownBits(Op1, 0, &I); + if ((*Op0C | RHSKnown.Zero).isAllOnes()) + return BinaryOperator::CreateXor(Op1, Op0); + } { Value *Y; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index a8f2cd79830a..8253c575bc37 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -2664,8 +2664,8 @@ Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, // Inverted form (example): // (icmp slt (X | Y), 0) & (icmp sgt (X & Y), -1) -> (icmp slt (X ^ Y), 0) bool TrueIfSignedL, TrueIfSignedR; - if (InstCombiner::isSignBitCheck(PredL, *LHSC, TrueIfSignedL) && - InstCombiner::isSignBitCheck(PredR, *RHSC, TrueIfSignedR) && + if (isSignBitCheck(PredL, *LHSC, TrueIfSignedL) && + isSignBitCheck(PredR, *RHSC, TrueIfSignedR) && (RHS->hasOneUse() || LHS->hasOneUse())) { Value *X, *Y; if (IsAnd) { @@ -3202,25 +3202,38 @@ Value *InstCombinerImpl::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS, // TODO: This can be generalized to compares of non-signbits using // decomposeBitTestICmp(). It could be enhanced more by using (something like) // foldLogOpOfMaskedICmps(). - if ((LHS->hasOneUse() || RHS->hasOneUse()) && + const APInt *LC, *RC; + if (match(LHS1, m_APInt(LC)) && match(RHS1, m_APInt(RC)) && LHS0->getType() == RHS0->getType() && - LHS0->getType()->isIntOrIntVectorTy()) { + LHS0->getType()->isIntOrIntVectorTy() && + (LHS->hasOneUse() || RHS->hasOneUse())) { + // Convert xor of signbit tests to signbit test of xor'd values: // (X > -1) ^ (Y > -1) --> (X ^ Y) < 0 // (X < 0) ^ (Y < 0) --> (X ^ Y) < 0 - if ((PredL == CmpInst::ICMP_SGT && match(LHS1, m_AllOnes()) && - PredR == CmpInst::ICMP_SGT && match(RHS1, m_AllOnes())) || - (PredL == CmpInst::ICMP_SLT && match(LHS1, m_Zero()) && - PredR == CmpInst::ICMP_SLT && match(RHS1, m_Zero()))) - return Builder.CreateIsNeg(Builder.CreateXor(LHS0, RHS0)); - // (X > -1) ^ (Y < 0) --> (X ^ Y) > -1 // (X < 0) ^ (Y > -1) --> (X ^ Y) > -1 - if ((PredL == CmpInst::ICMP_SGT && match(LHS1, m_AllOnes()) && - PredR == CmpInst::ICMP_SLT && match(RHS1, m_Zero())) || - (PredL == CmpInst::ICMP_SLT && match(LHS1, m_Zero()) && - PredR == CmpInst::ICMP_SGT && match(RHS1, m_AllOnes()))) - return Builder.CreateIsNotNeg(Builder.CreateXor(LHS0, RHS0)); - + bool TrueIfSignedL, TrueIfSignedR; + if (isSignBitCheck(PredL, *LC, TrueIfSignedL) && + isSignBitCheck(PredR, *RC, TrueIfSignedR)) { + Value *XorLR = Builder.CreateXor(LHS0, RHS0); + return TrueIfSignedL == TrueIfSignedR ? Builder.CreateIsNeg(XorLR) : + Builder.CreateIsNotNeg(XorLR); + } + + // (X > C) ^ (X < C + 2) --> X != C + 1 + // (X < C + 2) ^ (X > C) --> X != C + 1 + // Considering the correctness of this pattern, we should avoid that C is + // non-negative and C + 2 is negative, although it will be matched by other + // patterns. 
+ const APInt *C1, *C2; + if ((PredL == CmpInst::ICMP_SGT && match(LHS1, m_APInt(C1)) && + PredR == CmpInst::ICMP_SLT && match(RHS1, m_APInt(C2))) || + (PredL == CmpInst::ICMP_SLT && match(LHS1, m_APInt(C2)) && + PredR == CmpInst::ICMP_SGT && match(RHS1, m_APInt(C1)))) + if (LHS0 == RHS0 && *C1 + 2 == *C2 && + (C1->isNegative() || C2->isNonNegative())) + return Builder.CreateICmpNE(LHS0, + ConstantInt::get(LHS0->getType(), *C1 + 1)); } // Instead of trying to imitate the folds for and/or, decompose this 'xor' diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index edfdf70c2b97..bc01d2ef7fe2 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1140,8 +1140,8 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { if (Value *V = simplifyCall(&CI, SQ.getWithInstruction(&CI))) return replaceInstUsesWith(CI, V); - if (isFreeCall(&CI, &TLI)) - return visitFree(CI); + if (Value *FreedOp = getFreedOperand(&CI, &TLI)) + return visitFree(CI, FreedOp); // If the caller function (i.e. us, the function that contains this CallInst) // is nounwind, mark the call as nounwind, even if the callee isn't. @@ -1539,8 +1539,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { Type *Ty = II->getType(); unsigned BitWidth = Ty->getScalarSizeInBits(); Constant *ShAmtC; - if (match(II->getArgOperand(2), m_ImmConstant(ShAmtC)) && - !ShAmtC->containsConstantExpression()) { + if (match(II->getArgOperand(2), m_ImmConstant(ShAmtC))) { // Canonicalize a shift amount constant operand to modulo the bit-width. Constant *WidthC = ConstantInt::get(Ty, BitWidth); Constant *ModuloC = @@ -2885,21 +2884,21 @@ bool InstCombinerImpl::annotateAnyAllocSite(CallBase &Call, // of the respective allocator declaration with generic attributes. bool Changed = false; - if (isAllocationFn(&Call, TLI)) { - uint64_t Size; - ObjectSizeOpts Opts; - if (getObjectSize(&Call, Size, DL, TLI, Opts) && Size > 0) { - // TODO: We really should just emit deref_or_null here and then - // let the generic inference code combine that with nonnull. - if (Call.hasRetAttr(Attribute::NonNull)) { - Changed = !Call.hasRetAttr(Attribute::Dereferenceable); - Call.addRetAttr( - Attribute::getWithDereferenceableBytes(Call.getContext(), Size)); - } else { - Changed = !Call.hasRetAttr(Attribute::DereferenceableOrNull); - Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes( - Call.getContext(), Size)); - } + if (!Call.getType()->isPointerTy()) + return Changed; + + Optional<APInt> Size = getAllocSize(&Call, TLI); + if (Size && *Size != 0) { + // TODO: We really should just emit deref_or_null here and then + // let the generic inference code combine that with nonnull. 
+ if (Call.hasRetAttr(Attribute::NonNull)) { + Changed = !Call.hasRetAttr(Attribute::Dereferenceable); + Call.addRetAttr(Attribute::getWithDereferenceableBytes( + Call.getContext(), Size->getLimitedValue())); + } else { + Changed = !Call.hasRetAttr(Attribute::DereferenceableOrNull); + Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes( + Call.getContext(), Size->getLimitedValue())); } } @@ -3079,8 +3078,7 @@ Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) { Call, Builder.CreateBitOrPointerCast(ReturnedArg, CallTy)); } - if (isAllocationFn(&Call, &TLI) && - isAllocRemovable(&cast<CallBase>(Call), &TLI)) + if (isRemovableAlloc(&Call, &TLI)) return visitAllocSite(Call); // Handle intrinsics which can be used in both call and invoke context. @@ -3242,15 +3240,16 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) { // the call because there is no place to put the cast instruction (without // breaking the critical edge). Bail out in this case. if (!Caller->use_empty()) { - if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) - for (User *U : II->users()) + BasicBlock *PhisNotSupportedBlock = nullptr; + if (auto *II = dyn_cast<InvokeInst>(Caller)) + PhisNotSupportedBlock = II->getNormalDest(); + if (auto *CB = dyn_cast<CallBrInst>(Caller)) + PhisNotSupportedBlock = CB->getDefaultDest(); + if (PhisNotSupportedBlock) + for (User *U : Caller->users()) if (PHINode *PN = dyn_cast<PHINode>(U)) - if (PN->getParent() == II->getNormalDest() || - PN->getParent() == II->getUnwindDest()) + if (PN->getParent() == PhisNotSupportedBlock) return false; - // FIXME: Be conservative for callbr to avoid a quadratic search. - if (isa<CallBrInst>(Caller)) - return false; } } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 9f6d36b85522..158d2e8289e0 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -2002,9 +2002,12 @@ Instruction *InstCombinerImpl::foldICmpMulConstant(ICmpInst &Cmp, Constant::getNullValue(Mul->getType())); } + if (MulC->isZero() || !(Mul->hasNoSignedWrap() || Mul->hasNoUnsignedWrap())) + return nullptr; + // If the multiply does not wrap, try to divide the compare constant by the // multiplication factor. - if (Cmp.isEquality() && !MulC->isZero()) { + if (Cmp.isEquality()) { // (mul nsw X, MulC) == C --> X == C /s MulC if (Mul->hasNoSignedWrap() && C.srem(*MulC).isZero()) { Constant *NewC = ConstantInt::get(Mul->getType(), C.sdiv(*MulC)); @@ -2017,7 +2020,40 @@ Instruction *InstCombinerImpl::foldICmpMulConstant(ICmpInst &Cmp, } } - return nullptr; + Constant *NewC = nullptr; + + // FIXME: Add assert that Pred is not equal to ICMP_SGE, ICMP_SLE, + // ICMP_UGE, ICMP_ULE. + + if (Mul->hasNoSignedWrap()) { + if (MulC->isNegative()) { + // MININT / -1 --> overflow. 
+ if (C.isMinSignedValue() && MulC->isAllOnes()) + return nullptr; + Pred = ICmpInst::getSwappedPredicate(Pred); + } + if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGE) + NewC = ConstantInt::get( + Mul->getType(), + APIntOps::RoundingSDiv(C, *MulC, APInt::Rounding::UP)); + if (Pred == ICmpInst::ICMP_SLE || Pred == ICmpInst::ICMP_SGT) + NewC = ConstantInt::get( + Mul->getType(), + APIntOps::RoundingSDiv(C, *MulC, APInt::Rounding::DOWN)); + } + + if (Mul->hasNoUnsignedWrap()) { + if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE) + NewC = ConstantInt::get( + Mul->getType(), + APIntOps::RoundingUDiv(C, *MulC, APInt::Rounding::UP)); + if (Pred == ICmpInst::ICMP_ULE || Pred == ICmpInst::ICMP_UGT) + NewC = ConstantInt::get( + Mul->getType(), + APIntOps::RoundingUDiv(C, *MulC, APInt::Rounding::DOWN)); + } + + return NewC ? new ICmpInst(Pred, Mul->getOperand(0), NewC) : nullptr; } /// Fold icmp (shl 1, Y), C. @@ -2235,13 +2271,22 @@ Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp, bool IsAShr = Shr->getOpcode() == Instruction::AShr; const APInt *ShiftValC; - if (match(Shr->getOperand(0), m_APInt(ShiftValC))) { + if (match(X, m_APInt(ShiftValC))) { if (Cmp.isEquality()) return foldICmpShrConstConst(Cmp, Shr->getOperand(1), C, *ShiftValC); + // (ShiftValC >> Y) >s -1 --> Y != 0 with ShiftValC < 0 + // (ShiftValC >> Y) <s 0 --> Y == 0 with ShiftValC < 0 + bool TrueIfSigned; + if (!IsAShr && ShiftValC->isNegative() && + isSignBitCheck(Pred, C, TrueIfSigned)) + return new ICmpInst(TrueIfSigned ? CmpInst::ICMP_EQ : CmpInst::ICMP_NE, + Shr->getOperand(1), + ConstantInt::getNullValue(X->getType())); + // If the shifted constant is a power-of-2, test the shift amount directly: - // (ShiftValC >> X) >u C --> X <u (LZ(C) - LZ(ShiftValC)) - // (ShiftValC >> X) <u C --> X >=u (LZ(C-1) - LZ(ShiftValC)) + // (ShiftValC >> Y) >u C --> X <u (LZ(C) - LZ(ShiftValC)) + // (ShiftValC >> Y) <u C --> X >=u (LZ(C-1) - LZ(ShiftValC)) if (!IsAShr && ShiftValC->isPowerOf2() && (Pred == CmpInst::ICMP_UGT || Pred == CmpInst::ICMP_ULT)) { bool IsUGT = Pred == CmpInst::ICMP_UGT; @@ -2972,7 +3017,7 @@ Instruction *InstCombinerImpl::foldICmpBitCast(ICmpInst &Cmp) { const APInt *C; bool TrueIfSigned; if (match(Op1, m_APInt(C)) && Bitcast->hasOneUse() && - InstCombiner::isSignBitCheck(Pred, *C, TrueIfSigned)) { + isSignBitCheck(Pred, *C, TrueIfSigned)) { if (match(BCSrcOp, m_FPExt(m_Value(X))) || match(BCSrcOp, m_FPTrunc(m_Value(X)))) { // (bitcast (fpext/fptrunc X)) to iX) < 0 --> (bitcast X to iY) < 0 diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 271154bb3f5a..827b25533513 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -152,7 +152,7 @@ public: Instruction *visitGEPOfBitcast(BitCastInst *BCI, GetElementPtrInst &GEP); Instruction *visitAllocaInst(AllocaInst &AI); Instruction *visitAllocSite(Instruction &FI); - Instruction *visitFree(CallInst &FI); + Instruction *visitFree(CallInst &FI, Value *FreedOp); Instruction *visitLoadInst(LoadInst &LI); Instruction *visitStoreInst(StoreInst &SI); Instruction *visitAtomicRMWInst(AtomicRMWInst &SI); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index f4e2d1239f0f..13c98b935adf 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -566,6 
+566,13 @@ static bool canEvaluateShifted(Value *V, unsigned NumBits, bool IsLeftShift, return false; return true; } + case Instruction::Mul: { + const APInt *MulConst; + // We can fold (shr (mul X, -(1 << C)), C) -> (and (neg X), C`) + return !IsLeftShift && match(I->getOperand(1), m_APInt(MulConst)) && + MulConst->isNegatedPowerOf2() && + MulConst->countTrailingZeros() == NumBits; + } } } @@ -680,6 +687,17 @@ static Value *getShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, isLeftShift, IC, DL)); return PN; } + case Instruction::Mul: { + assert(!isLeftShift && "Unexpected shift direction!"); + auto *Neg = BinaryOperator::CreateNeg(I->getOperand(0)); + IC.InsertNewInstWith(Neg, *I); + unsigned TypeWidth = I->getType()->getScalarSizeInBits(); + APInt Mask = APInt::getLowBitsSet(TypeWidth, TypeWidth - NumBits); + auto *And = BinaryOperator::CreateAnd(Neg, + ConstantInt::get(I->getType(), Mask)); + And->takeName(I); + return IC.InsertNewInstWith(And, *I); + } } } diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 75520a0c8d5f..71c763de43b4 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -994,6 +994,24 @@ Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) { return SelectInst::Create(X, TVal, FVal); } +static Constant *constantFoldOperationIntoSelectOperand( + Instruction &I, SelectInst *SI, Value *SO) { + auto *ConstSO = dyn_cast<Constant>(SO); + if (!ConstSO) + return nullptr; + + SmallVector<Constant *> ConstOps; + for (Value *Op : I.operands()) { + if (Op == SI) + ConstOps.push_back(ConstSO); + else if (auto *C = dyn_cast<Constant>(Op)) + ConstOps.push_back(C); + else + llvm_unreachable("Operands should be select or constant"); + } + return ConstantFoldInstOperands(&I, ConstOps, I.getModule()->getDataLayout()); +} + static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO, InstCombiner::BuilderTy &Builder) { if (auto *Cast = dyn_cast<CastInst>(&I)) @@ -1101,8 +1119,17 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI, } } - Value *NewTV = foldOperationIntoSelectOperand(Op, TV, Builder); - Value *NewFV = foldOperationIntoSelectOperand(Op, FV, Builder); + // Make sure that one of the select arms constant folds successfully. + Value *NewTV = constantFoldOperationIntoSelectOperand(Op, SI, TV); + Value *NewFV = constantFoldOperationIntoSelectOperand(Op, SI, FV); + if (!NewTV && !NewFV) + return nullptr; + + // Create an instruction for the arm that did not fold. 
+ if (!NewTV) + NewTV = foldOperationIntoSelectOperand(Op, TV, Builder); + if (!NewFV) + NewFV = foldOperationIntoSelectOperand(Op, FV, Builder); return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI); } @@ -2774,13 +2801,14 @@ static bool isAllocSiteRemovable(Instruction *AI, continue; } - if (isFreeCall(I, &TLI) && getAllocationFamily(I, &TLI) == Family) { + if (getFreedOperand(cast<CallBase>(I), &TLI) == PI && + getAllocationFamily(I, &TLI) == Family) { assert(Family); Users.emplace_back(I); continue; } - if (isReallocLikeFn(I, &TLI) && + if (getReallocatedOperand(cast<CallBase>(I), &TLI) == PI && getAllocationFamily(I, &TLI) == Family) { assert(Family); Users.emplace_back(I); @@ -2805,7 +2833,7 @@ static bool isAllocSiteRemovable(Instruction *AI, } Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) { - assert(isa<AllocaInst>(MI) || isAllocRemovable(&cast<CallBase>(MI), &TLI)); + assert(isa<AllocaInst>(MI) || isRemovableAlloc(&cast<CallBase>(MI), &TLI)); // If we have a malloc call which is only used in any amount of comparisons to // null and free calls, delete the calls and replace the comparisons with true @@ -3007,9 +3035,7 @@ static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI, return &FI; } -Instruction *InstCombinerImpl::visitFree(CallInst &FI) { - Value *Op = FI.getArgOperand(0); - +Instruction *InstCombinerImpl::visitFree(CallInst &FI, Value *Op) { // free undef -> unreachable. if (isa<UndefValue>(Op)) { // Leave a marker since we can't modify the CFG here. @@ -3024,12 +3050,10 @@ Instruction *InstCombinerImpl::visitFree(CallInst &FI) { // If we had free(realloc(...)) with no intervening uses, then eliminate the // realloc() entirely. - if (CallInst *CI = dyn_cast<CallInst>(Op)) { - if (CI->hasOneUse() && isReallocLikeFn(CI, &TLI)) { - return eraseInstFromFunction( - *replaceInstUsesWith(*CI, CI->getOperand(0))); - } - } + CallInst *CI = dyn_cast<CallInst>(Op); + if (CI && CI->hasOneUse()) + if (Value *ReallocatedOp = getReallocatedOperand(CI, &TLI)) + return eraseInstFromFunction(*replaceInstUsesWith(*CI, ReallocatedOp)); // If we optimize for code size, try to move the call to free before the null // test so that simplify cfg can remove the empty block and dead code diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 4fed4bd18fb1..cf2754b1dd60 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -108,6 +108,7 @@ static const uint64_t kAArch64_ShadowOffset64 = 1ULL << 36; static const uint64_t kRISCV64_ShadowOffset64 = 0xd55550000; static const uint64_t kFreeBSD_ShadowOffset32 = 1ULL << 30; static const uint64_t kFreeBSD_ShadowOffset64 = 1ULL << 46; +static const uint64_t kFreeBSDAArch64_ShadowOffset64 = 1ULL << 47; static const uint64_t kFreeBSDKasan_ShadowOffset64 = 0xdffff7c000000000; static const uint64_t kNetBSD_ShadowOffset32 = 1ULL << 30; static const uint64_t kNetBSD_ShadowOffset64 = 1ULL << 46; @@ -523,6 +524,8 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, Mapping.Offset = kPPC64_ShadowOffset64; else if (IsSystemZ) Mapping.Offset = kSystemZ_ShadowOffset64; + else if (IsFreeBSD && IsAArch64) + Mapping.Offset = kFreeBSDAArch64_ShadowOffset64; else if (IsFreeBSD && !IsMIPS64) { if (IsKasan) Mapping.Offset = kFreeBSDKasan_ShadowOffset64; diff --git a/llvm/lib/Transforms/Instrumentation/CGProfile.cpp 
b/llvm/lib/Transforms/Instrumentation/CGProfile.cpp index 57c491436b93..27107f46ed92 100644 --- a/llvm/lib/Transforms/Instrumentation/CGProfile.cpp +++ b/llvm/lib/Transforms/Instrumentation/CGProfile.cpp @@ -101,42 +101,6 @@ static bool runCGProfilePass( return addModuleFlags(M, Counts); } -namespace { -struct CGProfileLegacyPass final : public ModulePass { - static char ID; - CGProfileLegacyPass() : ModulePass(ID) { - initializeCGProfileLegacyPassPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - AU.addRequired<LazyBlockFrequencyInfoPass>(); - AU.addRequired<TargetTransformInfoWrapperPass>(); - } - - bool runOnModule(Module &M) override { - auto GetBFI = [this](Function &F) -> BlockFrequencyInfo & { - return this->getAnalysis<LazyBlockFrequencyInfoPass>(F).getBFI(); - }; - auto GetTTI = [this](Function &F) -> TargetTransformInfo & { - return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); - }; - - return runCGProfilePass(M, GetBFI, GetTTI, true); - } -}; - -} // namespace - -char CGProfileLegacyPass::ID = 0; - -INITIALIZE_PASS(CGProfileLegacyPass, "cg-profile", "Call Graph Profile", false, - false) - -ModulePass *llvm::createCGProfileLegacyPass() { - return new CGProfileLegacyPass(); -} - PreservedAnalyses CGProfilePass::run(Module &M, ModuleAnalysisManager &MAM) { FunctionAnalysisManager &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); diff --git a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp index e5c0705b916e..adc007dacae4 100644 --- a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp +++ b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp @@ -103,47 +103,6 @@ static void parseCHRFilterFiles() { } namespace { -class ControlHeightReductionLegacyPass : public FunctionPass { -public: - static char ID; - - ControlHeightReductionLegacyPass() : FunctionPass(ID) { - initializeControlHeightReductionLegacyPassPass( - *PassRegistry::getPassRegistry()); - parseCHRFilterFiles(); - } - - bool runOnFunction(Function &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<BlockFrequencyInfoWrapperPass>(); - AU.addRequired<DominatorTreeWrapperPass>(); - AU.addRequired<ProfileSummaryInfoWrapperPass>(); - AU.addRequired<RegionInfoPass>(); - AU.addPreserved<GlobalsAAWrapperPass>(); - } -}; -} // end anonymous namespace - -char ControlHeightReductionLegacyPass::ID = 0; - -INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass, - "chr", - "Reduce control height in the hot paths", - false, false) -INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(RegionInfoPass) -INITIALIZE_PASS_END(ControlHeightReductionLegacyPass, - "chr", - "Reduce control height in the hot paths", - false, false) - -FunctionPass *llvm::createControlHeightReductionLegacyPass() { - return new ControlHeightReductionLegacyPass(); -} - -namespace { struct CHRStats { CHRStats() = default; @@ -2083,18 +2042,6 @@ bool CHR::run() { return Changed; } -bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) { - BlockFrequencyInfo &BFI = - getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(); - DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - ProfileSummaryInfo &PSI = - 
getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); - RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo(); - std::unique_ptr<OptimizationRemarkEmitter> OwnedORE = - std::make_unique<OptimizationRemarkEmitter>(&F); - return CHR(F, BFI, DT, PSI, RI, *OwnedORE).run(); -} - namespace llvm { ControlHeightReductionPass::ControlHeightReductionPass() { diff --git a/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp b/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp index 2091881c29fe..d7561c193aa3 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp @@ -163,42 +163,11 @@ public: } }; // End of InstrOrderFile struct - -class InstrOrderFileLegacyPass : public ModulePass { -public: - static char ID; - - InstrOrderFileLegacyPass() : ModulePass(ID) { - initializeInstrOrderFileLegacyPassPass( - *PassRegistry::getPassRegistry()); - } - - bool runOnModule(Module &M) override; -}; - } // End anonymous namespace -bool InstrOrderFileLegacyPass::runOnModule(Module &M) { - if (skipModule(M)) - return false; - - return InstrOrderFile().run(M); -} - PreservedAnalyses InstrOrderFilePass::run(Module &M, ModuleAnalysisManager &AM) { if (InstrOrderFile().run(M)) return PreservedAnalyses::none(); return PreservedAnalyses::all(); } - -INITIALIZE_PASS_BEGIN(InstrOrderFileLegacyPass, "instrorderfile", - "Instrumentation for Order File", false, false) -INITIALIZE_PASS_END(InstrOrderFileLegacyPass, "instrorderfile", - "Instrumentation for Order File", false, false) - -char InstrOrderFileLegacyPass::ID = 0; - -ModulePass *llvm::createInstrOrderFilePass() { - return new InstrOrderFileLegacyPass(); -} diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index 3572cb3b50e2..5b7aa304b987 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -147,35 +147,6 @@ cl::opt<bool> SkipRetExitBlock( "skip-ret-exit-block", cl::init(true), cl::desc("Suppress counter promotion if exit blocks contain ret.")); -class InstrProfilingLegacyPass : public ModulePass { - InstrProfiling InstrProf; - -public: - static char ID; - - InstrProfilingLegacyPass() : ModulePass(ID) {} - InstrProfilingLegacyPass(const InstrProfOptions &Options, bool IsCS = false) - : ModulePass(ID), InstrProf(Options, IsCS) { - initializeInstrProfilingLegacyPassPass(*PassRegistry::getPassRegistry()); - } - - StringRef getPassName() const override { - return "Frontend instrumentation-based coverage lowering"; - } - - bool runOnModule(Module &M) override { - auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { - return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); - }; - return InstrProf.run(M, GetTLI); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - AU.addRequired<TargetLibraryInfoWrapperPass>(); - } -}; - /// /// A helper class to promote one counter RMW operation in the loop /// into register update. 
@@ -439,21 +410,6 @@ PreservedAnalyses InstrProfiling::run(Module &M, ModuleAnalysisManager &AM) { return PreservedAnalyses::none(); } -char InstrProfilingLegacyPass::ID = 0; -INITIALIZE_PASS_BEGIN(InstrProfilingLegacyPass, "instrprof", - "Frontend instrumentation-based coverage lowering.", - false, false) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_END(InstrProfilingLegacyPass, "instrprof", - "Frontend instrumentation-based coverage lowering.", false, - false) - -ModulePass * -llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options, - bool IsCS) { - return new InstrProfilingLegacyPass(Options, IsCS); -} - bool InstrProfiling::lowerIntrinsics(Function *F) { bool MadeChange = false; PromotionCandidates.clear(); diff --git a/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp index 9ff0e632bd7f..bd575b6cf3b0 100644 --- a/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp @@ -94,11 +94,6 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) { initializeMemProfilerLegacyPassPass(Registry); initializeModuleMemProfilerLegacyPassPass(Registry); initializeBoundsCheckingLegacyPassPass(Registry); - initializeControlHeightReductionLegacyPassPass(Registry); - initializeCGProfileLegacyPassPass(Registry); - initializeInstrOrderFileLegacyPassPass(Registry); - initializeInstrProfilingLegacyPassPass(Registry); - initializeModuleSanitizerCoverageLegacyPassPass(Registry); initializeDataFlowSanitizerLegacyPassPass(Registry); } diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 4d72f6c3d1a9..4606bd5de6c3 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -417,6 +417,14 @@ static const MemoryMapParams Linux_AArch64_MemoryMapParams = { 0x01000000000, // OriginBase }; +// aarch64 FreeBSD +static const MemoryMapParams FreeBSD_AArch64_MemoryMapParams = { + 0x1800000000000, // AndMask + 0x0400000000000, // XorMask + 0x0200000000000, // ShadowBase + 0x0700000000000, // OriginBase +}; + // i386 FreeBSD static const MemoryMapParams FreeBSD_I386_MemoryMapParams = { 0x000180000000, // AndMask @@ -466,6 +474,11 @@ static const PlatformMemoryMapParams Linux_ARM_MemoryMapParams = { &Linux_AArch64_MemoryMapParams, }; +static const PlatformMemoryMapParams FreeBSD_ARM_MemoryMapParams = { + nullptr, + &FreeBSD_AArch64_MemoryMapParams, +}; + static const PlatformMemoryMapParams FreeBSD_X86_MemoryMapParams = { &FreeBSD_I386_MemoryMapParams, &FreeBSD_X86_64_MemoryMapParams, @@ -894,6 +907,9 @@ void MemorySanitizer::initializeModule(Module &M) { switch (TargetTriple.getOS()) { case Triple::FreeBSD: switch (TargetTriple.getArch()) { + case Triple::aarch64: + MapParams = FreeBSD_ARM_MemoryMapParams.bits64; + break; case Triple::x86_64: MapParams = FreeBSD_X86_MemoryMapParams.bits64; break; diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 3a29cd70e42e..c4512d0222cd 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -210,12 +210,11 @@ cl::opt<bool> // Command line option to enable/disable the warning about a hash mismatch in // the profile data for Comdat functions, which often turns out to be false // positive due to 
the pre-instrumentation inline. -static cl::opt<bool> - NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", cl::init(true), - cl::Hidden, - cl::desc("The option is used to turn on/off " - "warnings about hash mismatch for comdat " - "functions.")); +static cl::opt<bool> NoPGOWarnMismatchComdatWeak( + "no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden, + cl::desc("The option is used to turn on/off " + "warnings about hash mismatch for comdat " + "or weak functions.")); // Command line option to enable/disable select instruction instrumentation. static cl::opt<bool> @@ -287,6 +286,11 @@ static cl::opt<unsigned> PGOVerifyBFICutoff( cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " "profile count value is below.")); +static cl::opt<std::string> PGOTraceFuncHash( + "pgo-trace-func-hash", cl::init("-"), cl::Hidden, + cl::value_desc("function name"), + cl::desc("Trace the hash of the function with this name.")); + namespace llvm { // Command line option to turn on CFG dot dump after profile annotation. // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts @@ -630,6 +634,10 @@ void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { << ", High32 CRC = " << JCH.getCRC()); } LLVM_DEBUG(dbgs() << ", Hash = " << FunctionHash << "\n";); + + if (PGOTraceFuncHash != "-" && F.getName().contains(PGOTraceFuncHash)) + dbgs() << "Funcname=" << F.getName() << ", Hash=" << FunctionHash + << " in building " << F.getParent()->getSourceFileName() << "\n"; } // Check if we can safely rename this Comdat function. @@ -832,8 +840,6 @@ static void instrumentOneFunc( auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()), FuncInfo.FunctionHash); if (PGOFunctionEntryCoverage) { - assert(!IsCS && - "entry coverge does not support context-sensitive instrumentation"); auto &EntryBB = F.getEntryBlock(); IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt()); // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>, @@ -1216,8 +1222,9 @@ static void annotateFunctionWithHashMismatch(Function &F, bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, bool &AllMinusOnes) { auto &Ctx = M->getContext(); - Expected<InstrProfRecord> Result = - PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash); + uint64_t MismatchedFuncSum = 0; + Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord( + FuncInfo.FuncName, FuncInfo.FunctionHash, &MismatchedFuncSum); if (Error E = Result.takeError()) { handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { auto Err = IPE.get(); @@ -1233,10 +1240,11 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++; SkipWarning = NoPGOWarnMismatch || - (NoPGOWarnMismatchComdat && - (F.hasComdat() || + (NoPGOWarnMismatchComdatWeak && + (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage || F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); - LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")"); + LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash + << " skip=" << SkipWarning << ")"); // Emit function metadata indicating PGO profile mismatch. 
annotateFunctionWithHashMismatch(F, M->getContext()); } @@ -1245,9 +1253,11 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, if (SkipWarning) return; - std::string Msg = IPE.message() + std::string(" ") + F.getName().str() + - std::string(" Hash = ") + - std::to_string(FuncInfo.FunctionHash); + std::string Msg = + IPE.message() + std::string(" ") + F.getName().str() + + std::string(" Hash = ") + std::to_string(FuncInfo.FunctionHash) + + std::string(" up to ") + std::to_string(MismatchedFuncSum) + + std::string(" count discarded"); Ctx.diagnose( DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index d9d11cc90d3d..3ca476e74953 100644 --- a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -278,53 +278,6 @@ private: const SpecialCaseList *Allowlist; const SpecialCaseList *Blocklist; }; - -class ModuleSanitizerCoverageLegacyPass : public ModulePass { -public: - ModuleSanitizerCoverageLegacyPass( - const SanitizerCoverageOptions &Options = SanitizerCoverageOptions(), - const std::vector<std::string> &AllowlistFiles = - std::vector<std::string>(), - const std::vector<std::string> &BlocklistFiles = - std::vector<std::string>()) - : ModulePass(ID), Options(Options) { - if (AllowlistFiles.size() > 0) - Allowlist = SpecialCaseList::createOrDie(AllowlistFiles, - *vfs::getRealFileSystem()); - if (BlocklistFiles.size() > 0) - Blocklist = SpecialCaseList::createOrDie(BlocklistFiles, - *vfs::getRealFileSystem()); - initializeModuleSanitizerCoverageLegacyPassPass( - *PassRegistry::getPassRegistry()); - } - bool runOnModule(Module &M) override { - ModuleSanitizerCoverage ModuleSancov(Options, Allowlist.get(), - Blocklist.get()); - auto DTCallback = [this](Function &F) -> const DominatorTree * { - return &this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree(); - }; - auto PDTCallback = [this](Function &F) -> const PostDominatorTree * { - return &this->getAnalysis<PostDominatorTreeWrapperPass>(F) - .getPostDomTree(); - }; - return ModuleSancov.instrumentModule(M, DTCallback, PDTCallback); - } - - static char ID; // Pass identification, replacement for typeid - StringRef getPassName() const override { return "ModuleSanitizerCoverage"; } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<DominatorTreeWrapperPass>(); - AU.addRequired<PostDominatorTreeWrapperPass>(); - } - -private: - SanitizerCoverageOptions Options; - - std::unique_ptr<SpecialCaseList> Allowlist; - std::unique_ptr<SpecialCaseList> Blocklist; -}; - } // namespace PreservedAnalyses ModuleSanitizerCoveragePass::run(Module &M, @@ -1075,20 +1028,3 @@ ModuleSanitizerCoverage::getSectionEnd(const std::string &Section) const { return "\1section$end$__DATA$__" + Section; return "__stop___" + Section; } - -char ModuleSanitizerCoverageLegacyPass::ID = 0; -INITIALIZE_PASS_BEGIN(ModuleSanitizerCoverageLegacyPass, "sancov", - "Pass for instrumenting coverage on functions", false, - false) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) -INITIALIZE_PASS_END(ModuleSanitizerCoverageLegacyPass, "sancov", - "Pass for instrumenting coverage on functions", false, - false) -ModulePass *llvm::createModuleSanitizerCoverageLegacyPassPass( - const SanitizerCoverageOptions &Options, - const std::vector<std::string> 
&AllowlistFiles, - const std::vector<std::string> &BlocklistFiles) { - return new ModuleSanitizerCoverageLegacyPass(Options, AllowlistFiles, - BlocklistFiles); -} diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 4c42869dbd58..3f0dad7ee769 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -776,6 +776,11 @@ struct DSEState { // fall back to CFG scan starting from all non-unreachable roots. bool AnyUnreachableExit; + // Whether or not we should iterate on removing dead stores at the end of the + // function due to removing a store causing a previously captured pointer to + // no longer be captured. + bool ShouldIterateEndOfFunctionDSE; + // Class contains self-reference, make sure it's not copied/moved. DSEState(const DSEState &) = delete; DSEState &operator=(const DSEState &) = delete; @@ -1103,9 +1108,8 @@ struct DSEState { return {std::make_pair(MemoryLocation(Ptr, Len), false)}; if (auto *CB = dyn_cast<CallBase>(I)) { - if (isFreeCall(I, &TLI)) - return {std::make_pair(MemoryLocation::getAfter(CB->getArgOperand(0)), - true)}; + if (Value *FreedOp = getFreedOperand(CB, &TLI)) + return {std::make_pair(MemoryLocation::getAfter(FreedOp), true)}; } return None; @@ -1114,9 +1118,9 @@ struct DSEState { /// Returns true if \p I is a memory terminator instruction like /// llvm.lifetime.end or free. bool isMemTerminatorInst(Instruction *I) const { - IntrinsicInst *II = dyn_cast<IntrinsicInst>(I); - return (II && II->getIntrinsicID() == Intrinsic::lifetime_end) || - isFreeCall(I, &TLI); + auto *CB = dyn_cast<CallBase>(I); + return CB && (CB->getIntrinsicID() == Intrinsic::lifetime_end || + getFreedOperand(CB, &TLI) != nullptr); } /// Returns true if \p MaybeTerm is a memory terminator for \p Loc from @@ -1598,6 +1602,14 @@ struct DSEState { if (MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst)) { if (MemoryDef *MD = dyn_cast<MemoryDef>(MA)) { SkipStores.insert(MD); + if (auto *SI = dyn_cast<StoreInst>(MD->getMemoryInst())) { + if (SI->getValueOperand()->getType()->isPointerTy()) { + const Value *UO = getUnderlyingObject(SI->getValueOperand()); + if (CapturedBeforeReturn.erase(UO)) + ShouldIterateEndOfFunctionDSE = true; + InvisibleToCallerAfterRet.erase(UO); + } + } } Updater.removeMemoryAccess(MA); @@ -1671,33 +1683,36 @@ struct DSEState { LLVM_DEBUG( dbgs() << "Trying to eliminate MemoryDefs at the end of the function\n"); - for (MemoryDef *Def : llvm::reverse(MemDefs)) { - if (SkipStores.contains(Def)) - continue; + do { + ShouldIterateEndOfFunctionDSE = false; + for (MemoryDef *Def : llvm::reverse(MemDefs)) { + if (SkipStores.contains(Def)) + continue; - Instruction *DefI = Def->getMemoryInst(); - auto DefLoc = getLocForWrite(DefI); - if (!DefLoc || !isRemovable(DefI)) - continue; + Instruction *DefI = Def->getMemoryInst(); + auto DefLoc = getLocForWrite(DefI); + if (!DefLoc || !isRemovable(DefI)) + continue; - // NOTE: Currently eliminating writes at the end of a function is limited - // to MemoryDefs with a single underlying object, to save compile-time. In - // practice it appears the case with multiple underlying objects is very - // uncommon. If it turns out to be important, we can use - // getUnderlyingObjects here instead. 
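The ShouldIterateEndOfFunctionDSE flag threaded through this hunk exists because deleting one end-of-function store can un-capture an object and expose further dead stores. A hedged, purely conceptual illustration (the function f and the locals x and p are invented; a real pipeline would normally promote such locals long before DSE runs):

    void f() {
      int x;     // local object, never read before returning
      int *p;    // local object, never read before returning
      p = &x;    // storing the address of x makes x look captured
      x = 42;    // dead, but not removable while x still looks captured
      // End-of-function DSE can delete "p = &x" (p is invisible to the caller
      // after return); the new code then erases x from CapturedBeforeReturn
      // and sets ShouldIterateEndOfFunctionDSE, so the re-scan can also
      // delete "x = 42".
    }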
- const Value *UO = getUnderlyingObject(DefLoc->Ptr); - if (!isInvisibleToCallerAfterRet(UO)) - continue; + // NOTE: Currently eliminating writes at the end of a function is + // limited to MemoryDefs with a single underlying object, to save + // compile-time. In practice it appears the case with multiple + // underlying objects is very uncommon. If it turns out to be important, + // we can use getUnderlyingObjects here instead. + const Value *UO = getUnderlyingObject(DefLoc->Ptr); + if (!isInvisibleToCallerAfterRet(UO)) + continue; - if (isWriteAtEndOfFunction(Def)) { - // See through pointer-to-pointer bitcasts - LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end " - "of the function\n"); - deleteDeadInstruction(DefI); - ++NumFastStores; - MadeChange = true; + if (isWriteAtEndOfFunction(Def)) { + // See through pointer-to-pointer bitcasts + LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end " + "of the function\n"); + deleteDeadInstruction(DefI); + ++NumFastStores; + MadeChange = true; + } } - } + } while (ShouldIterateEndOfFunctionDSE); return MadeChange; } diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index a9ca0bdc8f7b..9698ed97379e 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1738,7 +1738,7 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { // through *explicit* control flow. We have to eliminate the possibility of // implicit exits (see below) before we know it's truly exact. const SCEV *ExactBTC = SE->getBackedgeTakenCount(L); - if (isa<SCEVCouldNotCompute>(ExactBTC) || !isSafeToExpand(ExactBTC, *SE)) + if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC)) return false; assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant"); @@ -1769,7 +1769,8 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { return true; const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); - if (isa<SCEVCouldNotCompute>(ExitCount) || !isSafeToExpand(ExitCount, *SE)) + if (isa<SCEVCouldNotCompute>(ExitCount) || + !Rewriter.isSafeToExpand(ExitCount)) return true; assert(SE->isLoopInvariant(ExitCount, L) && diff --git a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp index b54cf5e7cb20..328615011ceb 100644 --- a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -1451,7 +1451,7 @@ bool LoopConstrainer::run() { return false; } - if (!isSafeToExpandAt(ExitPreLoopAtSCEV, InsertPt, SE)) { + if (!Expander.isSafeToExpandAt(ExitPreLoopAtSCEV, InsertPt)) { LLVM_DEBUG(dbgs() << "irce: could not prove that it is safe to expand the" << " preloop exit limit " << *ExitPreLoopAtSCEV << " at block " << InsertPt->getParent()->getName() @@ -1478,7 +1478,7 @@ bool LoopConstrainer::run() { return false; } - if (!isSafeToExpandAt(ExitMainLoopAtSCEV, InsertPt, SE)) { + if (!Expander.isSafeToExpandAt(ExitMainLoopAtSCEV, InsertPt)) { LLVM_DEBUG(dbgs() << "irce: could not prove that it is safe to expand the" << " main loop exit limit " << *ExitMainLoopAtSCEV << " at block " << InsertPt->getParent()->getName() diff --git a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp index 9590fbbb1994..fd2eaee8b47d 100644 --- a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp +++ 
b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp @@ -388,15 +388,15 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) { if (!isStrideLargeEnough(P.LSCEVAddRec, TargetMinStride)) continue; + BasicBlock *BB = P.InsertPt->getParent(); + SCEVExpander SCEVE(*SE, BB->getModule()->getDataLayout(), "prefaddr"); const SCEV *NextLSCEV = SE->getAddExpr(P.LSCEVAddRec, SE->getMulExpr( SE->getConstant(P.LSCEVAddRec->getType(), ItersAhead), P.LSCEVAddRec->getStepRecurrence(*SE))); - if (!isSafeToExpand(NextLSCEV, *SE)) + if (!SCEVE.isSafeToExpand(NextLSCEV)) continue; - BasicBlock *BB = P.InsertPt->getParent(); Type *I8Ptr = Type::getInt8PtrTy(BB->getContext(), 0/*PtrAddrSpace*/); - SCEVExpander SCEVE(*SE, BB->getModule()->getDataLayout(), "prefaddr"); Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, P.InsertPt); IRBuilder<> Builder(P.InsertPt); diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index d908c151d9f2..3ed022f65d9a 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -1129,7 +1129,7 @@ bool LoopIdiomRecognize::processLoopStridedStore( // TODO: ideally we should still be able to generate memset if SCEV expander // is taught to generate the dependencies at the latest point. - if (!isSafeToExpand(Start, *SE)) + if (!Expander.isSafeToExpand(Start)) return Changed; // Okay, we have a strided store "p[i]" of a splattable value. We can turn @@ -1163,7 +1163,7 @@ bool LoopIdiomRecognize::processLoopStridedStore( // TODO: ideally we should still be able to generate memset if SCEV expander // is taught to generate the dependencies at the latest point. - if (!isSafeToExpand(NumBytesS, *SE)) + if (!Expander.isSafeToExpand(NumBytesS)) return Changed; Value *NumBytes = diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp index 1d3023d04463..18daa4295224 100644 --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -288,7 +288,6 @@ static void populateWorklist(Loop &L, LoopVector &LoopList) { Vec = &CurrentLoop->getSubLoops(); } LoopList.push_back(CurrentLoop); - return; } namespace { diff --git a/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/llvm/lib/Transforms/Scalar/LoopPredication.cpp index d0ee5b47a8ca..b327d38d2a84 100644 --- a/llvm/lib/Transforms/Scalar/LoopPredication.cpp +++ b/llvm/lib/Transforms/Scalar/LoopPredication.cpp @@ -275,7 +275,8 @@ class LoopPredication { /// which is that an expression *can be made* invariant via SCEVExpander. /// Thus, this version is only suitable for finding an insert point to be be /// passed to SCEVExpander! - Instruction *findInsertPt(Instruction *User, ArrayRef<const SCEV*> Ops); + Instruction *findInsertPt(const SCEVExpander &Expander, Instruction *User, + ArrayRef<const SCEV *> Ops); /// Return true if the value is known to produce a single fixed value across /// all iterations on which it executes. 
Note that this does not imply @@ -418,13 +419,14 @@ Value *LoopPredication::expandCheck(SCEVExpander &Expander, return Builder.getFalse(); } - Value *LHSV = Expander.expandCodeFor(LHS, Ty, findInsertPt(Guard, {LHS})); - Value *RHSV = Expander.expandCodeFor(RHS, Ty, findInsertPt(Guard, {RHS})); + Value *LHSV = + Expander.expandCodeFor(LHS, Ty, findInsertPt(Expander, Guard, {LHS})); + Value *RHSV = + Expander.expandCodeFor(RHS, Ty, findInsertPt(Expander, Guard, {RHS})); IRBuilder<> Builder(findInsertPt(Guard, {LHSV, RHSV})); return Builder.CreateICmp(Pred, LHSV, RHSV); } - // Returns true if its safe to truncate the IV to RangeCheckType. // When the IV type is wider than the range operand type, we can still do loop // predication, by generating SCEVs for the range and latch that are of the @@ -516,14 +518,15 @@ Instruction *LoopPredication::findInsertPt(Instruction *Use, return Preheader->getTerminator(); } -Instruction *LoopPredication::findInsertPt(Instruction *Use, - ArrayRef<const SCEV*> Ops) { +Instruction *LoopPredication::findInsertPt(const SCEVExpander &Expander, + Instruction *Use, + ArrayRef<const SCEV *> Ops) { // Subtlety: SCEV considers things to be invariant if the value produced is // the same across iterations. This is not the same as being able to // evaluate outside the loop, which is what we actually need here. for (const SCEV *Op : Ops) if (!SE->isLoopInvariant(Op, L) || - !isSafeToExpandAt(Op, Preheader->getTerminator(), *SE)) + !Expander.isSafeToExpandAt(Op, Preheader->getTerminator())) return Use; return Preheader->getTerminator(); } @@ -589,8 +592,8 @@ Optional<Value *> LoopPredication::widenICmpRangeCheckIncrementingLoop( LLVM_DEBUG(dbgs() << "Can't expand limit check!\n"); return None; } - if (!isSafeToExpandAt(LatchStart, Guard, *SE) || - !isSafeToExpandAt(LatchLimit, Guard, *SE)) { + if (!Expander.isSafeToExpandAt(LatchStart, Guard) || + !Expander.isSafeToExpandAt(LatchLimit, Guard)) { LLVM_DEBUG(dbgs() << "Can't expand limit check!\n"); return None; } @@ -632,8 +635,8 @@ Optional<Value *> LoopPredication::widenICmpRangeCheckDecrementingLoop( LLVM_DEBUG(dbgs() << "Can't expand limit check!\n"); return None; } - if (!isSafeToExpandAt(LatchStart, Guard, *SE) || - !isSafeToExpandAt(LatchLimit, Guard, *SE)) { + if (!Expander.isSafeToExpandAt(LatchStart, Guard) || + !Expander.isSafeToExpandAt(LatchLimit, Guard)) { LLVM_DEBUG(dbgs() << "Can't expand limit check!\n"); return None; } @@ -1159,7 +1162,7 @@ bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { const SCEV *MinEC = getMinAnalyzeableBackedgeTakenCount(*SE, *DT, L); if (isa<SCEVCouldNotCompute>(MinEC) || MinEC->getType()->isPointerTy() || !SE->isLoopInvariant(MinEC, L) || - !isSafeToExpandAt(MinEC, WidenableBR, *SE)) + !Rewriter.isSafeToExpandAt(MinEC, WidenableBR)) return ChangedLoop; // Subtlety: We need to avoid inserting additional uses of the WC. 
We know @@ -1198,7 +1201,7 @@ bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); if (isa<SCEVCouldNotCompute>(ExitCount) || ExitCount->getType()->isPointerTy() || - !isSafeToExpandAt(ExitCount, WidenableBR, *SE)) + !Rewriter.isSafeToExpandAt(ExitCount, WidenableBR)) continue; const bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); diff --git a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp index b7e0e32780b4..083f87436acd 100644 --- a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp +++ b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp @@ -576,6 +576,18 @@ public: return false; } + // TODO: Tokens may breach LCSSA form by default. However, the transform for + // dead exit blocks requires LCSSA form to be maintained for all values, + // tokens included, otherwise it may break use-def dominance (see PR56243). + if (!DeadExitBlocks.empty() && !L.isLCSSAForm(DT, /*IgnoreTokens*/ false)) { + assert(L.isLCSSAForm(DT, /*IgnoreTokens*/ true) && + "LCSSA broken not by tokens?"); + LLVM_DEBUG(dbgs() << "Give up constant terminator folding in loop " + << Header->getName() + << ": tokens uses potentially break LCSSA form.\n"); + return false; + } + SE.forgetTopmostLoop(&L); // Dump analysis results. LLVM_DEBUG(dump()); diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 4ef7809c6681..a3434f8bc46d 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -1950,6 +1950,7 @@ class LSRInstance { Loop *const L; MemorySSAUpdater *MSSAU; TTI::AddressingModeKind AMK; + mutable SCEVExpander Rewriter; bool Changed = false; /// This is the insert position that the current loop's induction variable @@ -1998,7 +1999,7 @@ class LSRInstance { SmallVectorImpl<ChainUsers> &ChainUsersVec); void FinalizeChain(IVChain &Chain); void CollectChains(); - void GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, + void GenerateIVChain(const IVChain &Chain, SmallVectorImpl<WeakTrackingVH> &DeadInsts); void CollectInterestingTypesAndFactors(); @@ -2068,22 +2069,19 @@ class LSRInstance { void Solve(SmallVectorImpl<const Formula *> &Solution) const; BasicBlock::iterator - HoistInsertPosition(BasicBlock::iterator IP, - const SmallVectorImpl<Instruction *> &Inputs) const; - BasicBlock::iterator - AdjustInsertPositionForExpand(BasicBlock::iterator IP, - const LSRFixup &LF, - const LSRUse &LU, - SCEVExpander &Rewriter) const; + HoistInsertPosition(BasicBlock::iterator IP, + const SmallVectorImpl<Instruction *> &Inputs) const; + BasicBlock::iterator AdjustInsertPositionForExpand(BasicBlock::iterator IP, + const LSRFixup &LF, + const LSRUse &LU) const; Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F, - BasicBlock::iterator IP, SCEVExpander &Rewriter, + BasicBlock::iterator IP, SmallVectorImpl<WeakTrackingVH> &DeadInsts) const; void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF, - const Formula &F, SCEVExpander &Rewriter, + const Formula &F, SmallVectorImpl<WeakTrackingVH> &DeadInsts) const; void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F, - SCEVExpander &Rewriter, SmallVectorImpl<WeakTrackingVH> &DeadInsts) const; void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution); @@ -3183,7 +3181,7 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, /// Generate an add or 
subtract for each IVInc in a chain to materialize the IV /// user's operand from the previous IV user's operand. -void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, +void LSRInstance::GenerateIVChain(const IVChain &Chain, SmallVectorImpl<WeakTrackingVH> &DeadInsts) { // Find the new IVOperand for the head of the chain. It may have been replaced // by LSR. @@ -3335,7 +3333,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { // x == y --> x - y == 0 const SCEV *N = SE.getSCEV(NV); - if (SE.isLoopInvariant(N, L) && isSafeToExpand(N, SE) && + if (SE.isLoopInvariant(N, L) && Rewriter.isSafeToExpand(N) && (!NV->getType()->isPointerTy() || SE.getPointerBase(N) == SE.getPointerBase(S))) { // S is normalized, so normalize N before folding it into S @@ -3343,6 +3341,21 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { N = normalizeForPostIncUse(N, TmpPostIncLoops, SE); Kind = LSRUse::ICmpZero; S = SE.getMinusSCEV(N, S); + } else if (L->isLoopInvariant(NV) && + (!isa<Instruction>(NV) || + DT.dominates(cast<Instruction>(NV), L->getHeader())) && + !NV->getType()->isPointerTy()) { + // If we can't generally expand the expression (e.g. it contains + // a divide), but it is already at a loop invariant point before the + // loop, wrap it in an unknown (to prevent the expander from trying + // to re-expand in a potentially unsafe way.) The restriction to + // integer types is required because the unknown hides the base, and + // SCEV can't compute the difference of two unknown pointers. + N = SE.getUnknown(NV); + N = normalizeForPostIncUse(N, TmpPostIncLoops, SE); + Kind = LSRUse::ICmpZero; + S = SE.getMinusSCEV(N, S); + assert(!isa<SCEVCouldNotCompute>(S)); } // -1 and the negations of all interesting strides (except the negation @@ -3385,10 +3398,10 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { /// Insert a formula for the given expression into the given use, separating out /// loop-variant portions from loop-invariant and loop-computable portions. -void -LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) { +void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, + size_t LUIdx) { // Mark uses whose expressions cannot be expanded. - if (!isSafeToExpand(S, SE, /*CanonicalMode*/ false)) + if (!Rewriter.isSafeToExpand(S)) LU.RigidFormula = true; Formula F; @@ -5206,11 +5219,8 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP, /// Determine an input position which will be dominated by the operands and /// which will dominate the result. -BasicBlock::iterator -LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP, - const LSRFixup &LF, - const LSRUse &LU, - SCEVExpander &Rewriter) const { +BasicBlock::iterator LSRInstance::AdjustInsertPositionForExpand( + BasicBlock::iterator LowestIP, const LSRFixup &LF, const LSRUse &LU) const { // Collect some instructions which must be dominated by the // expanding replacement. These must be dominated by any operands that // will be required in the expansion. @@ -5273,14 +5283,13 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP, /// is called "expanding"). Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F, BasicBlock::iterator IP, - SCEVExpander &Rewriter, SmallVectorImpl<WeakTrackingVH> &DeadInsts) const { if (LU.RigidFormula) return LF.OperandValToReplace; // Determine an input position which will be dominated by the operands and // which will dominate the result. 
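The recurring change in the hunks above (IndVarSimplify, IRCE, LoopDataPrefetch, LoopIdiomRecognize, LoopPredication and LSR) replaces the free functions isSafeToExpand(S, SE) and isSafeToExpandAt(S, InsertPt, SE) with the equivalent SCEVExpander members, so the safety query sees the same expander configuration that later performs the expansion. A hedged sketch of the updated call-site pattern; expandIfSafe and its parameters are invented for illustration:

    #include "llvm/Analysis/ScalarEvolution.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Instruction.h"
    #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"

    using namespace llvm;

    // isSafeToExpandAt() is now queried on an already-configured expander
    // instead of via the old free function, so mode flags (canonical mode,
    // LSR mode) set on the expander are respected by the check.
    static Value *expandIfSafe(const SCEV *S, Instruction *InsertPt,
                               ScalarEvolution &SE, const DataLayout &DL) {
      SCEVExpander Expander(SE, DL, "safe-expand");
      if (!Expander.isSafeToExpandAt(S, InsertPt))
        return nullptr;
      return Expander.expandCodeFor(S, S->getType(), InsertPt);
    }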
- IP = AdjustInsertPositionForExpand(IP, LF, LU, Rewriter); + IP = AdjustInsertPositionForExpand(IP, LF, LU); Rewriter.setInsertPoint(&*IP); // Inform the Rewriter if we have a post-increment use, so that it can @@ -5452,7 +5461,7 @@ Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF, /// to be expanded in multiple places. void LSRInstance::RewriteForPHI( PHINode *PN, const LSRUse &LU, const LSRFixup &LF, const Formula &F, - SCEVExpander &Rewriter, SmallVectorImpl<WeakTrackingVH> &DeadInsts) const { + SmallVectorImpl<WeakTrackingVH> &DeadInsts) const { DenseMap<BasicBlock *, Value *> Inserted; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingValue(i) == LF.OperandValToReplace) { @@ -5507,8 +5516,8 @@ void LSRInstance::RewriteForPHI( if (!Pair.second) PN->setIncomingValue(i, Pair.first->second); else { - Value *FullV = Expand(LU, LF, F, BB->getTerminator()->getIterator(), - Rewriter, DeadInsts); + Value *FullV = + Expand(LU, LF, F, BB->getTerminator()->getIterator(), DeadInsts); // If this is reuse-by-noop-cast, insert the noop cast. Type *OpTy = LF.OperandValToReplace->getType(); @@ -5567,15 +5576,14 @@ void LSRInstance::RewriteForPHI( /// is called "expanding"), and update the UserInst to reference the newly /// expanded value. void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF, - const Formula &F, SCEVExpander &Rewriter, + const Formula &F, SmallVectorImpl<WeakTrackingVH> &DeadInsts) const { // First, find an insertion point that dominates UserInst. For PHI nodes, // find the nearest block which dominates all the relevant uses. if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) { - RewriteForPHI(PN, LU, LF, F, Rewriter, DeadInsts); + RewriteForPHI(PN, LU, LF, F, DeadInsts); } else { - Value *FullV = - Expand(LU, LF, F, LF.UserInst->getIterator(), Rewriter, DeadInsts); + Value *FullV = Expand(LU, LF, F, LF.UserInst->getIterator(), DeadInsts); // If this is reuse-by-noop-cast, insert the noop cast. Type *OpTy = LF.OperandValToReplace->getType(); @@ -5609,13 +5617,6 @@ void LSRInstance::ImplementSolution( // we can remove them after we are done working. SmallVector<WeakTrackingVH, 16> DeadInsts; - SCEVExpander Rewriter(SE, L->getHeader()->getModule()->getDataLayout(), "lsr", - false); -#ifndef NDEBUG - Rewriter.setDebugType(DEBUG_TYPE); -#endif - Rewriter.disableCanonicalMode(); - Rewriter.enableLSRMode(); Rewriter.setIVIncInsertPos(L, IVIncInsertPos); // Mark phi nodes that terminate chains so the expander tries to reuse them. @@ -5627,12 +5628,12 @@ void LSRInstance::ImplementSolution( // Expand the new value definitions and update the users. for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) { - Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], Rewriter, DeadInsts); + Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], DeadInsts); Changed = true; } for (const IVChain &Chain : IVChainVec) { - GenerateIVChain(Chain, Rewriter, DeadInsts); + GenerateIVChain(Chain, DeadInsts); Changed = true; } @@ -5697,8 +5698,10 @@ LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU) : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI), TTI(TTI), L(L), - MSSAU(MSSAU), AMK(PreferredAddresingMode.getNumOccurrences() > 0 ? - PreferredAddresingMode : TTI.getPreferredAddressingMode(L, &SE)) { + MSSAU(MSSAU), AMK(PreferredAddresingMode.getNumOccurrences() > 0 + ? 
PreferredAddresingMode + : TTI.getPreferredAddressingMode(L, &SE)), + Rewriter(SE, L->getHeader()->getModule()->getDataLayout(), "lsr", false) { // If LoopSimplify form is not available, stay out of trouble. if (!L->isLoopSimplifyForm()) return; @@ -5733,6 +5736,14 @@ LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, L->getHeader()->printAsOperand(dbgs(), /*PrintType=*/false); dbgs() << ":\n"); + // Configure SCEVExpander already now, so the correct mode is used for + // isSafeToExpand() checks. +#ifndef NDEBUG + Rewriter.setDebugType(DEBUG_TYPE); +#endif + Rewriter.disableCanonicalMode(); + Rewriter.enableLSRMode(); + // First, perform some low-level loop optimizations. OptimizeShadowIV(); OptimizeLoopTermCond(); diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp index 75f0896d4845..240fb5e60687 100644 --- a/llvm/lib/Transforms/Scalar/Reassociate.cpp +++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp @@ -142,12 +142,21 @@ XorOpnd::XorOpnd(Value *V) { isOr = true; } +/// Return true if I is an instruction with the FastMathFlags that are needed +/// for general reassociation set. This is not the same as testing +/// Instruction::isAssociative() because it includes operations like fsub. +/// (This routine is only intended to be called for floating-point operations.) +static bool hasFPAssociativeFlags(Instruction *I) { + assert(I && I->getType()->isFPOrFPVectorTy() && "Should only check FP ops"); + return I->hasAllowReassoc() && I->hasNoSignedZeros(); +} + /// Return true if V is an instruction of the specified opcode and if it /// only has one use. static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) { auto *I = dyn_cast<Instruction>(V); if (I && I->hasOneUse() && I->getOpcode() == Opcode) - if (!isa<FPMathOperator>(I) || I->isFast()) + if (!isa<FPMathOperator>(I) || hasFPAssociativeFlags(I)) return cast<BinaryOperator>(I); return nullptr; } @@ -157,7 +166,7 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1, auto *I = dyn_cast<Instruction>(V); if (I && I->hasOneUse() && (I->getOpcode() == Opcode1 || I->getOpcode() == Opcode2)) - if (!isa<FPMathOperator>(I) || I->isFast()) + if (!isa<FPMathOperator>(I) || hasFPAssociativeFlags(I)) return cast<BinaryOperator>(I); return nullptr; } @@ -449,7 +458,8 @@ using RepeatedValue = std::pair<Value*, APInt>; /// of the expression) if it can turn them into binary operators of the right /// type and thus make the expression bigger. static bool LinearizeExprTree(Instruction *I, - SmallVectorImpl<RepeatedValue> &Ops) { + SmallVectorImpl<RepeatedValue> &Ops, + ReassociatePass::OrderedSet &ToRedo) { assert((isa<UnaryOperator>(I) || isa<BinaryOperator>(I)) && "Expected a UnaryOperator or BinaryOperator!"); LLVM_DEBUG(dbgs() << "LINEARIZE: " << *I << '\n'); @@ -572,23 +582,32 @@ static bool LinearizeExprTree(Instruction *I, assert((!isa<Instruction>(Op) || cast<Instruction>(Op)->getOpcode() != Opcode || (isa<FPMathOperator>(Op) && - !cast<Instruction>(Op)->isFast())) && + !hasFPAssociativeFlags(cast<Instruction>(Op)))) && "Should have been handled above!"); assert(Op->hasOneUse() && "Has uses outside the expression tree!"); // If this is a multiply expression, turn any internal negations into - // multiplies by -1 so they can be reassociated. 
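The hasFPAssociativeFlags() helper added above relaxes Reassociate's gate from the full 'fast' flag set to just reassoc plus nsz. A hedged, self-contained sketch of an instruction that now qualifies; the reassociable() helper and the module/function names are invented, and the flag checks simply mirror the helper in the patch:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    // Same test as the new helper: reassoc + nsz are enough; 'fast' is no
    // longer required.
    static bool reassociable(const Instruction *I) {
      return I->hasAllowReassoc() && I->hasNoSignedZeros();
    }

    int main() {
      LLVMContext Ctx;
      Module M("example", Ctx);
      IRBuilder<> B(Ctx);
      auto *FnTy = FunctionType::get(B.getFloatTy(),
                                     {B.getFloatTy(), B.getFloatTy()}, false);
      Function *F = Function::Create(FnTy, Function::ExternalLinkage, "f", M);
      B.SetInsertPoint(BasicBlock::Create(Ctx, "entry", F));

      FastMathFlags FMF;
      FMF.setAllowReassoc();
      FMF.setNoSignedZeros();
      B.setFastMathFlags(FMF); // only reassoc + nsz, not the full 'fast' set
      Value *Sum = B.CreateFAdd(F->getArg(0), F->getArg(1), "sum");
      B.CreateRet(Sum);

      return reassociable(cast<Instruction>(Sum)) ? 0 : 1;
    }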
- if (Instruction *Tmp = dyn_cast<Instruction>(Op)) - if ((Opcode == Instruction::Mul && match(Tmp, m_Neg(m_Value()))) || - (Opcode == Instruction::FMul && match(Tmp, m_FNeg(m_Value())))) { - LLVM_DEBUG(dbgs() - << "MORPH LEAF: " << *Op << " (" << Weight << ") TO "); - Tmp = LowerNegateToMultiply(Tmp); - LLVM_DEBUG(dbgs() << *Tmp << '\n'); - Worklist.push_back(std::make_pair(Tmp, Weight)); - Changed = true; - continue; + // multiplies by -1 so they can be reassociated. Add any users of the + // newly created multiplication by -1 to the redo list, so any + // reassociation opportunities that are exposed will be reassociated + // further. + Instruction *Neg; + if (((Opcode == Instruction::Mul && match(Op, m_Neg(m_Value()))) || + (Opcode == Instruction::FMul && match(Op, m_FNeg(m_Value())))) && + match(Op, m_Instruction(Neg))) { + LLVM_DEBUG(dbgs() + << "MORPH LEAF: " << *Op << " (" << Weight << ") TO "); + Instruction *Mul = LowerNegateToMultiply(Neg); + LLVM_DEBUG(dbgs() << *Mul << '\n'); + Worklist.push_back(std::make_pair(Mul, Weight)); + for (User *U : Mul->users()) { + if (BinaryOperator *UserBO = dyn_cast<BinaryOperator>(U)) + ToRedo.insert(UserBO); } + ToRedo.insert(Neg); + Changed = true; + continue; + } // Failed to morph into an expression of the right type. This really is // a leaf. @@ -1141,7 +1160,7 @@ Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) { return nullptr; SmallVector<RepeatedValue, 8> Tree; - MadeChange |= LinearizeExprTree(BO, Tree); + MadeChange |= LinearizeExprTree(BO, Tree, RedoInsts); SmallVector<ValueEntry, 8> Factors; Factors.reserve(Tree.size()); for (unsigned i = 0, e = Tree.size(); i != e; ++i) { @@ -2206,8 +2225,9 @@ void ReassociatePass::OptimizeInst(Instruction *I) { if (Instruction *Res = canonicalizeNegFPConstants(I)) I = Res; - // Don't optimize floating-point instructions unless they are 'fast'. - if (I->getType()->isFPOrFPVectorTy() && !I->isFast()) + // Don't optimize floating-point instructions unless they have the + // appropriate FastMathFlags for reassociation enabled. + if (I->getType()->isFPOrFPVectorTy() && !hasFPAssociativeFlags(I)) return; // Do not reassociate boolean (i1) expressions. We want to preserve the @@ -2320,7 +2340,7 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) { // First, walk the expression tree, linearizing the tree, collecting the // operand information. SmallVector<RepeatedValue, 8> Tree; - MadeChange |= LinearizeExprTree(I, Tree); + MadeChange |= LinearizeExprTree(I, Tree, RedoInsts); SmallVector<ValueEntry, 8> Ops; Ops.reserve(Tree.size()); for (const RepeatedValue &E : Tree) diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index 51e4a5773f3e..baf407c5037b 100644 --- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -1702,10 +1702,20 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */ auto &Context = Call->getContext(); auto &DL = Call->getModule()->getDataLayout(); auto GetBaseAndOffset = [&](Value *Derived) { - assert(PointerToBase.count(Derived)); + Value *Base = nullptr; + // Optimizations in unreachable code might substitute the real pointer + // with undef, poison or null-derived constant. Return null base for + // them to be consistent with the handling in the main algorithm in + // findBaseDefiningValue. 
+ if (isa<Constant>(Derived)) + Base = + ConstantPointerNull::get(cast<PointerType>(Derived->getType())); + else { + assert(PointerToBase.count(Derived)); + Base = PointerToBase.find(Derived)->second; + } unsigned AddressSpace = Derived->getType()->getPointerAddressSpace(); unsigned IntPtrSize = DL.getPointerSizeInBits(AddressSpace); - Value *Base = PointerToBase.find(Derived)->second; Value *Base_int = Builder.CreatePtrToInt( Base, Type::getIntNTy(Context, IntPtrSize)); Value *Derived_int = Builder.CreatePtrToInt( diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp index 008ddfc72740..5ab9e25577d8 100644 --- a/llvm/lib/Transforms/Scalar/Scalar.cpp +++ b/llvm/lib/Transforms/Scalar/Scalar.cpp @@ -111,8 +111,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeLoopLoadEliminationPass(Registry); initializeLoopSimplifyCFGLegacyPassPass(Registry); initializeLoopVersioningLegacyPassPass(Registry); - initializeEntryExitInstrumenterPass(Registry); - initializePostInlineEntryExitInstrumenterPass(Registry); } void LLVMAddLoopSimplifyCFGPass(LLVMPassManagerRef PM) { diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp index f6525ad7de9b..0b797abefe20 100644 --- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -68,11 +68,6 @@ static cl::opt<bool> cl::desc("Allow relaxed uniform region checks"), cl::init(true)); -static cl::opt<unsigned> - ReorderNodeSize("structurizecfg-node-reorder-size", - cl::desc("Limit region size for reordering nodes"), - cl::init(100), cl::Hidden); - // Definition of the complex types used in this pass. using BBValuePair = std::pair<BasicBlock *, Value *>; @@ -267,8 +262,6 @@ class StructurizeCFG { void orderNodes(); - void reorderNodes(); - void analyzeLoops(RegionNode *N); Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert); @@ -427,57 +420,6 @@ void StructurizeCFG::orderNodes() { } } -/// Change the node ordering to decrease the range of live values, especially -/// the values that capture the control flow path for branches. We do this -/// by moving blocks with a single predecessor and successor to appear after -/// predecessor. The motivation is to move some loop exit blocks into a loop. -/// In cases where a loop has a large number of exit blocks, this reduces the -/// amount of values needed across the loop boundary. -void StructurizeCFG::reorderNodes() { - SmallVector<RegionNode *, 8> NewOrder; - DenseMap<BasicBlock *, unsigned> MoveTo; - BitVector Moved(Order.size()); - - // The benefits of reordering nodes occurs for large regions. - if (Order.size() <= ReorderNodeSize) - return; - - // The algorithm works with two passes over Order. The first pass identifies - // the blocks to move and the position to move them to. The second pass - // creates the new order based upon this information. We move blocks with - // a single predecessor and successor. If there are multiple candidates then - // maintain the original order. - BBSet Seen; - for (int I = Order.size() - 1; I >= 0; --I) { - auto *BB = Order[I]->getEntry(); - Seen.insert(BB); - auto *Pred = BB->getSinglePredecessor(); - auto *Succ = BB->getSingleSuccessor(); - // Consider only those basic blocks that have a predecessor in Order and a - // successor that exits the region. The region may contain subregions that - // have been structurized and are not included in Order. 
- if (Pred && Succ && Seen.count(Pred) && Succ == ParentRegion->getExit() && - !MoveTo.count(Pred)) { - MoveTo[Pred] = I; - Moved.set(I); - } - } - - // If no blocks have been moved then the original order is good. - if (!Moved.count()) - return; - - for (size_t I = 0, E = Order.size(); I < E; ++I) { - auto *BB = Order[I]->getEntry(); - if (MoveTo.count(BB)) - NewOrder.push_back(Order[MoveTo[BB]]); - if (!Moved[I]) - NewOrder.push_back(Order[I]); - } - - Order.assign(NewOrder); -} - /// Determine the end of the loops void StructurizeCFG::analyzeLoops(RegionNode *N) { if (N->isSubRegion()) { @@ -1139,7 +1081,6 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT) { ParentRegion = R; orderNodes(); - reorderNodes(); collectInfos(); createFlow(); insertConditions(false); diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index 079b2fc973b9..e3cb5f359e34 100644 --- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -80,7 +80,7 @@ void llvm::detachDeadBlocks( // contained within it must dominate their uses, that all uses will // eventually be removed (they are themselves dead). if (!I.use_empty()) - I.replaceAllUsesWith(UndefValue::get(I.getType())); + I.replaceAllUsesWith(PoisonValue::get(I.getType())); BB->getInstList().pop_back(); } new UnreachableInst(BB->getContext(), BB); @@ -188,8 +188,10 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, // Don't break self-loops. if (PredBB == BB) return false; - // Don't break unwinding instructions. - if (PredBB->getTerminator()->isExceptionalTerminator()) + + // Don't break unwinding instructions or terminators with other side-effects. + Instruction *PTI = PredBB->getTerminator(); + if (PTI->isExceptionalTerminator() || PTI->mayHaveSideEffects()) return false; // Can't merge if there are multiple distinct successors. @@ -202,7 +204,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, BasicBlock *NewSucc = nullptr; unsigned FallThruPath; if (PredecessorWithTwoSuccessors) { - if (!(PredBB_BI = dyn_cast<BranchInst>(PredBB->getTerminator()))) + if (!(PredBB_BI = dyn_cast<BranchInst>(PTI))) return false; BranchInst *BB_JmpI = dyn_cast<BranchInst>(BB->getTerminator()); if (!BB_JmpI || !BB_JmpI->isUnconditional()) @@ -256,7 +258,6 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, Updates.push_back({DominatorTree::Delete, PredBB, BB}); } - Instruction *PTI = PredBB->getTerminator(); Instruction *STI = BB->getTerminator(); Instruction *Start = &*BB->begin(); // If there's nothing to move, mark the starting instruction as the last @@ -1141,7 +1142,7 @@ SplitBlockPredecessorsImpl(BasicBlock *BB, ArrayRef<BasicBlock *> Preds, if (Preds.empty()) { // Insert dummy values as the incoming value. for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I) - cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB); + cast<PHINode>(I)->addIncoming(PoisonValue::get(I->getType()), NewBB); } // Update DominatorTree, LoopInfo, and LCCSA analysis information. 
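On the BasicBlockUtils.cpp changes just above: dead-block placeholders switch from undef to poison, and MergeBlockIntoPredecessor() now also refuses to fold a block into a predecessor whose terminator may have side effects, not only one ending in an exceptional terminator. A hedged sketch of a typical call site; tryFoldIntoPredecessor and its parameters are invented for illustration:

    #include "llvm/Analysis/DomTreeUpdater.h"
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Dominators.h"
    #include "llvm/Transforms/Utils/BasicBlockUtils.h"

    using namespace llvm;

    // Attempt to fold BB into its unique predecessor. With this patch the
    // helper bails out when the predecessor's terminator may have side
    // effects, in addition to the existing exceptional-terminator check.
    static bool tryFoldIntoPredecessor(BasicBlock *BB, DominatorTree &DT) {
      DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
      return MergeBlockIntoPredecessor(BB, &DTU);
    }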
diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index c4a58f36c171..e25ec74a0572 100644 --- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -270,9 +270,6 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, bool Changed = false; - if(!isLibFreeFunction(&F, TheLibFunc) && !isReallocLikeFn(&F, &TLI)) - Changed |= setDoesNotFreeMemory(F); - if (F.getParent() != nullptr && F.getParent()->getRtLibUseGOT()) Changed |= setNonLazyBind(F); @@ -285,14 +282,14 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setOnlyAccessesArgMemory(F); Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_strchr: case LibFunc_strrchr: Changed |= setOnlyAccessesArgMemory(F); Changed |= setOnlyReadsMemory(F); Changed |= setDoesNotThrow(F); Changed |= setWillReturn(F); - return Changed; + break; case LibFunc_strtol: case LibFunc_strtod: case LibFunc_strtof: @@ -304,7 +301,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_strcat: case LibFunc_strncat: Changed |= setOnlyAccessesArgMemory(F); @@ -315,7 +312,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setOnlyReadsMemory(F, 1); Changed |= setDoesNotAlias(F, 0); Changed |= setDoesNotAlias(F, 1); - return Changed; + break; case LibFunc_strcpy: case LibFunc_strncpy: Changed |= setReturnedArg(F, 0); @@ -330,14 +327,14 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setOnlyReadsMemory(F, 1); Changed |= setDoesNotAlias(F, 0); Changed |= setDoesNotAlias(F, 1); - return Changed; + break; case LibFunc_strxfrm: Changed |= setDoesNotThrow(F); Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_strcmp: // 0,1 case LibFunc_strspn: // 0,1 case LibFunc_strncmp: // 0,1 @@ -348,7 +345,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setOnlyReadsMemory(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_strcoll: case LibFunc_strcasecmp: // 0,1 case LibFunc_strncasecmp: // @@ -359,7 +356,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_strstr: case LibFunc_strpbrk: Changed |= setOnlyAccessesArgMemory(F); @@ -367,26 +364,26 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotThrow(F); Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_strtok: case LibFunc_strtok_r: Changed |= setDoesNotThrow(F); Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_scanf: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_setbuf: case LibFunc_setvbuf: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_strndup: Changed |= setArgNoUndef(F, 1); LLVM_FALLTHROUGH; @@ -398,7 +395,7 @@ bool 
llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_stat: case LibFunc_statvfs: Changed |= setRetAndArgsNoUndef(F); @@ -406,7 +403,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_sscanf: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); @@ -414,7 +411,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_sprintf: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); @@ -423,7 +420,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setOnlyWritesMemory(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_snprintf: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); @@ -432,7 +429,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setOnlyWritesMemory(F, 0); Changed |= setDoesNotCapture(F, 2); Changed |= setOnlyReadsMemory(F, 2); - return Changed; + break; case LibFunc_setitimer: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); @@ -440,13 +437,13 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 1); Changed |= setDoesNotCapture(F, 2); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_system: // May throw; "system" is a valid pthread cancellation point. Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_aligned_alloc: Changed |= setAlignedAllocParam(F, 0); Changed |= setAllocSize(F, 1, None); @@ -464,7 +461,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); Changed |= setWillReturn(F); - return Changed; + break; case LibFunc_memcmp: Changed |= setOnlyAccessesArgMemory(F); Changed |= setOnlyReadsMemory(F); @@ -472,21 +469,21 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_memchr: case LibFunc_memrchr: Changed |= setDoesNotThrow(F); Changed |= setOnlyAccessesArgMemory(F); Changed |= setOnlyReadsMemory(F); Changed |= setWillReturn(F); - return Changed; + break; case LibFunc_modf: case LibFunc_modff: case LibFunc_modfl: Changed |= setDoesNotThrow(F); Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_memcpy: Changed |= setDoesNotThrow(F); Changed |= setOnlyAccessesArgMemory(F); @@ -497,7 +494,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotAlias(F, 1); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_memmove: Changed |= setDoesNotThrow(F); Changed |= setOnlyAccessesArgMemory(F); @@ -506,7 +503,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setOnlyWritesMemory(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_mempcpy: case LibFunc_memccpy: Changed |= setWillReturn(F); @@ 
-519,7 +516,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotAlias(F, 1); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_memalign: Changed |= setAllocFamily(F, "malloc"); Changed |= setAllocKind(F, AllocFnKind::Alloc | AllocFnKind::Aligned | @@ -531,19 +528,19 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); Changed |= setWillReturn(F); - return Changed; + break; case LibFunc_mkdir: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_mktime: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_realloc: case LibFunc_reallocf: case LibFunc_vec_realloc: @@ -559,17 +556,17 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); Changed |= setArgNoUndef(F, 1); - return Changed; + break; case LibFunc_read: // May throw; "read" is a valid pthread cancellation point. Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_rewind: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_rmdir: case LibFunc_remove: case LibFunc_realpath: @@ -577,7 +574,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_rename: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); @@ -585,20 +582,20 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_readlink: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_write: // May throw; "write" is a valid pthread cancellation point. Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_bcopy: Changed |= setDoesNotThrow(F); Changed |= setOnlyAccessesArgMemory(F); @@ -607,7 +604,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyWritesMemory(F, 1); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_bcmp: Changed |= setDoesNotThrow(F); Changed |= setOnlyAccessesArgMemory(F); @@ -615,14 +612,14 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_bzero: Changed |= setDoesNotThrow(F); Changed |= setOnlyAccessesArgMemory(F); Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyWritesMemory(F, 0); - return Changed; + break; case LibFunc_calloc: case LibFunc_vec_calloc: Changed |= setAllocFamily(F, TheLibFunc == LibFunc_vec_calloc ? 
"vec_malloc" @@ -634,21 +631,21 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); Changed |= setWillReturn(F); - return Changed; + break; case LibFunc_chmod: case LibFunc_chown: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_ctermid: case LibFunc_clearerr: case LibFunc_closedir: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_atoi: case LibFunc_atol: case LibFunc_atof: @@ -657,13 +654,13 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setOnlyReadsMemory(F); Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_access: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_fopen: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); @@ -672,19 +669,19 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_fdopen: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_feof: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_free: case LibFunc_vec_free: Changed |= setAllocFamily(F, TheLibFunc == LibFunc_vec_free ? 
"vec_malloc" @@ -696,7 +693,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotThrow(F); Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_fseek: case LibFunc_ftell: case LibFunc_fgetc: @@ -713,45 +710,45 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_ferror: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F); - return Changed; + break; case LibFunc_fputc: case LibFunc_fputc_unlocked: case LibFunc_fstat: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_frexp: case LibFunc_frexpf: case LibFunc_frexpl: Changed |= setDoesNotThrow(F); Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_fstatvfs: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_fgets: case LibFunc_fgets_unlocked: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 2); - return Changed; + break; case LibFunc_fread: case LibFunc_fread_unlocked: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 3); - return Changed; + break; case LibFunc_fwrite: case LibFunc_fwrite_unlocked: Changed |= setRetAndArgsNoUndef(F); @@ -759,7 +756,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 3); // FIXME: readonly #1? 
- return Changed; + break; case LibFunc_fputs: case LibFunc_fputs_unlocked: Changed |= setRetAndArgsNoUndef(F); @@ -767,7 +764,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_fscanf: case LibFunc_fprintf: Changed |= setRetAndArgsNoUndef(F); @@ -775,73 +772,73 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_fgetpos: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_getc: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_getlogin_r: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_getc_unlocked: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_getenv: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setOnlyReadsMemory(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_gets: case LibFunc_getchar: case LibFunc_getchar_unlocked: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); - return Changed; + break; case LibFunc_getitimer: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_getpwnam: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_ungetc: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_uname: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_unlink: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_unsetenv: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_utime: case LibFunc_utimes: Changed |= setRetAndArgsNoUndef(F); @@ -850,13 +847,13 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_putc: case LibFunc_putc_unlocked: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_puts: case LibFunc_printf: case LibFunc_perror: @@ -864,23 +861,23 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_pread: // May throw; "pread" is a valid pthread cancellation point. 
Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_pwrite: // May throw; "pwrite" is a valid pthread cancellation point. Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_putchar: case LibFunc_putchar_unlocked: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); - return Changed; + break; case LibFunc_popen: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); @@ -889,18 +886,18 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_pclose: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_vscanf: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_vsscanf: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); @@ -908,20 +905,20 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_vfscanf: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_vprintf: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_vfprintf: case LibFunc_vsprintf: Changed |= setRetAndArgsNoUndef(F); @@ -929,63 +926,63 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_vsnprintf: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 2); Changed |= setOnlyReadsMemory(F, 2); - return Changed; + break; case LibFunc_open: // May throw; "open" is a valid pthread cancellation point. 
Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_opendir: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_tmpfile: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); - return Changed; + break; case LibFunc_times: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_htonl: case LibFunc_htons: case LibFunc_ntohl: case LibFunc_ntohs: Changed |= setDoesNotThrow(F); Changed |= setDoesNotAccessMemory(F); - return Changed; + break; case LibFunc_lstat: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_lchown: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_qsort: // May throw; places call through function pointer. // Cannot give undef pointer/size Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotCapture(F, 3); - return Changed; + break; case LibFunc_dunder_strndup: Changed |= setArgNoUndef(F, 1); LLVM_FALLTHROUGH; @@ -995,28 +992,28 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_dunder_strtok_r: Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_under_IO_getc: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_under_IO_putc: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_dunder_isoc99_scanf: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_stat64: case LibFunc_lstat64: case LibFunc_statvfs64: @@ -1025,7 +1022,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_dunder_isoc99_sscanf: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); @@ -1033,7 +1030,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_fopen64: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); @@ -1042,30 +1039,30 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_fseeko64: case LibFunc_ftello64: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_tmpfile64: Changed |= setRetAndArgsNoUndef(F); Changed 
|= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); - return Changed; + break; case LibFunc_fstat64: case LibFunc_fstatvfs64: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_open64: // May throw; "open" is a valid pthread cancellation point. Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_gettimeofday: // Currently some platforms have the restrict keyword on the arguments to // gettimeofday. To be conservative, do not add noalias to gettimeofday's @@ -1074,7 +1071,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_memset_pattern4: case LibFunc_memset_pattern8: case LibFunc_memset_pattern16: @@ -1089,18 +1086,18 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setOnlyAccessesArgMemory(F); Changed |= setOnlyWritesMemory(F, 0); Changed |= setDoesNotThrow(F); - return Changed; + break; // int __nvvm_reflect(const char *) case LibFunc_nvvm_reflect: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotAccessMemory(F); Changed |= setDoesNotThrow(F); - return Changed; + break; case LibFunc_ldexp: case LibFunc_ldexpf: case LibFunc_ldexpl: Changed |= setWillReturn(F); - return Changed; + break; case LibFunc_abs: case LibFunc_acos: case LibFunc_acosf: @@ -1227,12 +1224,17 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotFreeMemory(F); Changed |= setOnlyWritesMemory(F); Changed |= setWillReturn(F); - return Changed; + break; default: // FIXME: It'd be really nice to cover all the library functions we're // aware of here. - return false; + break; } + // We have to do this step after AllocKind has been inferred on functions so + // we can reliably identify free-like and realloc-like functions. + if (!isLibFreeFunction(&F, TheLibFunc) && !isReallocLikeFn(&F, &TLI)) + Changed |= setDoesNotFreeMemory(F); + return Changed; } static void setArgExtAttr(Function &F, unsigned ArgNo, diff --git a/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp b/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp index f229d4bf14e9..9101a1e41f7b 100644 --- a/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp +++ b/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp @@ -65,23 +65,6 @@ static bool canonicalizeAliases(Module &M) { canonicalizeAlias(&GA, Changed); return Changed; } - -// Legacy pass that canonicalizes aliases. 
-class CanonicalizeAliasesLegacyPass : public ModulePass { - -public: - /// Pass identification, replacement for typeid - static char ID; - - /// Specify pass name for debug output - StringRef getPassName() const override { return "Canonicalize Aliases"; } - - explicit CanonicalizeAliasesLegacyPass() : ModulePass(ID) {} - - bool runOnModule(Module &M) override { return canonicalizeAliases(M); } -}; -char CanonicalizeAliasesLegacyPass::ID = 0; - } // anonymous namespace PreservedAnalyses CanonicalizeAliasesPass::run(Module &M, @@ -91,14 +74,3 @@ PreservedAnalyses CanonicalizeAliasesPass::run(Module &M, return PreservedAnalyses::none(); } - -INITIALIZE_PASS_BEGIN(CanonicalizeAliasesLegacyPass, "canonicalize-aliases", - "Canonicalize aliases", false, false) -INITIALIZE_PASS_END(CanonicalizeAliasesLegacyPass, "canonicalize-aliases", - "Canonicalize aliases", false, false) - -namespace llvm { -ModulePass *createCanonicalizeAliasesPass() { - return new CanonicalizeAliasesLegacyPass(); -} -} // namespace llvm diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp index 8f053cd56e0e..1d348213bfdb 100644 --- a/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -206,9 +206,20 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, }; // Avoid cloning types, compile units, and (other) subprograms. - for (DISubprogram *ISP : DIFinder->subprograms()) - if (ISP != SPClonedWithinModule) + SmallPtrSet<const DISubprogram *, 16> MappedToSelfSPs; + for (DISubprogram *ISP : DIFinder->subprograms()) { + if (ISP != SPClonedWithinModule) { mapToSelfIfNew(ISP); + MappedToSelfSPs.insert(ISP); + } + } + + // If a subprogram isn't going to be cloned skip its lexical blocks as well. + for (DIScope *S : DIFinder->scopes()) { + auto *LScope = dyn_cast<DILocalScope>(S); + if (LScope && MappedToSelfSPs.count(LScope->getSubprogram())) + mapToSelfIfNew(S); + } for (DICompileUnit *CU : DIFinder->compile_units()) mapToSelfIfNew(CU); @@ -723,14 +734,14 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, } // If the loops above have made these phi nodes have 0 or 1 operand, - // replace them with undef or the input value. We must do this for + // replace them with poison or the input value. We must do this for // correctness, because 0-operand phis are not valid. 
PN = cast<PHINode>(NewBB->begin()); if (PN->getNumIncomingValues() == 0) { BasicBlock::iterator I = NewBB->begin(); BasicBlock::const_iterator OldI = OldBB->begin(); while ((PN = dyn_cast<PHINode>(I++))) { - Value *NV = UndefValue::get(PN->getType()); + Value *NV = PoisonValue::get(PN->getType()); PN->replaceAllUsesWith(NV); assert(VMap[&*OldI] == PN && "VMap mismatch"); VMap[&*OldI] = NV; diff --git a/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp index e3e8f63383df..60f910bceab8 100644 --- a/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp +++ b/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp @@ -117,65 +117,6 @@ static bool runOnFunction(Function &F, bool PostInlining) { return Changed; } -namespace { -struct EntryExitInstrumenter : public FunctionPass { - static char ID; - EntryExitInstrumenter() : FunctionPass(ID) { - initializeEntryExitInstrumenterPass(*PassRegistry::getPassRegistry()); - } - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addPreserved<GlobalsAAWrapperPass>(); - AU.addPreserved<DominatorTreeWrapperPass>(); - } - bool runOnFunction(Function &F) override { return ::runOnFunction(F, false); } -}; -char EntryExitInstrumenter::ID = 0; - -struct PostInlineEntryExitInstrumenter : public FunctionPass { - static char ID; - PostInlineEntryExitInstrumenter() : FunctionPass(ID) { - initializePostInlineEntryExitInstrumenterPass( - *PassRegistry::getPassRegistry()); - } - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addPreserved<GlobalsAAWrapperPass>(); - AU.addPreserved<DominatorTreeWrapperPass>(); - } - bool runOnFunction(Function &F) override { return ::runOnFunction(F, true); } -}; -char PostInlineEntryExitInstrumenter::ID = 0; -} - -INITIALIZE_PASS_BEGIN( - EntryExitInstrumenter, "ee-instrument", - "Instrument function entry/exit with calls to e.g. mcount() (pre inlining)", - false, false) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_END( - EntryExitInstrumenter, "ee-instrument", - "Instrument function entry/exit with calls to e.g. mcount() (pre inlining)", - false, false) - -INITIALIZE_PASS_BEGIN( - PostInlineEntryExitInstrumenter, "post-inline-ee-instrument", - "Instrument function entry/exit with calls to e.g. mcount() " - "(post inlining)", - false, false) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_END( - PostInlineEntryExitInstrumenter, "post-inline-ee-instrument", - "Instrument function entry/exit with calls to e.g. mcount() " - "(post inlining)", - false, false) - -FunctionPass *llvm::createEntryExitInstrumenterPass() { - return new EntryExitInstrumenter(); -} - -FunctionPass *llvm::createPostInlineEntryExitInstrumenterPass() { - return new PostInlineEntryExitInstrumenter(); -} - PreservedAnalyses llvm::EntryExitInstrumenterPass::run(Function &F, FunctionAnalysisManager &AM) { runOnFunction(F, PostInlining); diff --git a/llvm/lib/Transforms/Utils/Evaluator.cpp b/llvm/lib/Transforms/Utils/Evaluator.cpp index 7b8d8553bac2..7509fde6df9d 100644 --- a/llvm/lib/Transforms/Utils/Evaluator.cpp +++ b/llvm/lib/Transforms/Utils/Evaluator.cpp @@ -301,9 +301,9 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB, LLVM_DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n"); if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) { - if (!SI->isSimple()) { - LLVM_DEBUG(dbgs() << "Store is not simple! Can not evaluate.\n"); - return false; // no volatile/atomic accesses. 
+ if (SI->isVolatile()) { + LLVM_DEBUG(dbgs() << "Store is volatile! Can not evaluate.\n"); + return false; // no volatile accesses. } Constant *Ptr = getVal(SI->getOperand(1)); Constant *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI); @@ -337,10 +337,10 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB, if (!Res.first->second.write(Val, Offset, DL)) return false; } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) { - if (!LI->isSimple()) { + if (LI->isVolatile()) { LLVM_DEBUG( - dbgs() << "Found a Load! Not a simple load, can not evaluate.\n"); - return false; // no volatile/atomic accesses. + dbgs() << "Found a Load! Volatile load, can not evaluate.\n"); + return false; // no volatile accesses. } Constant *Ptr = getVal(LI->getOperand(0)); diff --git a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp index 193806d9cc87..8e6d4626c9fd 100644 --- a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp +++ b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp @@ -35,6 +35,13 @@ bool FunctionImportGlobalProcessing::doImportAsDefinition( bool FunctionImportGlobalProcessing::shouldPromoteLocalToGlobal( const GlobalValue *SGV, ValueInfo VI) { assert(SGV->hasLocalLinkage()); + + // Ifuncs and ifunc alias does not have summary. + if (isa<GlobalIFunc>(SGV) || + (isa<GlobalAlias>(SGV) && + isa<GlobalIFunc>(cast<GlobalAlias>(SGV)->getAliaseeObject()))) + return false; + // Both the imported references and the original local variable must // be promoted. if (!isPerformingImport() && !isModuleExporting()) diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index 2fb00f95b749..00387ec426bf 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -2194,9 +2194,11 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, CI->setTailCallKind(ChildTCK); InlinedMustTailCalls |= CI->isMustTailCall(); - // Calls inlined through a 'nounwind' call site should be marked - // 'nounwind'. - if (MarkNoUnwind) + // Call sites inlined through a 'nounwind' call site should be + // 'nounwind' as well. However, avoid marking call sites explicitly + // where possible. This helps expose more opportunities for CSE after + // inlining, commonly when the callee is an intrinsic. + if (MarkNoUnwind && !CI->doesNotThrow()) CI->setDoesNotThrow(); } } @@ -2625,7 +2627,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, } else if (!CB.use_empty()) { // No returns, but something is using the return value of the call. Just // nuke the result. - CB.replaceAllUsesWith(UndefValue::get(CB.getType())); + CB.replaceAllUsesWith(PoisonValue::get(CB.getType())); } // Since we are now done with the Call/Invoke, we can delete it. 
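Several hunks in this import (CloneFunction.cpp and InlineFunction.cpp above, and the Local.cpp changes that follow) share one theme: remaining uses of a dead or unreachable result are now replaced with poison rather than undef before the defining instruction is removed. A minimal sketch of that idiom against the LLVM IR API follows; it is not part of the patch itself, and the helper name is illustrative only.

#include "llvm/IR/Constants.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

// Replace every remaining use of a dead instruction with poison, then erase it.
// Poison is preferred over undef here because the value can never be observed
// on a valid execution path, and poison's stronger semantics leave later
// passes more freedom to fold the uses away.
static void dropDeadInstruction(Instruction *I) {
  if (!I->use_empty())
    I->replaceAllUsesWith(PoisonValue::get(I->getType()));
  I->eraseFromParent();
}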
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index b203259db1c6..2f1d0c2f9012 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -439,6 +439,10 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I, return true; } + if (auto *CB = dyn_cast<CallBase>(I)) + if (isRemovableAlloc(CB, TLI)) + return true; + if (!I->willReturn()) return false; @@ -489,16 +493,13 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I, } } - if (isAllocationFn(I, TLI) && isAllocRemovable(cast<CallBase>(I), TLI)) - return true; - - if (CallInst *CI = isFreeCall(I, TLI)) - if (Constant *C = dyn_cast<Constant>(CI->getArgOperand(0))) - return C->isNullValue() || isa<UndefValue>(C); - - if (auto *Call = dyn_cast<CallBase>(I)) + if (auto *Call = dyn_cast<CallBase>(I)) { + if (Value *FreedOp = getFreedOperand(Call, TLI)) + if (Constant *C = dyn_cast<Constant>(FreedOp)) + return C->isNullValue() || isa<UndefValue>(C); if (isMathLibCallNoop(Call, TLI)) return true; + } // Non-volatile atomic loads from constants can be removed. if (auto *LI = dyn_cast<LoadInst>(I)) @@ -637,7 +638,7 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN, // won't prove fruitful. if (!Visited.insert(I).second) { // Break the cycle and delete the instruction and its operands. - I->replaceAllUsesWith(UndefValue::get(I->getType())); + I->replaceAllUsesWith(PoisonValue::get(I->getType())); (void)RecursivelyDeleteTriviallyDeadInstructions(I, TLI, MSSAU); return true; } @@ -750,8 +751,8 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, // If BB has single-entry PHI nodes, fold them. while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) { Value *NewVal = PN->getIncomingValue(0); - // Replace self referencing PHI with undef, it must be dead. - if (NewVal == PN) NewVal = UndefValue::get(PN->getType()); + // Replace self referencing PHI with poison, it must be dead. + if (NewVal == PN) NewVal = PoisonValue::get(PN->getType()); PN->replaceAllUsesWith(NewVal); PN->eraseFromParent(); } @@ -2105,7 +2106,7 @@ llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) { // Delete the next to last instruction. Instruction *Inst = &*--EndInst->getIterator(); if (!Inst->use_empty() && !Inst->getType()->isTokenTy()) - Inst->replaceAllUsesWith(UndefValue::get(Inst->getType())); + Inst->replaceAllUsesWith(PoisonValue::get(Inst->getType())); if (Inst->isEHPad() || Inst->getType()->isTokenTy()) { EndInst = Inst; continue; @@ -2144,7 +2145,7 @@ unsigned llvm::changeToUnreachable(Instruction *I, bool PreserveLCSSA, BasicBlock::iterator BBI = I->getIterator(), BBE = BB->end(); while (BBI != BBE) { if (!BBI->use_empty()) - BBI->replaceAllUsesWith(UndefValue::get(BBI->getType())); + BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType())); BB->getInstList().erase(BBI++); ++NumInstrsRemoved; } diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp index 0f33559c7e70..597c88ad13df 100644 --- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp @@ -622,7 +622,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // We only need to split loop exit edges. 
Loop *PredLoop = LI->getLoopFor(ExitPred); if (!PredLoop || PredLoop->contains(Exit) || - ExitPred->getTerminator()->isIndirectTerminator()) + isa<IndirectBrInst>(ExitPred->getTerminator())) continue; SplitLatchEdge |= L->getLoopLatch() == ExitPred; BasicBlock *ExitSplit = SplitCriticalEdge( diff --git a/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/llvm/lib/Transforms/Utils/LoopSimplify.cpp index 55d5c733733b..2ff8a3f7b228 100644 --- a/llvm/lib/Transforms/Utils/LoopSimplify.cpp +++ b/llvm/lib/Transforms/Utils/LoopSimplify.cpp @@ -127,7 +127,7 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT, // If the loop is branched to from an indirect terminator, we won't // be able to fully transform the loop, because it prohibits // edge splitting. - if (P->getTerminator()->isIndirectTerminator()) + if (isa<IndirectBrInst>(P->getTerminator())) return nullptr; // Keep track of it. @@ -256,7 +256,7 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, if (PN->getIncomingValue(i) != PN || !L->contains(PN->getIncomingBlock(i))) { // We can't split indirect control flow edges. - if (PN->getIncomingBlock(i)->getTerminator()->isIndirectTerminator()) + if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator())) return nullptr; OuterLoopPreds.push_back(PN->getIncomingBlock(i)); } @@ -375,7 +375,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, std::vector<BasicBlock*> BackedgeBlocks; for (BasicBlock *P : predecessors(Header)) { // Indirect edges cannot be split, so we must fail if we find one. - if (P->getTerminator()->isIndirectTerminator()) + if (isa<IndirectBrInst>(P->getTerminator())) return nullptr; if (P != Preheader) BackedgeBlocks.push_back(P); diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 82f993b4ceab..349063dd5e89 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -602,10 +602,10 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, // loop will be already eliminated and we have less work to do but according // to API doc of User::dropAllReferences only valid operation after dropping // references, is deletion. So let's substitute all usages of - // instruction from the loop with undef value of corresponding type first. + // instruction from the loop with poison value of corresponding type first. for (auto *Block : L->blocks()) for (Instruction &I : *Block) { - auto *Undef = UndefValue::get(I.getType()); + auto *Poison = PoisonValue::get(I.getType()); for (Use &U : llvm::make_early_inc_range(I.uses())) { if (auto *Usr = dyn_cast<Instruction>(U.getUser())) if (L->contains(Usr->getParent())) @@ -615,7 +615,7 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, if (DT) assert(!DT->isReachableFromEntry(U) && "Unexpected user in reachable block"); - U.set(Undef); + U.set(Poison); } auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I); if (!DVI) @@ -1357,7 +1357,7 @@ int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop()); if (isa<SCEVCouldNotCompute>(ExitValue) || !SE->isLoopInvariant(ExitValue, L) || - !isSafeToExpand(ExitValue, *SE)) { + !Rewriter.isSafeToExpand(ExitValue)) { // TODO: This should probably be sunk into SCEV in some way; maybe a // getSCEVForExit(SCEV*, L, ExitingBB)? It can be generalized for // most SCEV expressions and other recurrence types (e.g. 
shift @@ -1370,7 +1370,7 @@ int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE); if (isa<SCEVCouldNotCompute>(ExitValue) || !SE->isLoopInvariant(ExitValue, L) || - !isSafeToExpand(ExitValue, *SE)) + !Rewriter.isSafeToExpand(ExitValue)) continue; } diff --git a/llvm/lib/Transforms/Utils/LowerAtomic.cpp b/llvm/lib/Transforms/Utils/LowerAtomic.cpp index 9914a5ca6c5e..2247b8107739 100644 --- a/llvm/lib/Transforms/Utils/LowerAtomic.cpp +++ b/llvm/lib/Transforms/Utils/LowerAtomic.cpp @@ -31,7 +31,7 @@ bool llvm::lowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) { Value *Res = Builder.CreateSelect(Equal, Val, Orig); Builder.CreateStore(Res, Ptr); - Res = Builder.CreateInsertValue(UndefValue::get(CXI->getType()), Orig, 0); + Res = Builder.CreateInsertValue(PoisonValue::get(CXI->getType()), Orig, 0); Res = Builder.CreateInsertValue(Res, Equal, 1); CXI->replaceAllUsesWith(Res); diff --git a/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp b/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp index deaee467531d..d4ab4504064f 100644 --- a/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp +++ b/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp @@ -81,26 +81,6 @@ bool llvm::nameUnamedGlobals(Module &M) { return Changed; } -namespace { - -// Legacy pass that provides a name to every anon globals. -class NameAnonGlobalLegacyPass : public ModulePass { - -public: - /// Pass identification, replacement for typeid - static char ID; - - /// Specify pass name for debug output - StringRef getPassName() const override { return "Name Anon Globals"; } - - explicit NameAnonGlobalLegacyPass() : ModulePass(ID) {} - - bool runOnModule(Module &M) override { return nameUnamedGlobals(M); } -}; -char NameAnonGlobalLegacyPass::ID = 0; - -} // anonymous namespace - PreservedAnalyses NameAnonGlobalPass::run(Module &M, ModuleAnalysisManager &AM) { if (!nameUnamedGlobals(M)) @@ -108,14 +88,3 @@ PreservedAnalyses NameAnonGlobalPass::run(Module &M, return PreservedAnalyses::none(); } - -INITIALIZE_PASS_BEGIN(NameAnonGlobalLegacyPass, "name-anon-globals", - "Provide a name to nameless globals", false, false) -INITIALIZE_PASS_END(NameAnonGlobalLegacyPass, "name-anon-globals", - "Provide a name to nameless globals", false, false) - -namespace llvm { -ModulePass *createNameAnonGlobalPass() { - return new NameAnonGlobalLegacyPass(); -} -} diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp index 0c8bf3827256..372cd74ea01d 100644 --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -2568,9 +2568,7 @@ namespace { // only needed when the expression includes some subexpression that is not IV // derived. // -// Currently, we only allow division by a nonzero constant here. If this is -// inadequate, we could easily allow division by SCEVUnknown by using -// ValueTracking to check isKnownNonZero(). +// Currently, we only allow division by a value provably non-zero here. // // We cannot generally expand recurrences unless the step dominates the loop // header. 
The expander handles the special case of affine recurrences by @@ -2588,8 +2586,7 @@ struct SCEVFindUnsafe { bool follow(const SCEV *S) { if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) { - const SCEVConstant *SC = dyn_cast<SCEVConstant>(D->getRHS()); - if (!SC || SC->getValue()->isZero()) { + if (!SE.isKnownNonZero(D->getRHS())) { IsUnsafe = true; return false; } @@ -2613,18 +2610,17 @@ struct SCEVFindUnsafe { } bool isDone() const { return IsUnsafe; } }; -} +} // namespace -namespace llvm { -bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE, bool CanonicalMode) { +bool SCEVExpander::isSafeToExpand(const SCEV *S) const { SCEVFindUnsafe Search(SE, CanonicalMode); visitAll(S, Search); return !Search.IsUnsafe; } -bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint, - ScalarEvolution &SE) { - if (!isSafeToExpand(S, SE)) +bool SCEVExpander::isSafeToExpandAt(const SCEV *S, + const Instruction *InsertionPoint) const { + if (!isSafeToExpand(S)) return false; // We have to prove that the expanded site of S dominates InsertionPoint. // This is easy when not in the same block, but hard when S is an instruction @@ -2674,4 +2670,3 @@ void SCEVExpanderCleaner::cleanup() { I->eraseFromParent(); } } -} diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 4b5ade99767b..1806081678a8 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -4851,7 +4851,7 @@ static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU) { PN.moveBefore(InsertPt); // Also, add a dummy incoming value for the original BB itself, // so that the PHI is well-formed until we drop said predecessor. - PN.addIncoming(UndefValue::get(PN.getType()), BB); + PN.addIncoming(PoisonValue::get(PN.getType()), BB); } } diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp index af15e0c31b75..0ab79a32f526 100644 --- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -593,7 +593,7 @@ bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) { } // Trunc no longer needed. - TI->replaceAllUsesWith(UndefValue::get(TI->getType())); + TI->replaceAllUsesWith(PoisonValue::get(TI->getType())); DeadInsts.emplace_back(TI); return true; } @@ -660,7 +660,7 @@ bool SimplifyIndvar::replaceIVUserWithLoopInvariant(Instruction *I) { auto *IP = GetLoopInvariantInsertPosition(L, I); - if (!isSafeToExpandAt(S, IP, *SE)) { + if (!Rewriter.isSafeToExpandAt(S, IP)) { LLVM_DEBUG(dbgs() << "INDVARS: Can not replace IV user: " << *I << " with non-speculable loop invariant: " << *S << '\n'); return false; @@ -679,20 +679,30 @@ bool SimplifyIndvar::replaceIVUserWithLoopInvariant(Instruction *I) { /// Eliminate redundant type cast between integer and float. bool SimplifyIndvar::replaceFloatIVWithIntegerIV(Instruction *UseInst) { - if (UseInst->getOpcode() != CastInst::SIToFP) + if (UseInst->getOpcode() != CastInst::SIToFP && + UseInst->getOpcode() != CastInst::UIToFP) return false; Value *IVOperand = UseInst->getOperand(0); // Get the symbolic expression for this instruction. 
- ConstantRange IVRange = SE->getSignedRange(SE->getSCEV(IVOperand)); + const SCEV *IV = SE->getSCEV(IVOperand); + unsigned MaskBits; + if (UseInst->getOpcode() == CastInst::SIToFP) + MaskBits = SE->getSignedRange(IV).getMinSignedBits(); + else + MaskBits = SE->getUnsignedRange(IV).getActiveBits(); unsigned DestNumSigBits = UseInst->getType()->getFPMantissaWidth(); - if (IVRange.getActiveBits() <= DestNumSigBits) { + if (MaskBits <= DestNumSigBits) { for (User *U : UseInst->users()) { - // Match for fptosi of sitofp and with same type. - auto *CI = dyn_cast<FPToSIInst>(U); + // Match for fptosi/fptoui of sitofp and with same type. + auto *CI = dyn_cast<CastInst>(U); if (!CI || IVOperand->getType() != CI->getType()) continue; + CastInst::CastOps Opcode = CI->getOpcode(); + if (Opcode != CastInst::FPToSI && Opcode != CastInst::FPToUI) + continue; + CI->replaceAllUsesWith(IVOperand); DeadInsts.push_back(CI); LLVM_DEBUG(dbgs() << "INDVARS: Replace IV user: " << *CI @@ -1015,7 +1025,7 @@ class WidenIV { SmallPtrSet<Instruction *,16> Widened; - enum ExtendKind { ZeroExtended, SignExtended, Unknown }; + enum class ExtendKind { Zero, Sign, Unknown }; // A map tracking the kind of extension used to widen each narrow IV // and narrow IV user. @@ -1172,7 +1182,7 @@ WidenIV::WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv, HasGuards(HasGuards), UsePostIncrementRanges(UsePostIncrementRanges), DeadInsts(DI) { assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV"); - ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended; + ExtendKindMap[OrigPhi] = WI.IsSigned ? ExtendKind::Sign : ExtendKind::Zero; } Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType, @@ -1225,7 +1235,7 @@ Instruction *WidenIV::cloneBitwiseIVUser(WidenIV::NarrowIVDefUse DU) { // about the narrow operand yet so must insert a [sz]ext. It is probably loop // invariant and will be folded or hoisted. If it actually comes from a // widened IV, it should be removed during a future call to widenIVUse. - bool IsSigned = getExtendKind(NarrowDef) == SignExtended; + bool IsSigned = getExtendKind(NarrowDef) == ExtendKind::Sign; Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) ? WideDef : createExtendInst(NarrowUse->getOperand(0), WideType, @@ -1290,7 +1300,7 @@ Instruction *WidenIV::cloneArithmeticIVUser(WidenIV::NarrowIVDefUse DU, return WideUse == WideAR; }; - bool SignExtend = getExtendKind(NarrowDef) == SignExtended; + bool SignExtend = getExtendKind(NarrowDef) == ExtendKind::Sign; if (!GuessNonIVOperand(SignExtend)) { SignExtend = !SignExtend; if (!GuessNonIVOperand(SignExtend)) @@ -1350,7 +1360,7 @@ WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) { // Only Add/Sub/Mul instructions supported yet. if (OpCode != Instruction::Add && OpCode != Instruction::Sub && OpCode != Instruction::Mul) - return {nullptr, Unknown}; + return {nullptr, ExtendKind::Unknown}; // One operand (NarrowDef) has already been extended to WideDef. Now determine // if extending the other will lead to a recurrence. 
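The replaceFloatIVWithIntegerIV hunk above extends the cast-pair elimination from sitofp/fptosi to uitofp/fptoui, and the legality condition is the same in both directions: the round trip through floating point is exact whenever the integer's significant bits fit in the destination type's mantissa (24 bits for float, 53 for double). The following is a small standalone sketch of that check for the unsigned case only; it does not use the SCEV API, and the names and the example bound are assumptions for illustration.

#include <cstdint>
#include <iostream>

// Number of bits above the leading zeros, i.e. the bits the float must
// be able to represent exactly.
static unsigned activeBits(uint64_t V) {
  unsigned Bits = 0;
  for (; V; V >>= 1)
    ++Bits;
  return Bits;
}

int main() {
  const unsigned FloatMantissaBits = 24; // what getFPMantissaWidth() reports for float
  uint64_t MaxIVValue = 1u << 20;        // assumed upper bound on the induction variable
  bool RoundTripExact = activeBits(MaxIVValue) <= FloatMantissaBits;
  std::cout << (RoundTripExact ? "uitofp/fptoui pair is removable\n"
                               : "keep the casts\n");
  return 0;
}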
@@ -1362,14 +1372,14 @@ WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) { const OverflowingBinaryOperator *OBO = cast<OverflowingBinaryOperator>(DU.NarrowUse); ExtendKind ExtKind = getExtendKind(DU.NarrowDef); - if (ExtKind == SignExtended && OBO->hasNoSignedWrap()) + if (ExtKind == ExtendKind::Sign && OBO->hasNoSignedWrap()) ExtendOperExpr = SE->getSignExtendExpr( SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType); - else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap()) + else if (ExtKind == ExtendKind::Zero && OBO->hasNoUnsignedWrap()) ExtendOperExpr = SE->getZeroExtendExpr( SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType); else - return {nullptr, Unknown}; + return {nullptr, ExtendKind::Unknown}; // When creating this SCEV expr, don't apply the current operations NSW or NUW // flags. This instruction may be guarded by control flow that the no-wrap @@ -1387,7 +1397,7 @@ WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) { dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode)); if (!AddRec || AddRec->getLoop() != L) - return {nullptr, Unknown}; + return {nullptr, ExtendKind::Unknown}; return {AddRec, ExtKind}; } @@ -1396,17 +1406,17 @@ WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) { /// widening it's type? In other words, can the extend be safely hoisted out of /// the loop with SCEV reducing the value to a recurrence on the same loop. If /// so, return the extended recurrence and the kind of extension used. Otherwise -/// return {nullptr, Unknown}. +/// return {nullptr, ExtendKind::Unknown}. WidenIV::WidenedRecTy WidenIV::getWideRecurrence(WidenIV::NarrowIVDefUse DU) { if (!DU.NarrowUse->getType()->isIntegerTy()) - return {nullptr, Unknown}; + return {nullptr, ExtendKind::Unknown}; const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse); if (SE->getTypeSizeInBits(NarrowExpr->getType()) >= SE->getTypeSizeInBits(WideType)) { // NarrowUse implicitly widens its operand. e.g. a gep with a narrow // index. So don't follow this use. 
- return {nullptr, Unknown}; + return {nullptr, ExtendKind::Unknown}; } const SCEV *WideExpr; @@ -1414,21 +1424,21 @@ WidenIV::WidenedRecTy WidenIV::getWideRecurrence(WidenIV::NarrowIVDefUse DU) { if (DU.NeverNegative) { WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType); if (isa<SCEVAddRecExpr>(WideExpr)) - ExtKind = SignExtended; + ExtKind = ExtendKind::Sign; else { WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType); - ExtKind = ZeroExtended; + ExtKind = ExtendKind::Zero; } - } else if (getExtendKind(DU.NarrowDef) == SignExtended) { + } else if (getExtendKind(DU.NarrowDef) == ExtendKind::Sign) { WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType); - ExtKind = SignExtended; + ExtKind = ExtendKind::Sign; } else { WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType); - ExtKind = ZeroExtended; + ExtKind = ExtendKind::Zero; } const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr); if (!AddRec || AddRec->getLoop() != L) - return {nullptr, Unknown}; + return {nullptr, ExtendKind::Unknown}; return {AddRec, ExtKind}; } @@ -1468,7 +1478,7 @@ bool WidenIV::widenLoopCompare(WidenIV::NarrowIVDefUse DU) { // // (A) == icmp slt i32 sext(%narrow), sext(%val) // == icmp slt i32 zext(%narrow), sext(%val) - bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended; + bool IsSigned = getExtendKind(DU.NarrowDef) == ExtendKind::Sign; if (!(DU.NeverNegative || IsSigned == Cmp->isSigned())) return false; @@ -1533,8 +1543,8 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) { const OverflowingBinaryOperator *OBO = cast<OverflowingBinaryOperator>(NarrowUse); ExtendKind ExtKind = getExtendKind(NarrowDef); - bool CanSignExtend = ExtKind == SignExtended && OBO->hasNoSignedWrap(); - bool CanZeroExtend = ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap(); + bool CanSignExtend = ExtKind == ExtendKind::Sign && OBO->hasNoSignedWrap(); + bool CanZeroExtend = ExtKind == ExtendKind::Zero && OBO->hasNoUnsignedWrap(); auto AnotherOpExtKind = ExtKind; // Check that all uses are either: @@ -1564,14 +1574,14 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) { // predicates. For equality, it's legal to widen icmp for either sign and // zero extend. For sign extend, we can also do so for signed predicates, // likeweise for zero extend we can widen icmp for unsigned predicates. - if (ExtKind == ZeroExtended && ICmpInst::isSigned(Pred)) + if (ExtKind == ExtendKind::Zero && ICmpInst::isSigned(Pred)) return false; - if (ExtKind == SignExtended && ICmpInst::isUnsigned(Pred)) + if (ExtKind == ExtendKind::Sign && ICmpInst::isUnsigned(Pred)) return false; ICmpUsers.push_back(ICmp); continue; } - if (ExtKind == SignExtended) + if (ExtKind == ExtendKind::Sign) User = dyn_cast<SExtInst>(User); else User = dyn_cast<ZExtInst>(User); @@ -1594,7 +1604,7 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) { // will most likely not see it. Let's try to prove it. if (OpCode != Instruction::Add) return false; - if (ExtKind != ZeroExtended) + if (ExtKind != ExtendKind::Zero) return false; const SCEV *LHS = SE->getSCEV(OBO->getOperand(0)); const SCEV *RHS = SE->getSCEV(OBO->getOperand(1)); @@ -1609,7 +1619,7 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) { return false; // In fact, our 'add' is 'sub nuw'. We will need to widen the 2nd operand as // neg(zext(neg(op))), which is basically sext(op). 
- AnotherOpExtKind = SignExtended; + AnotherOpExtKind = ExtendKind::Sign; } // Verifying that Defining operand is an AddRec @@ -1621,14 +1631,16 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) { LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n"); // Generating a widening use instruction. - Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) - ? WideDef - : createExtendInst(NarrowUse->getOperand(0), WideType, - AnotherOpExtKind, NarrowUse); - Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) - ? WideDef - : createExtendInst(NarrowUse->getOperand(1), WideType, - AnotherOpExtKind, NarrowUse); + Value *LHS = + (NarrowUse->getOperand(0) == NarrowDef) + ? WideDef + : createExtendInst(NarrowUse->getOperand(0), WideType, + AnotherOpExtKind == ExtendKind::Sign, NarrowUse); + Value *RHS = + (NarrowUse->getOperand(1) == NarrowDef) + ? WideDef + : createExtendInst(NarrowUse->getOperand(1), WideType, + AnotherOpExtKind == ExtendKind::Sign, NarrowUse); auto *NarrowBO = cast<BinaryOperator>(NarrowUse); auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, @@ -1667,7 +1679,7 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) { auto ExtendedOp = [&](Value * V)->Value * { if (V == NarrowUse) return WideBO; - if (ExtKind == ZeroExtended) + if (ExtKind == ExtendKind::Zero) return Builder.CreateZExt(V, WideBO->getType()); else return Builder.CreateSExt(V, WideBO->getType()); @@ -1723,10 +1735,10 @@ Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU, SCEVExpander &Rewri // This narrow use can be widened by a sext if it's non-negative or its narrow // def was widended by a sext. Same for zext. auto canWidenBySExt = [&]() { - return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended; + return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ExtendKind::Sign; }; auto canWidenByZExt = [&]() { - return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended; + return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ExtendKind::Zero; }; // Our raison d'etre! Eliminate sign and zero extension. @@ -1774,7 +1786,8 @@ Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU, SCEVExpander &Rewri if (!WideAddRec.first) WideAddRec = getWideRecurrence(DU); - assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown)); + assert((WideAddRec.first == nullptr) == + (WideAddRec.second == ExtendKind::Unknown)); if (!WideAddRec.first) { // If use is a loop condition, try to promote the condition instead of // truncating the IV first. @@ -1869,7 +1882,7 @@ PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) { return nullptr; // Widen the induction variable expression. - const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended + const SCEV *WideIVExpr = getExtendKind(OrigPhi) == ExtendKind::Sign ? 
SE->getSignExtendExpr(AddRec, WideType) : SE->getZeroExtendExpr(AddRec, WideType); diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index b359717424a6..bca3b0538c5d 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1948,14 +1948,16 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) { if (Value *Sqrt = replacePowWithSqrt(Pow, B)) return Sqrt; + // If we can approximate pow: // pow(x, n) -> powi(x, n) * sqrt(x) if n has exactly a 0.5 fraction + // pow(x, n) -> powi(x, n) if n is a constant signed integer value const APFloat *ExpoF; - if (match(Expo, m_APFloat(ExpoF)) && !ExpoF->isExactlyValue(0.5) && - !ExpoF->isExactlyValue(-0.5)) { + if (AllowApprox && match(Expo, m_APFloat(ExpoF)) && + !ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)) { APFloat ExpoA(abs(*ExpoF)); APFloat ExpoI(*ExpoF); Value *Sqrt = nullptr; - if (AllowApprox && !ExpoA.isInteger()) { + if (!ExpoA.isInteger()) { APFloat Expo2 = ExpoA; // To check if ExpoA is an integer + 0.5, we add it to itself. If there // is no floating point exception and the result is an integer, then @@ -1979,7 +1981,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) { return nullptr; } - // pow(x, n) -> powi(x, n) if n is a constant signed integer value + // 0.5 fraction is now optionally handled. + // Do pow -> powi for remaining integer exponent APSInt IntExpo(TLI->getIntSize(), /*isUnsigned=*/false); if (ExpoF->isInteger() && ExpoF->convertToInteger(IntExpo, APFloat::rmTowardZero, &Ignored) == diff --git a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp index 832353741500..9bbfe06b9abb 100644 --- a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp +++ b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp @@ -145,8 +145,6 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) { // locate the exit blocks. SetVector<BasicBlock *> ExitingBlocks; SetVector<BasicBlock *> Exits; - // Record the exit blocks that branch to the same block. - MapVector<BasicBlock *, SetVector<BasicBlock *> > CommonSuccs; // We need SetVectors, but the Loop API takes a vector, so we use a temporary. SmallVector<BasicBlock *, 8> Temp; @@ -160,11 +158,6 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) { if (SL == L || L->contains(SL)) continue; Exits.insert(S); - // The typical case for reducing the number of guard blocks occurs when - // the exit block has a single predecessor and successor. - if (S->getSinglePredecessor()) - if (auto *Succ = S->getSingleSuccessor()) - CommonSuccs[Succ].insert(S); } } @@ -179,39 +172,13 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) { for (auto EB : ExitingBlocks) { dbgs() << " " << EB->getName(); } - dbgs() << "\n"; - - dbgs() << "Exit blocks with a common successor:\n"; - for (auto CS : CommonSuccs) { - dbgs() << " Succ " << CS.first->getName() << ", exits:"; - for (auto Exit : CS.second) - dbgs() << " " << Exit->getName(); - dbgs() << "\n"; - }); + dbgs() << "\n";); if (Exits.size() <= 1) { LLVM_DEBUG(dbgs() << "loop does not have multiple exits; nothing to do\n"); return false; } - // When multiple exit blocks branch to the same block, change the control - // flow hub to after the exit blocks rather than before. This reduces the - // number of guard blocks needed after the loop. 
- for (auto CS : CommonSuccs) { - auto CB = CS.first; - auto Preds = CS.second; - if (Exits.contains(CB)) - continue; - if (Preds.size() < 2 || Preds.size() == Exits.size()) - continue; - for (auto Exit : Preds) { - Exits.remove(Exit); - ExitingBlocks.remove(Exit->getSinglePredecessor()); - ExitingBlocks.insert(Exit); - } - Exits.insert(CB); - } - SmallVector<BasicBlock *, 8> GuardBlocks; DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); auto LoopExitBlock = CreateControlFlowHub(&DTU, GuardBlocks, ExitingBlocks, @@ -231,17 +198,6 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) { if (auto ParentLoop = L->getParentLoop()) { for (auto G : GuardBlocks) { ParentLoop->addBasicBlockToLoop(G, LI); - // Ensure the guard block predecessors are in a valid loop. After the - // change to the control flow hub for common successors, a guard block - // predecessor may not be in a loop or may be in an outer loop. - for (auto Pred : predecessors(G)) { - auto PredLoop = LI.getLoopFor(Pred); - if (!ParentLoop->contains(PredLoop)) { - if (PredLoop) - LI.removeBlock(Pred); - ParentLoop->addBasicBlockToLoop(Pred, LI); - } - } } ParentLoop->verifyLoop(); } diff --git a/llvm/lib/Transforms/Utils/Utils.cpp b/llvm/lib/Transforms/Utils/Utils.cpp index f34f2df971b1..d002922cfd30 100644 --- a/llvm/lib/Transforms/Utils/Utils.cpp +++ b/llvm/lib/Transforms/Utils/Utils.cpp @@ -28,7 +28,6 @@ void llvm::initializeTransformUtils(PassRegistry &Registry) { initializeAssumeSimplifyPassLegacyPassPass(Registry); initializeAssumeBuilderPassLegacyPassPass(Registry); initializeBreakCriticalEdgesPass(Registry); - initializeCanonicalizeAliasesLegacyPassPass(Registry); initializeCanonicalizeFreezeInLoopsPass(Registry); initializeInstNamerPass(Registry); initializeLCSSAWrapperPassPass(Registry); @@ -37,7 +36,6 @@ void llvm::initializeTransformUtils(PassRegistry &Registry) { initializeLowerGlobalDtorsLegacyPassPass(Registry); initializeLowerInvokeLegacyPassPass(Registry); initializeLowerSwitchLegacyPassPass(Registry); - initializeNameAnonGlobalLegacyPassPass(Registry); initializePromoteLegacyPassPass(Registry); initializeStripNonLineTableDebugLegacyPassPass(Registry); initializeUnifyFunctionExitNodesLegacyPassPass(Registry); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 0777a1385916..b887ea41676b 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -92,6 +92,7 @@ #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -473,7 +474,7 @@ public: virtual std::pair<BasicBlock *, Value *> createVectorizedLoopSkeleton(); /// Widen a single call instruction within the innermost loop. - void widenCallInstruction(CallInst &I, VPValue *Def, VPUser &ArgOperands, + void widenCallInstruction(CallInst &CI, VPValue *Def, VPUser &ArgOperands, VPTransformState &State); /// Fix the vectorized code, taking care of header phi's, live-outs, and more. @@ -1447,15 +1448,14 @@ public: // through scalar predication or masked load/store or masked gather/scatter. // \p VF is the vectorization factor that will be used to vectorize \p I. // Superset of instructions that return true for isScalarWithPredication. 
- bool isPredicatedInst(Instruction *I, ElementCount VF, - bool IsKnownUniform = false) { - // When we know the load is uniform and the original scalar loop was not - // predicated we don't need to mark it as a predicated instruction. Any - // vectorised blocks created when tail-folding are something artificial we - // have introduced and we know there is always at least one active lane. - // That's why we call Legal->blockNeedsPredication here because it doesn't - // query tail-folding. - if (IsKnownUniform && isa<LoadInst>(I) && + bool isPredicatedInst(Instruction *I, ElementCount VF) { + // When we know the load's address is loop invariant and the instruction + // in the original scalar loop was unconditionally executed then we + // don't need to mark it as a predicated instruction. Tail folding may + // introduce additional predication, but we're guaranteed to always have + // at least one active lane. We call Legal->blockNeedsPredication here + // because it doesn't query tail-folding. + if (Legal->isUniformMemOp(*I) && isa<LoadInst>(I) && !Legal->blockNeedsPredication(I->getParent())) return false; if (!blockNeedsPredicationForAnyReason(I->getParent())) @@ -1657,10 +1657,6 @@ private: InstructionCost getScalarizationOverhead(Instruction *I, ElementCount VF) const; - /// Returns whether the instruction is a load or store and will be a emitted - /// as a vector operation. - bool isConsecutiveLoadOrStore(Instruction *I); - /// Returns true if an artificially high cost for emulated masked memrefs /// should be used. bool useEmulatedMaskMemRefHack(Instruction *I, ElementCount VF); @@ -1919,10 +1915,13 @@ public: auto DiffChecks = RtPtrChecking.getDiffChecks(); if (DiffChecks) { + Value *RuntimeVF = nullptr; MemRuntimeCheckCond = addDiffRuntimeChecks( MemCheckBlock->getTerminator(), L, *DiffChecks, MemCheckExp, - [VF](IRBuilderBase &B, unsigned Bits) { - return getRuntimeVF(B, B.getIntNTy(Bits), VF); + [VF, &RuntimeVF](IRBuilderBase &B, unsigned Bits) { + if (!RuntimeVF) + RuntimeVF = getRuntimeVF(B, B.getIntNTy(Bits), VF); + return RuntimeVF; }, IC); } else { @@ -2947,11 +2946,17 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) { // If tail is to be folded, vector loop takes care of all iterations. Type *CountTy = Count->getType(); Value *CheckMinIters = Builder.getFalse(); - auto CreateStep = [&]() { + auto CreateStep = [&]() -> Value * { // Create step with max(MinProTripCount, UF * VF). - if (UF * VF.getKnownMinValue() < MinProfitableTripCount.getKnownMinValue()) - return createStepForVF(Builder, CountTy, MinProfitableTripCount, 1); - return createStepForVF(Builder, CountTy, VF, UF); + if (UF * VF.getKnownMinValue() >= MinProfitableTripCount.getKnownMinValue()) + return createStepForVF(Builder, CountTy, VF, UF); + + Value *MinProfTC = + createStepForVF(Builder, CountTy, MinProfitableTripCount, 1); + if (!VF.isScalable()) + return MinProfTC; + return Builder.CreateBinaryIntrinsic( + Intrinsic::umax, MinProfTC, createStepForVF(Builder, CountTy, VF, UF)); }; if (!Cost->foldTailByMasking()) @@ -4168,46 +4173,26 @@ bool InnerLoopVectorizer::useOrderedReductions( return Cost->useOrderedReductions(RdxDesc); } -/// A helper function for checking whether an integer division-related -/// instruction may divide by zero (in which case it must be predicated if -/// executed conditionally in the scalar code). -/// TODO: It may be worthwhile to generalize and check isKnownNonZero(). 
-/// Non-zero divisors that are non compile-time constants will not be -/// converted into multiplication, so we will still end up scalarizing -/// the division, but can do so w/o predication. -static bool mayDivideByZero(Instruction &I) { - assert((I.getOpcode() == Instruction::UDiv || - I.getOpcode() == Instruction::SDiv || - I.getOpcode() == Instruction::URem || - I.getOpcode() == Instruction::SRem) && - "Unexpected instruction"); - Value *Divisor = I.getOperand(1); - auto *CInt = dyn_cast<ConstantInt>(Divisor); - return !CInt || CInt->isZero(); -} - -void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def, +void InnerLoopVectorizer::widenCallInstruction(CallInst &CI, VPValue *Def, VPUser &ArgOperands, VPTransformState &State) { - assert(!isa<DbgInfoIntrinsic>(I) && + assert(!isa<DbgInfoIntrinsic>(CI) && "DbgInfoIntrinsic should have been dropped during VPlan construction"); - State.setDebugLocFromInst(&I); - - Module *M = I.getParent()->getParent()->getParent(); - auto *CI = cast<CallInst>(&I); + State.setDebugLocFromInst(&CI); SmallVector<Type *, 4> Tys; - for (Value *ArgOperand : CI->args()) + for (Value *ArgOperand : CI.args()) Tys.push_back(ToVectorTy(ArgOperand->getType(), VF.getKnownMinValue())); - Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); + Intrinsic::ID ID = getVectorIntrinsicIDForCall(&CI, TLI); // The flag shows whether we use Intrinsic or a usual Call for vectorized // version of the instruction. // Is it beneficial to perform intrinsic call compared to lib call? bool NeedToScalarize = false; - InstructionCost CallCost = Cost->getVectorCallCost(CI, VF, NeedToScalarize); - InstructionCost IntrinsicCost = ID ? Cost->getVectorIntrinsicCost(CI, VF) : 0; + InstructionCost CallCost = Cost->getVectorCallCost(&CI, VF, NeedToScalarize); + InstructionCost IntrinsicCost = + ID ? Cost->getVectorIntrinsicCost(&CI, VF) : 0; bool UseVectorIntrinsic = ID && IntrinsicCost <= CallCost; assert((UseVectorIntrinsic || !NeedToScalarize) && "Instruction should be scalarized elsewhere."); @@ -4215,7 +4200,7 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def, "Either the intrinsic cost or vector call cost must be valid"); for (unsigned Part = 0; Part < UF; ++Part) { - SmallVector<Type *, 2> TysForDecl = {CI->getType()}; + SmallVector<Type *, 2> TysForDecl = {CI.getType()}; SmallVector<Value *, 4> Args; for (auto &I : enumerate(ArgOperands.operands())) { // Some intrinsics have a scalar argument - don't replace it with a @@ -4235,27 +4220,28 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def, if (UseVectorIntrinsic) { // Use vector version of the intrinsic. if (VF.isVector()) - TysForDecl[0] = VectorType::get(CI->getType()->getScalarType(), VF); + TysForDecl[0] = VectorType::get(CI.getType()->getScalarType(), VF); + Module *M = State.Builder.GetInsertBlock()->getModule(); VectorF = Intrinsic::getDeclaration(M, ID, TysForDecl); assert(VectorF && "Can't retrieve vector intrinsic."); } else { // Use vector version of the function call. 
- const VFShape Shape = VFShape::get(*CI, VF, false /*HasGlobalPred*/); + const VFShape Shape = VFShape::get(CI, VF, false /*HasGlobalPred*/); #ifndef NDEBUG - assert(VFDatabase(*CI).getVectorizedFunction(Shape) != nullptr && + assert(VFDatabase(CI).getVectorizedFunction(Shape) != nullptr && "Can't create vector function."); #endif - VectorF = VFDatabase(*CI).getVectorizedFunction(Shape); + VectorF = VFDatabase(CI).getVectorizedFunction(Shape); } SmallVector<OperandBundleDef, 1> OpBundles; - CI->getOperandBundlesAsDefs(OpBundles); + CI.getOperandBundlesAsDefs(OpBundles); CallInst *V = Builder.CreateCall(VectorF, Args, OpBundles); if (isa<FPMathOperator>(V)) - V->copyFastMathFlags(CI); + V->copyFastMathFlags(&CI); State.set(Def, V, Part); - State.addMetadata(V, &I); + State.addMetadata(V, &CI); } } @@ -4470,7 +4456,9 @@ bool LoopVectorizationCostModel::isScalarWithPredication( case Instruction::SDiv: case Instruction::SRem: case Instruction::URem: - return mayDivideByZero(*I); + // TODO: We can use the loop-preheader as context point here and get + // context sensitive reasoning + return !isSafeToSpeculativelyExecute(I); } return false; } @@ -5406,7 +5394,7 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor( } LLVM_DEBUG(if (ForceVectorization && !ChosenFactor.Width.isScalar() && - ChosenFactor.Cost >= ScalarCost.Cost) dbgs() + !isMoreProfitable(ChosenFactor, ScalarCost)) dbgs() << "LV: Vectorization seems to be not beneficial, " << "but was forced by a user.\n"); LLVM_DEBUG(dbgs() << "LV: Selecting VF: " << ChosenFactor.Width << ".\n"); @@ -6069,7 +6057,8 @@ bool LoopVectorizationCostModel::useEmulatedMaskMemRefHack(Instruction *I, // from moving "masked load/store" check from legality to cost model. // Masked Load/Gather emulation was previously never allowed. // Limited number of Masked Store/Scatter emulation was allowed. - assert(isPredicatedInst(I, VF) && "Expecting a scalar emulated instruction"); + assert((isPredicatedInst(I, VF) || Legal->isUniformMemOp(*I)) && + "Expecting a scalar emulated instruction"); return isa<LoadInst>(I) || (isa<StoreInst>(I) && NumPredStores > NumberOfStoresToPredicate); @@ -6779,19 +6768,29 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) { NumPredStores++; if (Legal->isUniformMemOp(I)) { - // TODO: Avoid replicating loads and stores instead of - // relying on instcombine to remove them. + // Lowering story for uniform memory ops is currently a bit complicated. + // Scalarization works for everything which isn't a store with scalable + // VF. Fixed len VFs just scalarize and then DCE later; scalarization + // knows how to handle uniform-per-part values (i.e. the first lane + // in each unrolled VF) and can thus handle scalable loads too. For + // scalable stores, we use a scatter if legal. If not, we have no way + // to lower (currently) and thus have to abort vectorization. + if (isa<StoreInst>(&I) && VF.isScalable()) { + if (isLegalGatherOrScatter(&I, VF)) + setWideningDecision(&I, VF, CM_GatherScatter, + getGatherScatterCost(&I, VF)); + else + // Error case, abort vectorization + setWideningDecision(&I, VF, CM_Scalarize, + InstructionCost::getInvalid()); + continue; + } // Load: Scalar load + broadcast // Store: Scalar store + isLoopInvariantStoreValue ? 
0 : extract - InstructionCost Cost; - if (isa<StoreInst>(&I) && VF.isScalable() && - isLegalGatherOrScatter(&I, VF)) { - Cost = getGatherScatterCost(&I, VF); - setWideningDecision(&I, VF, CM_GatherScatter, Cost); - } else { - Cost = getUniformMemOpCost(&I, VF); - setWideningDecision(&I, VF, CM_Scalarize, Cost); - } + // TODO: Avoid replicating loads and stores instead of relying on + // instcombine to remove them. + setWideningDecision(&I, VF, CM_Scalarize, + getUniformMemOpCost(&I, VF)); continue; } @@ -7146,13 +7145,10 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF, InstWidening Decision = getWideningDecision(I, Width); assert(Decision != CM_Unknown && "CM decision should be taken at this point"); - if (Decision == CM_Scalarize) { - if (VF.isScalable() && isa<StoreInst>(I)) - // We can't scalarize a scalable vector store (even a uniform one - // currently), return an invalid cost so as to prevent vectorization. - return InstructionCost::getInvalid(); + if (getWideningCost(I, VF) == InstructionCost::getInvalid()) + return InstructionCost::getInvalid(); + if (Decision == CM_Scalarize) Width = ElementCount::getFixed(1); - } } VectorTy = ToVectorTy(getLoadStoreType(I), Width); return getMemoryInstructionCost(I, VF); @@ -7308,14 +7304,6 @@ Pass *createLoopVectorizePass(bool InterleaveOnlyWhenForced, } // end namespace llvm -bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) { - // Check if the pointer operand of a load or store instruction is - // consecutive. - if (auto *Ptr = getLoadStorePointerOperand(Inst)) - return Legal->isConsecutivePtr(getLoadStoreType(Inst), Ptr); - return false; -} - void LoopVectorizationCostModel::collectValuesToIgnore() { // Ignore ephemeral values. CodeMetrics::collectEphemeralValues(TheLoop, AC, ValuesToIgnore); @@ -8370,7 +8358,7 @@ VPBasicBlock *VPRecipeBuilder::handleReplication( Range); bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange( - [&](ElementCount VF) { return CM.isPredicatedInst(I, VF, IsUniform); }, + [&](ElementCount VF) { return CM.isPredicatedInst(I, VF); }, Range); // Even if the instruction is not marked as uniform, there are certain @@ -8406,8 +8394,6 @@ VPBasicBlock *VPRecipeBuilder::handleReplication( auto *Recipe = new VPReplicateRecipe(I, Plan->mapToVPValues(I->operands()), IsUniform, IsPredicated); - setRecipe(I, Recipe); - Plan->addVPValue(I, Recipe); // Find if I uses a predicated instruction. If so, it will use its scalar // value. Avoid hoisting the insert-element which packs the scalar value into @@ -8426,6 +8412,8 @@ VPBasicBlock *VPRecipeBuilder::handleReplication( // Finalize the recipe for Instr, first if it is not predicated. if (!IsPredicated) { LLVM_DEBUG(dbgs() << "LV: Scalarizing:" << *I << "\n"); + setRecipe(I, Recipe); + Plan->addVPValue(I, Recipe); VPBB->appendRecipe(Recipe); return VPBB; } @@ -8436,7 +8424,7 @@ VPBasicBlock *VPRecipeBuilder::handleReplication( "predicated replication."); VPBlockUtils::disconnectBlocks(VPBB, SingleSucc); // Record predicated instructions for above packing optimizations. 
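// createReplicateRegion wraps the recipe in an if-then construct: a
// branch-on-mask entry block, a "pred.*.if" block holding the replicated
// recipe, and a "pred.*.continue" exiting block that may hold a
// VPPredInstPHIRecipe merging the predicated result.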
- VPBlockBase *Region = createReplicateRegion(I, Recipe, Plan); + VPBlockBase *Region = createReplicateRegion(Recipe, Plan); VPBlockUtils::insertBlockAfter(Region, VPBB); auto *RegSucc = new VPBasicBlock(); VPBlockUtils::insertBlockAfter(RegSucc, Region); @@ -8444,11 +8432,12 @@ VPBasicBlock *VPRecipeBuilder::handleReplication( return RegSucc; } -VPRegionBlock *VPRecipeBuilder::createReplicateRegion( - Instruction *Instr, VPReplicateRecipe *PredRecipe, VPlanPtr &Plan) { +VPRegionBlock * +VPRecipeBuilder::createReplicateRegion(VPReplicateRecipe *PredRecipe, + VPlanPtr &Plan) { + Instruction *Instr = PredRecipe->getUnderlyingInstr(); // Instructions marked for predication are replicated and placed under an // if-then construct to prevent side-effects. - // Generate recipes to compute the block mask for this region. VPValue *BlockInMask = createBlockInMask(Instr->getParent(), Plan); @@ -8461,9 +8450,13 @@ VPRegionBlock *VPRecipeBuilder::createReplicateRegion( ? nullptr : new VPPredInstPHIRecipe(PredRecipe); if (PHIRecipe) { - Plan->removeVPValueFor(Instr); + setRecipe(Instr, PHIRecipe); Plan->addVPValue(Instr, PHIRecipe); + } else { + setRecipe(Instr, PredRecipe); + Plan->addVPValue(Instr, PredRecipe); } + auto *Exiting = new VPBasicBlock(Twine(RegionName) + ".continue", PHIRecipe); auto *Pred = new VPBasicBlock(Twine(RegionName) + ".if", PredRecipe); VPRegionBlock *Region = new VPRegionBlock(Entry, Exiting, RegionName, true); @@ -9564,12 +9557,19 @@ void VPReplicateRecipe::execute(VPTransformState &State) { return; } - // Generate scalar instances for all VF lanes of all UF parts, unless the - // instruction is uniform inwhich case generate only the first lane for each - // of the UF parts. - unsigned EndLane = IsUniform ? 1 : State.VF.getKnownMinValue(); - assert((!State.VF.isScalable() || IsUniform) && - "Can't scalarize a scalable vector"); + if (IsUniform) { + // Uniform within VL means we need to generate lane 0 only for each + // unrolled copy. + for (unsigned Part = 0; Part < State.UF; ++Part) + State.ILV->scalarizeInstruction(getUnderlyingInstr(), this, + VPIteration(Part, 0), IsPredicated, + State); + return; + } + + // Generate scalar instances for all VF lanes of all UF parts. + assert(!State.VF.isScalable() && "Can't scalarize a scalable vector"); + const unsigned EndLane = State.VF.getKnownMinValue(); for (unsigned Part = 0; Part < State.UF; ++Part) for (unsigned Lane = 0; Lane < EndLane; ++Lane) State.ILV->scalarizeInstruction(getUnderlyingInstr(), this, @@ -9577,52 +9577,6 @@ void VPReplicateRecipe::execute(VPTransformState &State) { State); } -void VPPredInstPHIRecipe::execute(VPTransformState &State) { - assert(State.Instance && "Predicated instruction PHI works per instance."); - Instruction *ScalarPredInst = - cast<Instruction>(State.get(getOperand(0), *State.Instance)); - BasicBlock *PredicatedBB = ScalarPredInst->getParent(); - BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor(); - assert(PredicatingBB && "Predicated block has no single predecessor."); - assert(isa<VPReplicateRecipe>(getOperand(0)) && - "operand must be VPReplicateRecipe"); - - // By current pack/unpack logic we need to generate only a single phi node: if - // a vector value for the predicated instruction exists at this point it means - // the instruction has vector users only, and a phi for the vector value is - // needed. In this case the recipe of the predicated instruction is marked to - // also do that packing, thereby "hoisting" the insert-element sequence. 
- // Otherwise, a phi node for the scalar value is needed. - unsigned Part = State.Instance->Part; - if (State.hasVectorValue(getOperand(0), Part)) { - Value *VectorValue = State.get(getOperand(0), Part); - InsertElementInst *IEI = cast<InsertElementInst>(VectorValue); - PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2); - VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector. - VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element. - if (State.hasVectorValue(this, Part)) - State.reset(this, VPhi, Part); - else - State.set(this, VPhi, Part); - // NOTE: Currently we need to update the value of the operand, so the next - // predicated iteration inserts its generated value in the correct vector. - State.reset(getOperand(0), VPhi, Part); - } else { - Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType(); - PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2); - Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()), - PredicatingBB); - Phi->addIncoming(ScalarPredInst, PredicatedBB); - if (State.hasScalarValue(this, *State.Instance)) - State.reset(this, Phi, *State.Instance); - else - State.set(this, Phi, *State.Instance); - // NOTE: Currently we need to update the value of the operand, so the next - // predicated iteration inserts its generated value in the correct vector. - State.reset(getOperand(0), Phi, *State.Instance); - } -} - void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { VPValue *StoredValue = isStore() ? getStoredValue() : nullptr; @@ -9793,8 +9747,7 @@ static ScalarEpilogueLowering getScalarEpilogueLowering( }; // 4) if the TTI hook indicates this is profitable, request predication. - if (TTI->preferPredicateOverEpilogue(L, LI, *SE, *AC, TLI, DT, - LVL.getLAI())) + if (TTI->preferPredicateOverEpilogue(L, LI, *SE, *AC, TLI, DT, &LVL)) return CM_ScalarEpilogueNotNeededUsePredicate; return CM_ScalarEpilogueAllowed; diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index e136cd9aedac..cd044c78d900 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3744,7 +3744,7 @@ void BoUpSLP::reorderTopToBottom() { unsigned Opcode0 = TE->getOpcode(); unsigned Opcode1 = TE->getAltOpcode(); // The opcode mask selects between the two opcodes. - SmallBitVector OpcodeMask(TE->Scalars.size(), 0); + SmallBitVector OpcodeMask(TE->Scalars.size(), false); for (unsigned Lane : seq<unsigned>(0, TE->Scalars.size())) if (cast<Instruction>(TE->Scalars[Lane])->getOpcode() == Opcode1) OpcodeMask.set(Lane); @@ -4814,6 +4814,15 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, return; } + // Don't go into catchswitch blocks, which can happen with PHIs. + // Such blocks can only have PHIs and the catchswitch. There is no + // place to insert a shuffle if we need to, so just avoid that issue. + if (isa<CatchSwitchInst>(BB->getTerminator())) { + LLVM_DEBUG(dbgs() << "SLP: bundle in catchswitch block.\n"); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); + return; + } + // Check that every instruction appears once in this bundle. 
if (!TryToFindDuplicates(S)) return; diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h index c7949c42c03e..07d3fa56020b 100644 --- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -157,10 +157,8 @@ public: return Ingredient2Recipe[I]; } - /// Create a replicating region for instruction \p I that requires - /// predication. \p PredRecipe is a VPReplicateRecipe holding \p I. - VPRegionBlock *createReplicateRegion(Instruction *I, - VPReplicateRecipe *PredRecipe, + /// Create a replicating region for \p PredRecipe. + VPRegionBlock *createReplicateRegion(VPReplicateRecipe *PredRecipe, VPlanPtr &Plan); /// Build a VPReplicationRecipe for \p I and enclose it within a Region if it diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index fdd901a4a70d..cb7507264667 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -892,6 +892,52 @@ void VPBranchOnMaskRecipe::execute(VPTransformState &State) { ReplaceInstWithInst(CurrentTerminator, CondBr); } +void VPPredInstPHIRecipe::execute(VPTransformState &State) { + assert(State.Instance && "Predicated instruction PHI works per instance."); + Instruction *ScalarPredInst = + cast<Instruction>(State.get(getOperand(0), *State.Instance)); + BasicBlock *PredicatedBB = ScalarPredInst->getParent(); + BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor(); + assert(PredicatingBB && "Predicated block has no single predecessor."); + assert(isa<VPReplicateRecipe>(getOperand(0)) && + "operand must be VPReplicateRecipe"); + + // By current pack/unpack logic we need to generate only a single phi node: if + // a vector value for the predicated instruction exists at this point it means + // the instruction has vector users only, and a phi for the vector value is + // needed. In this case the recipe of the predicated instruction is marked to + // also do that packing, thereby "hoisting" the insert-element sequence. + // Otherwise, a phi node for the scalar value is needed. + unsigned Part = State.Instance->Part; + if (State.hasVectorValue(getOperand(0), Part)) { + Value *VectorValue = State.get(getOperand(0), Part); + InsertElementInst *IEI = cast<InsertElementInst>(VectorValue); + PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2); + VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector. + VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element. + if (State.hasVectorValue(this, Part)) + State.reset(this, VPhi, Part); + else + State.set(this, VPhi, Part); + // NOTE: Currently we need to update the value of the operand, so the next + // predicated iteration inserts its generated value in the correct vector. + State.reset(getOperand(0), VPhi, Part); + } else { + Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType(); + PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2); + Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()), + PredicatingBB); + Phi->addIncoming(ScalarPredInst, PredicatedBB); + if (State.hasScalarValue(this, *State.Instance)) + State.reset(this, Phi, *State.Instance); + else + State.set(this, Phi, *State.Instance); + // NOTE: Currently we need to update the value of the operand, so the next + // predicated iteration inserts its generated value in the correct vector. 
+ State.reset(getOperand(0), Phi, *State.Instance); + } +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index 3501de6ab38e..43e0a40fedb9 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -133,7 +133,9 @@ void VPlanVerifier::verifyHierarchicalCFG( verifyRegionRec(TopRegion); } -static bool verifyVPBasicBlock(const VPBasicBlock *VPBB) { +static bool +verifyVPBasicBlock(const VPBasicBlock *VPBB, + DenseMap<const VPBlockBase *, unsigned> &BlockNumbering) { // Verify that phi-like recipes are at the beginning of the block, with no // other recipes in between. auto RecipeI = VPBB->begin(); @@ -165,15 +167,71 @@ static bool verifyVPBasicBlock(const VPBasicBlock *VPBB) { RecipeI++; } + // Verify that defs in VPBB dominate all their uses. The current + // implementation is still incomplete. + DenseMap<const VPRecipeBase *, unsigned> RecipeNumbering; + unsigned Cnt = 0; + for (const VPRecipeBase &R : *VPBB) + RecipeNumbering[&R] = Cnt++; + + for (const VPRecipeBase &R : *VPBB) { + for (const VPValue *V : R.definedValues()) { + for (const VPUser *U : V->users()) { + auto *UI = dyn_cast<VPRecipeBase>(U); + if (!UI || isa<VPHeaderPHIRecipe>(UI)) + continue; + + // If the user is in the same block, check it comes after R in the + // block. + if (UI->getParent() == VPBB) { + if (RecipeNumbering[UI] < RecipeNumbering[&R]) { + errs() << "Use before def!\n"; + return false; + } + continue; + } + + // Skip blocks outside any region for now and blocks outside + // replicate-regions. + auto *ParentR = VPBB->getParent(); + if (!ParentR || !ParentR->isReplicator()) + continue; + + // For replicators, verify that VPPRedInstPHIRecipe defs are only used + // in subsequent blocks. + if (isa<VPPredInstPHIRecipe>(&R)) { + auto I = BlockNumbering.find(UI->getParent()); + unsigned BlockNumber = I == BlockNumbering.end() ? std::numeric_limits<unsigned>::max() : I->second; + if (BlockNumber < BlockNumbering[ParentR]) { + errs() << "Use before def!\n"; + return false; + } + continue; + } + + // All non-VPPredInstPHIRecipe recipes in the block must be used in + // the replicate region only. 
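+ // That is, the user's parent block must itself belong to ParentR.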
+ if (UI->getParent()->getParent() != ParentR) { + errs() << "Use before def!\n"; + return false; + } + } + } + } return true; } bool VPlanVerifier::verifyPlanIsValid(const VPlan &Plan) { + DenseMap<const VPBlockBase *, unsigned> BlockNumbering; + unsigned Cnt = 0; auto Iter = depth_first( VPBlockRecursiveTraversalWrapper<const VPBlockBase *>(Plan.getEntry())); - for (const VPBasicBlock *VPBB : - VPBlockUtils::blocksOnly<const VPBasicBlock>(Iter)) { - if (!verifyVPBasicBlock(VPBB)) + for (const VPBlockBase *VPB : Iter) { + BlockNumbering[VPB] = Cnt++; + auto *VPBB = dyn_cast<VPBasicBlock>(VPB); + if (!VPBB) + continue; + if (!verifyVPBasicBlock(VPBB, BlockNumbering)) return false; } diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index d12624ffb824..a38936644bd3 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -1302,7 +1302,7 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) { for (ShuffleVectorInst *SV : Shuffles) { for (auto U : SV->users()) { ShuffleVectorInst *SSV = dyn_cast<ShuffleVectorInst>(U); - if (SSV && isa<UndefValue>(SSV->getOperand(1))) + if (SSV && isa<UndefValue>(SSV->getOperand(1)) && SSV->getType() == VT) Shuffles.push_back(SSV); } } diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp index 853a0bd8eb54..f084ee2daa93 100644 --- a/llvm/tools/llc/llc.cpp +++ b/llvm/tools/llc/llc.cpp @@ -359,8 +359,6 @@ int main(int argc, char **argv) { initializeCodeGen(*Registry); initializeLoopStrengthReducePass(*Registry); initializeLowerIntrinsicsPass(*Registry); - initializeEntryExitInstrumenterPass(*Registry); - initializePostInlineEntryExitInstrumenterPass(*Registry); initializeUnreachableBlockElimLegacyPassPass(*Registry); initializeConstantHoistingLegacyPassPass(*Registry); initializeScalarOpts(*Registry); diff --git a/llvm/tools/llvm-ar/llvm-ar.cpp b/llvm/tools/llvm-ar/llvm-ar.cpp index e964dc8256a5..1d4a8e9cd398 100644 --- a/llvm/tools/llvm-ar/llvm-ar.cpp +++ b/llvm/tools/llvm-ar/llvm-ar.cpp @@ -18,10 +18,14 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/Object/Archive.h" #include "llvm/Object/ArchiveWriter.h" +#include "llvm/Object/COFFImportFile.h" +#include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/IRObjectFile.h" #include "llvm/Object/MachO.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Object/SymbolicFile.h" +#include "llvm/Object/TapiFile.h" +#include "llvm/Object/Wasm.h" #include "llvm/Object/XCOFFObjectFile.h" #include "llvm/Support/Chrono.h" #include "llvm/Support/CommandLine.h" @@ -55,6 +59,7 @@ #endif using namespace llvm; +using namespace llvm::object; // The name this program was invoked as. 
static StringRef ToolName; @@ -82,7 +87,7 @@ static void printArHelp(StringRef ToolName) { =gnu - gnu =darwin - darwin =bsd - bsd - =aix - aix (big archive) + =bigarchive - big archive (AIX OS) --plugin=<string> - ignored for compatibility -h --help - display this help and exit --output - the directory to extract archive members to @@ -91,6 +96,7 @@ static void printArHelp(StringRef ToolName) { =windows - windows --thin - create a thin archive --version - print the version and exit + -X{32|64|32_64|any} - object mode (only for AIX OS) @<file> - read options from <file> OPERATIONS: @@ -184,6 +190,10 @@ static void failIfError(Error E, Twine Context = "") { }); } +static void warn(Twine Message) { + WithColor::warning(errs(), ToolName) << Message << "\n"; +} + static SmallVector<const char *, 256> PositionalArgs; static bool MRI; @@ -209,6 +219,10 @@ enum ArchiveOperation { CreateSymTab ///< Create a symbol table in an existing archive }; +enum class BitModeTy { Bit32, Bit64, Bit32_64, Any, Unknown }; + +static BitModeTy BitMode = BitModeTy::Bit32; + // Modifiers to follow operation to vary behavior static bool AddAfter = false; ///< 'a' modifier static bool AddBefore = false; ///< 'b' modifier @@ -632,6 +646,71 @@ static bool shouldCreateArchive(ArchiveOperation Op) { llvm_unreachable("Missing entry in covered switch."); } +static bool is64BitSymbolicFile(SymbolicFile &Obj) { + if (auto *IRObj = dyn_cast<IRObjectFile>(&Obj)) + return Triple(IRObj->getTargetTriple()).isArch64Bit(); + if (isa<COFFObjectFile>(Obj) || isa<COFFImportFile>(Obj)) + return false; + if (XCOFFObjectFile *XCOFFObj = dyn_cast<XCOFFObjectFile>(&Obj)) + return XCOFFObj->is64Bit(); + if (isa<WasmObjectFile>(Obj)) + return false; + if (TapiFile *Tapi = dyn_cast<TapiFile>(&Obj)) + return Tapi->is64Bit(); + if (MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(&Obj)) + return MachO->is64Bit(); + if (ELFObjectFileBase *ElfO = dyn_cast<ELFObjectFileBase>(&Obj)) + return ElfO->getBytesInAddress() == 8; + + fail("unsupported file format"); +} + +static bool isValidInBitMode(Binary &Bin) { + if (BitMode == BitModeTy::Bit32_64 || BitMode == BitModeTy::Any) + return true; + + if (SymbolicFile *SymFile = dyn_cast<SymbolicFile>(&Bin)) { + bool Is64Bit = is64BitSymbolicFile(*SymFile); + if ((Is64Bit && (BitMode == BitModeTy::Bit32)) || + (!Is64Bit && (BitMode == BitModeTy::Bit64))) + return false; + } + // In AIX "ar", non-object files are always considered to have a valid bit + // mode. + return true; +} + +Expected<std::unique_ptr<Binary>> getAsBinary(const NewArchiveMember &NM, + LLVMContext *Context) { + auto BinaryOrErr = createBinary(NM.Buf->getMemBufferRef(), Context); + if (BinaryOrErr) + return std::move(*BinaryOrErr); + return BinaryOrErr.takeError(); +} + +Expected<std::unique_ptr<Binary>> getAsBinary(const Archive::Child &C, + LLVMContext *Context) { + return C.getAsBinary(Context); +} + +template <class A> static bool isValidInBitMode(const A &Member) { + if (object::Archive::getDefaultKindForHost() != object::Archive::K_AIXBIG) + return true; + LLVMContext Context; + Expected<std::unique_ptr<Binary>> BinOrErr = getAsBinary(Member, &Context); + // In AIX "ar", if there is a non-object file member, it is never ignored due + // to the bit mode setting. 
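+ // A member that cannot be parsed as a binary is treated as such a
+ // non-object member, so the createBinary failure is consumed here.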
+ if (!BinOrErr) { + consumeError(BinOrErr.takeError()); + return true; + } + return isValidInBitMode(*BinOrErr.get()); +} + +static void warnInvalidObjectForFileMode(Twine Name) { + warn("'" + Name + "' is not valid with the current object file mode"); +} + static void performReadOperation(ArchiveOperation Operation, object::Archive *OldArchive) { if (Operation == Extract && OldArchive->isThin()) @@ -646,6 +725,10 @@ static void performReadOperation(ArchiveOperation Operation, failIfError(NameOrErr.takeError()); StringRef Name = NameOrErr.get(); + // Check whether to ignore this object due to its bitness. + if (!isValidInBitMode(C)) + continue; + if (Filter) { auto I = find_if(Members, [Name](StringRef Path) { return comparePaths(Name, Path); @@ -722,8 +805,7 @@ static void addChildMember(std::vector<NewArchiveMember> &Members, Members.push_back(std::move(*NMOrErr)); } -static void addMember(std::vector<NewArchiveMember> &Members, - StringRef FileName, bool FlattenArchive = false) { +static NewArchiveMember getArchiveMember(StringRef FileName) { Expected<NewArchiveMember> NMOrErr = NewArchiveMember::getFile(FileName, Deterministic); failIfError(NMOrErr.takeError(), FileName); @@ -743,9 +825,24 @@ static void addMember(std::vector<NewArchiveMember> &Members, PathOrErr ? *PathOrErr : sys::path::convert_to_slash(FileName)); } } + return std::move(*NMOrErr); +} + +static void addMember(std::vector<NewArchiveMember> &Members, + NewArchiveMember &NM) { + Members.push_back(std::move(NM)); +} + +static void addMember(std::vector<NewArchiveMember> &Members, + StringRef FileName, bool FlattenArchive = false) { + NewArchiveMember NM = getArchiveMember(FileName); + if (!isValidInBitMode(NM)) { + warnInvalidObjectForFileMode(FileName); + return; + } if (FlattenArchive && - identify_magic(NMOrErr->Buf->getBuffer()) == file_magic::archive) { + identify_magic(NM.Buf->getBuffer()) == file_magic::archive) { object::Archive &Lib = readLibrary(FileName); // When creating thin archives, only flatten if the member is also thin. if (!Thin || Lib.isThin()) { @@ -757,7 +854,7 @@ static void addMember(std::vector<NewArchiveMember> &Members, return; } } - Members.push_back(std::move(*NMOrErr)); + Members.push_back(std::move(NM)); } enum InsertAction { @@ -773,6 +870,9 @@ static InsertAction computeInsertAction(ArchiveOperation Operation, StringRef Name, std::vector<StringRef>::iterator &Pos, StringMap<int> &MemberCount) { + if (!isValidInBitMode(Member)) + return IA_AddOldMember; + if (Operation == QuickAppend || Members.empty()) return IA_AddOldMember; auto MI = find_if( @@ -834,7 +934,7 @@ computeNewArchiveMembers(ArchiveOperation Operation, Expected<StringRef> NameOrErr = Child.getName(); failIfError(NameOrErr.takeError()); std::string Name = std::string(NameOrErr.get()); - if (comparePaths(Name, RelPos)) { + if (comparePaths(Name, RelPos) && isValidInBitMode(Child)) { assert(AddAfter || AddBefore); if (AddBefore) InsertPos = Pos; @@ -845,12 +945,25 @@ computeNewArchiveMembers(ArchiveOperation Operation, std::vector<StringRef>::iterator MemberI = Members.end(); InsertAction Action = computeInsertAction(Operation, Child, Name, MemberI, MemberCount); + + auto HandleNewMember = [](auto Member, auto &Members, auto &Child) { + NewArchiveMember NM = getArchiveMember(*Member); + if (isValidInBitMode(NM)) + addMember(Members, NM); + else { + // If a new member is not a valid object for the bit mode, add + // the old member back. 
+ warnInvalidObjectForFileMode(*Member); + addChildMember(Members, Child, /*FlattenArchive=*/Thin); + } + }; + switch (Action) { case IA_AddOldMember: addChildMember(Ret, Child, /*FlattenArchive=*/Thin); break; case IA_AddNewMember: - addMember(Ret, *MemberI); + HandleNewMember(MemberI, Ret, Child); break; case IA_Delete: break; @@ -858,7 +971,7 @@ computeNewArchiveMembers(ArchiveOperation Operation, addChildMember(Moved, Child, /*FlattenArchive=*/Thin); break; case IA_MoveNewMember: - addMember(Moved, *MemberI); + HandleNewMember(MemberI, Moved, Child); break; } // When processing elements with the count param, we need to preserve the @@ -1043,8 +1156,7 @@ static int performOperation(ArchiveOperation Operation, } else { if (!Create) { // Produce a warning if we should and we're creating the archive - WithColor::warning(errs(), ToolName) - << "creating " << ArchiveName << "\n"; + warn("creating " + ArchiveName); } } @@ -1155,6 +1267,15 @@ static bool handleGenericOption(StringRef arg) { return false; } +static BitModeTy getBitMode(const char *RawBitMode) { + return StringSwitch<BitModeTy>(RawBitMode) + .Case("32", BitModeTy::Bit32) + .Case("64", BitModeTy::Bit64) + .Case("32_64", BitModeTy::Bit32_64) + .Case("any", BitModeTy::Any) + .Default(BitModeTy::Unknown); +} + static const char *matchFlagWithArg(StringRef Expected, ArrayRef<const char *>::iterator &ArgIt, ArrayRef<const char *> Args) { @@ -1204,6 +1325,14 @@ static int ar_main(int argc, char **argv) { cl::ExpandResponseFiles(Saver, getRspQuoting(makeArrayRef(argv, argc)), Argv); + // Get BitMode from enviorment variable "OBJECT_MODE" for AIX OS, if + // specified. + if (object::Archive::getDefaultKindForHost() == object::Archive::K_AIXBIG) { + BitMode = getBitMode(getenv("OBJECT_MODE")); + if (BitMode == BitModeTy::Unknown) + BitMode = BitModeTy::Bit32; + } + for (ArrayRef<const char *>::iterator ArgIt = Argv.begin(); ArgIt != Argv.end(); ++ArgIt) { const char *Match = nullptr; @@ -1258,6 +1387,19 @@ static int ar_main(int argc, char **argv) { matchFlagWithArg("rsp-quoting", ArgIt, Argv)) continue; + if (strncmp(*ArgIt, "-X", 2) == 0) { + if (object::Archive::getDefaultKindForHost() == + object::Archive::K_AIXBIG) { + Match = *(*ArgIt + 2) != '\0' ? *ArgIt + 2 : *(++ArgIt); + BitMode = getBitMode(Match); + if (BitMode == BitModeTy::Unknown) + fail(Twine("invalid bit mode: ") + Match); + continue; + } else { + fail(Twine(*ArgIt) + " option not supported on non AIX OS"); + } + } + Options += *ArgIt + 1; } diff --git a/llvm/tools/llvm-cov/CodeCoverage.cpp b/llvm/tools/llvm-cov/CodeCoverage.cpp index 6932e9b5bd31..13b6c3002216 100644 --- a/llvm/tools/llvm-cov/CodeCoverage.cpp +++ b/llvm/tools/llvm-cov/CodeCoverage.cpp @@ -436,8 +436,7 @@ std::unique_ptr<CoverageMapping> CodeCoverageTool::load() { CoverageMapping::load(ObjectFilenames, PGOFilename, CoverageArches, ViewOpts.CompilationDirectory); if (Error E = CoverageOrErr.takeError()) { - error("Failed to load coverage: " + toString(std::move(E)), - join(ObjectFilenames.begin(), ObjectFilenames.end(), ", ")); + error("Failed to load coverage: " + toString(std::move(E))); return nullptr; } auto Coverage = std::move(CoverageOrErr.get()); @@ -1053,7 +1052,7 @@ int CodeCoverageTool::doShow(int argc, const char **argv, sys::fs::file_status Status; if (std::error_code EC = sys::fs::status(PGOFilename, Status)) { - error("Could not read profile data!", EC.message()); + error("Could not read profile data!" 
+ EC.message(), PGOFilename); return 1; } @@ -1170,6 +1169,12 @@ int CodeCoverageTool::doReport(int argc, const char **argv, return 1; } + sys::fs::file_status Status; + if (std::error_code EC = sys::fs::status(PGOFilename, Status)) { + error("Could not read profile data!" + EC.message(), PGOFilename); + return 1; + } + auto Coverage = load(); if (!Coverage) return 1; @@ -1219,6 +1224,12 @@ int CodeCoverageTool::doExport(int argc, const char **argv, return 1; } + sys::fs::file_status Status; + if (std::error_code EC = sys::fs::status(PGOFilename, Status)) { + error("Could not read profile data!" + EC.message(), PGOFilename); + return 1; + } + auto Coverage = load(); if (!Coverage) { error("Could not load coverage information"); diff --git a/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp index f7d3052c8c4d..cc7f353330b1 100644 --- a/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp +++ b/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp @@ -544,7 +544,7 @@ static bool collectObjectSources(ObjectFile &Obj, DWARFContext &DICtx, } // Dedup and order the sources. - llvm::sort(Sources.begin(), Sources.end()); + llvm::sort(Sources); Sources.erase(std::unique(Sources.begin(), Sources.end()), Sources.end()); for (StringRef Name : Sources) diff --git a/llvm/tools/llvm-dwarfutil/DebugInfoLinker.cpp b/llvm/tools/llvm-dwarfutil/DebugInfoLinker.cpp new file mode 100644 index 000000000000..458a58c12ca7 --- /dev/null +++ b/llvm/tools/llvm-dwarfutil/DebugInfoLinker.cpp @@ -0,0 +1,277 @@ +//=== DebugInfoLinker.cpp -------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "DebugInfoLinker.h" +#include "Error.h" +#include "llvm/DWARFLinker/DWARFLinker.h" +#include "llvm/DWARFLinker/DWARFStreamer.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/DebugInfo/DWARF/DWARFExpression.h" +#include "llvm/Object/ObjectFile.h" +#include <memory> +#include <vector> + +namespace llvm { +namespace dwarfutil { + +// ObjFileAddressMap allows to check whether specified DIE referencing +// dead addresses. It uses tombstone values to determine dead addresses. +// The concrete values of tombstone constants were discussed in +// https://reviews.llvm.org/D81784 and https://reviews.llvm.org/D84825. +// So we use following values as indicators of dead addresses: +// +// bfd: (LowPC == 0) or (LowPC == 1 and HighPC == 1 and DWARF v4 (or less)) +// or ([LowPC, HighPC] is not inside address ranges of .text sections). +// +// maxpc: (LowPC == -1) or (LowPC == -2 and DWARF v4 (or less)) +// That value is assumed to be compatible with +// http://www.dwarfstd.org/ShowIssue.php?issue=200609.1 +// +// exec: [LowPC, HighPC] is not inside address ranges of .text sections +// +// universal: maxpc and bfd +class ObjFileAddressMap : public AddressesMap { +public: + ObjFileAddressMap(DWARFContext &Context, const Options &Options, + object::ObjectFile &ObjFile) + : Opts(Options) { + // Remember addresses of existing text sections. 
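+ // These ranges are later consulted to decide whether a DIE references a
+ // dead (tombstoned or out-of-.text) address.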
+ for (const object::SectionRef &Sect : ObjFile.sections()) { + if (!Sect.isText()) + continue; + const uint64_t Size = Sect.getSize(); + if (Size == 0) + continue; + const uint64_t StartAddr = Sect.getAddress(); + TextAddressRanges.insert({StartAddr, StartAddr + Size}); + } + + // Check CU address ranges for tombstone value. + for (std::unique_ptr<DWARFUnit> &CU : Context.compile_units()) { + Expected<llvm::DWARFAddressRangesVector> ARanges = + CU->getUnitDIE().getAddressRanges(); + if (ARanges) { + for (auto &Range : *ARanges) { + if (!isDeadAddressRange(Range.LowPC, Range.HighPC, CU->getVersion(), + Options.Tombstone, CU->getAddressByteSize())) + DWARFAddressRanges.insert({Range.LowPC, Range.HighPC}, 0); + } + } + } + } + + // should be renamed into has valid address ranges + bool hasValidRelocs() override { return !DWARFAddressRanges.empty(); } + + bool isLiveSubprogram(const DWARFDie &DIE, + CompileUnit::DIEInfo &Info) override { + assert((DIE.getTag() == dwarf::DW_TAG_subprogram || + DIE.getTag() == dwarf::DW_TAG_label) && + "Wrong type of input die"); + + if (Optional<uint64_t> LowPC = + dwarf::toAddress(DIE.find(dwarf::DW_AT_low_pc))) { + if (!isDeadAddress(*LowPC, DIE.getDwarfUnit()->getVersion(), + Opts.Tombstone, + DIE.getDwarfUnit()->getAddressByteSize())) { + Info.AddrAdjust = 0; + Info.InDebugMap = true; + return true; + } + } + + return false; + } + + bool isLiveVariable(const DWARFDie &DIE, + CompileUnit::DIEInfo &Info) override { + assert((DIE.getTag() == dwarf::DW_TAG_variable || + DIE.getTag() == dwarf::DW_TAG_constant) && + "Wrong type of input die"); + + if (Expected<DWARFLocationExpressionsVector> Loc = + DIE.getLocations(dwarf::DW_AT_location)) { + DWARFUnit *U = DIE.getDwarfUnit(); + for (const auto &Entry : *Loc) { + DataExtractor Data(toStringRef(Entry.Expr), + U->getContext().isLittleEndian(), 0); + DWARFExpression Expression(Data, U->getAddressByteSize(), + U->getFormParams().Format); + bool HasLiveAddresses = + any_of(Expression, [&](const DWARFExpression::Operation &Op) { + // TODO: add handling of dwarf::DW_OP_addrx + return !Op.isError() && + (Op.getCode() == dwarf::DW_OP_addr && + !isDeadAddress(Op.getRawOperand(0), U->getVersion(), + Opts.Tombstone, + DIE.getDwarfUnit()->getAddressByteSize())); + }); + + if (HasLiveAddresses) { + Info.AddrAdjust = 0; + Info.InDebugMap = true; + return true; + } + } + } else { + // FIXME: missing DW_AT_location is OK here, but other errors should be + // reported to the user. + consumeError(Loc.takeError()); + } + + return false; + } + + bool applyValidRelocs(MutableArrayRef<char>, uint64_t, bool) override { + // no need to apply relocations to the linked binary. + return false; + } + + RangesTy &getValidAddressRanges() override { return DWARFAddressRanges; }; + + void clear() override { DWARFAddressRanges.clear(); } + + llvm::Expected<uint64_t> relocateIndexedAddr(uint64_t, uint64_t) override { + // should not be called. + return object::createError("no relocations in linked binary"); + } + +protected: + // returns true if specified address range is inside address ranges + // of executable sections. 
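+ // When HighPC is not provided, only containment of LowPC is checked.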
+ bool isInsideExecutableSectionsAddressRange(uint64_t LowPC, + Optional<uint64_t> HighPC) { + Optional<AddressRange> Range = + TextAddressRanges.getRangeThatContains(LowPC); + + if (HighPC) + return Range.has_value() && Range->end() >= *HighPC; + + return Range.has_value(); + } + + uint64_t isBFDDeadAddressRange(uint64_t LowPC, Optional<uint64_t> HighPC, + uint16_t Version) { + if (LowPC == 0) + return true; + + if ((Version <= 4) && HighPC && (LowPC == 1 && *HighPC == 1)) + return true; + + return !isInsideExecutableSectionsAddressRange(LowPC, HighPC); + } + + uint64_t isMAXPCDeadAddressRange(uint64_t LowPC, Optional<uint64_t> HighPC, + uint16_t Version, uint8_t AddressByteSize) { + if (Version <= 4 && HighPC) { + if (LowPC == (dwarf::computeTombstoneAddress(AddressByteSize) - 1)) + return true; + } else if (LowPC == dwarf::computeTombstoneAddress(AddressByteSize)) + return true; + + if (!isInsideExecutableSectionsAddressRange(LowPC, HighPC)) + warning("Address referencing invalid text section is not marked with " + "tombstone value"); + + return false; + } + + bool isDeadAddressRange(uint64_t LowPC, Optional<uint64_t> HighPC, + uint16_t Version, TombstoneKind Tombstone, + uint8_t AddressByteSize) { + switch (Tombstone) { + case TombstoneKind::BFD: + return isBFDDeadAddressRange(LowPC, HighPC, Version); + case TombstoneKind::MaxPC: + return isMAXPCDeadAddressRange(LowPC, HighPC, Version, AddressByteSize); + case TombstoneKind::Universal: + return isBFDDeadAddressRange(LowPC, HighPC, Version) || + isMAXPCDeadAddressRange(LowPC, HighPC, Version, AddressByteSize); + case TombstoneKind::Exec: + return !isInsideExecutableSectionsAddressRange(LowPC, HighPC); + } + + llvm_unreachable("Unknown tombstone value"); + } + + bool isDeadAddress(uint64_t LowPC, uint16_t Version, TombstoneKind Tombstone, + uint8_t AddressByteSize) { + return isDeadAddressRange(LowPC, None, Version, Tombstone, AddressByteSize); + } + +private: + RangesTy DWARFAddressRanges; + AddressRanges TextAddressRanges; + const Options &Opts; +}; + +bool linkDebugInfo(object::ObjectFile &File, const Options &Options, + raw_pwrite_stream &OutStream) { + + auto ReportWarn = [&](const Twine &Message, StringRef Context, + const DWARFDie *Die) { + warning(Message, Context); + + if (!Options.Verbose || !Die) + return; + + DIDumpOptions DumpOpts; + DumpOpts.ChildRecurseDepth = 0; + DumpOpts.Verbose = Options.Verbose; + + WithColor::note() << " in DIE:\n"; + Die->dump(errs(), /*Indent=*/6, DumpOpts); + }; + auto ReportErr = [&](const Twine &Message, StringRef Context, + const DWARFDie *) { + WithColor::error(errs(), Context) << Message << '\n'; + }; + + // Create output streamer. + DwarfStreamer OutStreamer(OutputFileType::Object, OutStream, nullptr, + ReportWarn, ReportWarn); + if (!OutStreamer.init(File.makeTriple(), "")) + return false; + + // Create DWARF linker. 
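+ // Configure the linker for a single input object; ODR deduplication,
+ // thread count and verbosity follow the command-line options validated
+ // earlier.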
+ DWARFLinker DebugInfoLinker(&OutStreamer, DwarfLinkerClient::LLD); + + DebugInfoLinker.setEstimatedObjfilesAmount(1); + DebugInfoLinker.setAccelTableKind(DwarfLinkerAccelTableKind::None); + DebugInfoLinker.setErrorHandler(ReportErr); + DebugInfoLinker.setWarningHandler(ReportWarn); + DebugInfoLinker.setNumThreads(Options.NumThreads); + DebugInfoLinker.setNoODR(!Options.DoODRDeduplication); + DebugInfoLinker.setVerbosity(Options.Verbose); + DebugInfoLinker.setUpdate(!Options.DoGarbageCollection); + + std::vector<std::unique_ptr<DWARFFile>> ObjectsForLinking(1); + std::vector<std::unique_ptr<AddressesMap>> AddresssMapForLinking(1); + std::vector<std::string> EmptyWarnings; + + std::unique_ptr<DWARFContext> Context = DWARFContext::create(File); + + // Add object files to the DWARFLinker. + AddresssMapForLinking[0] = + std::make_unique<ObjFileAddressMap>(*Context, Options, File); + + ObjectsForLinking[0] = std::make_unique<DWARFFile>( + File.getFileName(), &*Context, AddresssMapForLinking[0].get(), + EmptyWarnings); + + for (size_t I = 0; I < ObjectsForLinking.size(); I++) + DebugInfoLinker.addObjectFile(*ObjectsForLinking[I]); + + // Link debug info. + DebugInfoLinker.link(); + OutStreamer.finish(); + return true; +} + +} // end of namespace dwarfutil +} // end of namespace llvm diff --git a/llvm/tools/llvm-dwarfutil/DebugInfoLinker.h b/llvm/tools/llvm-dwarfutil/DebugInfoLinker.h new file mode 100644 index 000000000000..e95c83cb9609 --- /dev/null +++ b/llvm/tools/llvm-dwarfutil/DebugInfoLinker.h @@ -0,0 +1,31 @@ +//===- DebugInfoLinker.h ----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_DWARFUTIL_DEBUGINFOLINKER_H +#define LLVM_TOOLS_LLVM_DWARFUTIL_DEBUGINFOLINKER_H + +#include "Options.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/ObjectFile.h" + +namespace llvm { +namespace dwarfutil { + +inline bool isDebugSection(StringRef SecName) { + return SecName.startswith(".debug") || SecName.startswith(".zdebug") || + SecName == ".gdb_index"; +} + +bool linkDebugInfo(object::ObjectFile &file, const Options &Options, + raw_pwrite_stream &OutStream); + +} // end of namespace dwarfutil +} // end of namespace llvm + +#endif // LLVM_TOOLS_LLVM_DWARFUTIL_DEBUGINFOLINKER_H diff --git a/llvm/tools/llvm-dwarfutil/Error.h b/llvm/tools/llvm-dwarfutil/Error.h new file mode 100644 index 000000000000..9ef288d4f657 --- /dev/null +++ b/llvm/tools/llvm-dwarfutil/Error.h @@ -0,0 +1,44 @@ +//===- Error.h --------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_DWARFUTIL_ERROR_H +#define LLVM_TOOLS_LLVM_DWARFUTIL_ERROR_H + +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/WithColor.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace dwarfutil { + +inline void error(Error Err, StringRef Prefix = "") { + handleAllErrors(std::move(Err), [&](ErrorInfoBase &Info) { + WithColor::error(errs(), Prefix) << Info.message() << '\n'; + }); + std::exit(EXIT_FAILURE); +} + +inline void warning(const Twine &Message, StringRef Prefix = "") { + WithColor::warning(errs(), Prefix) << Message << '\n'; +} + +inline void verbose(const Twine &Message, bool Verbose) { + if (Verbose) + outs() << Message << '\n'; +} + +} // end of namespace dwarfutil +} // end of namespace llvm + +#endif // LLVM_TOOLS_LLVM_DWARFUTIL_ERROR_H diff --git a/llvm/tools/llvm-dwarfutil/Options.h b/llvm/tools/llvm-dwarfutil/Options.h new file mode 100644 index 000000000000..c993200ceb4b --- /dev/null +++ b/llvm/tools/llvm-dwarfutil/Options.h @@ -0,0 +1,46 @@ +//===- Options.h ------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_DWARFUTIL_OPTIONS_H +#define LLVM_TOOLS_LLVM_DWARFUTIL_OPTIONS_H + +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" + +namespace llvm { +namespace dwarfutil { + +/// The kind of tombstone value. +enum class TombstoneKind { + BFD, /// 0/[1:1]. Bfd default. + MaxPC, /// -1/-2. Assumed to match with + /// http://www.dwarfstd.org/ShowIssue.php?issue=200609.1. + Universal, /// both: BFD + MaxPC + Exec, /// match with address range of executable sections. 
+}; + +struct Options { + std::string InputFileName; + std::string OutputFileName; + bool DoGarbageCollection = false; + bool DoODRDeduplication = false; + bool BuildSeparateDebugFile = false; + TombstoneKind Tombstone = TombstoneKind::Universal; + bool Verbose = false; + int NumThreads = 0; + bool Verify = false; + + std::string getSeparateDebugFileName() const { + return OutputFileName + ".debug"; + } +}; + +} // namespace dwarfutil +} // namespace llvm + +#endif // LLVM_TOOLS_LLVM_DWARFUTIL_OPTIONS_H diff --git a/llvm/tools/llvm-dwarfutil/Options.td b/llvm/tools/llvm-dwarfutil/Options.td new file mode 100644 index 000000000000..4ab1b51d808d --- /dev/null +++ b/llvm/tools/llvm-dwarfutil/Options.td @@ -0,0 +1,65 @@ +include "llvm/Option/OptParser.td" + +multiclass BB<string name, string help1, string help2> { + def NAME: Flag<["--"], name>, HelpText<help1>; + def no_ # NAME: Flag<["--"], "no-" # name>, HelpText<help2>; +} + +def help : Flag<["--"], "help">, + HelpText<"Prints this help output">; + +def h : Flag<["-"], "h">, + Alias<help>, + HelpText<"Alias for --help">; + +defm odr_deduplication : BB<"odr-deduplication", + "Do ODR deduplication for debug types(default)", + "Don`t do ODR deduplication for debug types">; + +def odr : Flag<["--"], "odr">, + Alias<odr_deduplication>, + HelpText<"Alias for --odr-deduplication">; + +def no_odr : Flag<["--"], "no-odr">, + Alias<no_odr_deduplication>, + HelpText<"Alias for --no-odr-deduplication">; + +defm garbage_collection : BB<"garbage-collection", + "Do garbage collection for debug info(default)", + "Don`t do garbage collection for debug info">; + +defm separate_debug_file : BB<"separate-debug-file", + "Create two output files: file w/o debug tables and file with debug tables", + "Create single output file, containing debug tables(default)">; + +def tombstone: Separate<["--", "-"], "tombstone">, + MetaVarName<"[bfd,maxpc,exec,universal]">, + HelpText<"Tombstone value used as a marker of invalid address(default: universal)\n" + " =bfd - Zero for all addresses and [1,1] for DWARF v4 (or less) address ranges and exec\n" + " =maxpc - Minus 1 for all addresses and minus 2 for DWARF v4 (or less) address ranges\n" + " =exec - Match with address ranges of executable sections\n" + " =universal - Both: bfd and maxpc" + >; +def: Joined<["--", "-"], "tombstone=">, Alias<tombstone>; + +def threads: Separate<["--", "-"], "num-threads">, + MetaVarName<"<threads>">, + HelpText<"Number of available threads for multi-threaded execution. " + "Defaults to the number of cores on the current machine">; + +def: Separate<["-"], "j">, + Alias<threads>, + HelpText<"Alias for --num-threads">; + +def verbose : Flag<["--"], "verbose">, + HelpText<"Enable verbose logging">; + +def verify : Flag<["--"], "verify">, + HelpText<"Run the DWARF verifier on the resulting debug info">; + +def version : Flag<["--"], "version">, + HelpText<"Print the version and exit">; + +def V : Flag<["-"], "V">, + Alias<version>, + HelpText<"Alias for --version">; diff --git a/llvm/tools/llvm-dwarfutil/llvm-dwarfutil.cpp b/llvm/tools/llvm-dwarfutil/llvm-dwarfutil.cpp new file mode 100644 index 000000000000..e77c82e0fad9 --- /dev/null +++ b/llvm/tools/llvm-dwarfutil/llvm-dwarfutil.cpp @@ -0,0 +1,527 @@ +//=== llvm-dwarfutil.cpp --------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "DebugInfoLinker.h" +#include "Error.h" +#include "Options.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/DebugInfo/DWARF/DWARFVerifier.h" +#include "llvm/MC/MCTargetOptionsCommandFlags.h" +#include "llvm/ObjCopy/CommonConfig.h" +#include "llvm/ObjCopy/ConfigManager.h" +#include "llvm/ObjCopy/ObjCopy.h" +#include "llvm/Option/Arg.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Option/Option.h" +#include "llvm/Support/CRC.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileUtilities.h" +#include "llvm/Support/InitLLVM.h" +#include "llvm/Support/PrettyStackTrace.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/TargetSelect.h" + +using namespace llvm; +using namespace object; + +namespace { +enum ID { + OPT_INVALID = 0, // This is not an option ID. +#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELPTEXT, METAVAR, VALUES) \ + OPT_##ID, +#include "Options.inc" +#undef OPTION +}; + +#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE; +#include "Options.inc" +#undef PREFIX + +const opt::OptTable::Info InfoTable[] = { +#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELPTEXT, METAVAR, VALUES) \ + { \ + PREFIX, NAME, HELPTEXT, \ + METAVAR, OPT_##ID, opt::Option::KIND##Class, \ + PARAM, FLAGS, OPT_##GROUP, \ + OPT_##ALIAS, ALIASARGS, VALUES}, +#include "Options.inc" +#undef OPTION +}; + +class DwarfutilOptTable : public opt::OptTable { +public: + DwarfutilOptTable() : OptTable(InfoTable) {} +}; +} // namespace + +namespace llvm { +namespace dwarfutil { + +std::string ToolName; + +static mc::RegisterMCTargetOptionsFlags MOF; + +static Error validateAndSetOptions(opt::InputArgList &Args, Options &Options) { + auto UnknownArgs = Args.filtered(OPT_UNKNOWN); + if (!UnknownArgs.empty()) + return createStringError( + std::errc::invalid_argument, + formatv("unknown option: {0}", (*UnknownArgs.begin())->getSpelling()) + .str() + .c_str()); + + std::vector<std::string> InputFiles = Args.getAllArgValues(OPT_INPUT); + if (InputFiles.size() != 2) + return createStringError( + std::errc::invalid_argument, + formatv("exactly two positional arguments expected, {0} provided", + InputFiles.size()) + .str() + .c_str()); + + Options.InputFileName = InputFiles[0]; + Options.OutputFileName = InputFiles[1]; + + Options.BuildSeparateDebugFile = + Args.hasFlag(OPT_separate_debug_file, OPT_no_separate_debug_file, false); + Options.DoODRDeduplication = + Args.hasFlag(OPT_odr_deduplication, OPT_no_odr_deduplication, true); + Options.DoGarbageCollection = + Args.hasFlag(OPT_garbage_collection, OPT_no_garbage_collection, true); + Options.Verbose = Args.hasArg(OPT_verbose); + Options.Verify = Args.hasArg(OPT_verify); + + if (opt::Arg *NumThreads = Args.getLastArg(OPT_threads)) + Options.NumThreads = atoi(NumThreads->getValue()); + else + Options.NumThreads = 0; // Use all available hardware threads + + if (opt::Arg *Tombstone = Args.getLastArg(OPT_tombstone)) { + StringRef S = Tombstone->getValue(); + if (S == "bfd") + Options.Tombstone = TombstoneKind::BFD; + else if (S == "maxpc") + Options.Tombstone = TombstoneKind::MaxPC; + else if (S == "universal") + Options.Tombstone = TombstoneKind::Universal; + else if (S == "exec") + Options.Tombstone = TombstoneKind::Exec; + else + return createStringError( + 
std::errc::invalid_argument, + formatv("unknown tombstone value: '{0}'", S).str().c_str()); + } + + if (Options.Verbose) { + if (Options.NumThreads != 1 && Args.hasArg(OPT_threads)) + warning("--num-threads set to 1 because verbose mode is specified"); + + Options.NumThreads = 1; + } + + if (Options.DoODRDeduplication && Args.hasArg(OPT_odr_deduplication) && + !Options.DoGarbageCollection) + return createStringError( + std::errc::invalid_argument, + "cannot use --odr-deduplication without --garbage-collection"); + + if (Options.BuildSeparateDebugFile && Options.OutputFileName == "-") + return createStringError( + std::errc::invalid_argument, + "unable to write to stdout when --separate-debug-file specified"); + + return Error::success(); +} + +static Error setConfigToAddNewDebugSections(objcopy::ConfigManager &Config, + ObjectFile &ObjFile) { + // Add new debug sections. + for (SectionRef Sec : ObjFile.sections()) { + Expected<StringRef> SecName = Sec.getName(); + if (!SecName) + return SecName.takeError(); + + if (isDebugSection(*SecName)) { + Expected<StringRef> SecData = Sec.getContents(); + if (!SecData) + return SecData.takeError(); + + Config.Common.AddSection.emplace_back(objcopy::NewSectionInfo( + *SecName, MemoryBuffer::getMemBuffer(*SecData, *SecName, false))); + } + } + + return Error::success(); +} + +static Error verifyOutput(const Options &Opts) { + if (Opts.OutputFileName == "-") { + warning("verification skipped because writing to stdout"); + return Error::success(); + } + + std::string FileName = Opts.BuildSeparateDebugFile + ? Opts.getSeparateDebugFileName() + : Opts.OutputFileName; + Expected<OwningBinary<Binary>> BinOrErr = createBinary(FileName); + if (!BinOrErr) + return createFileError(FileName, BinOrErr.takeError()); + + if (BinOrErr->getBinary()->isObject()) { + if (ObjectFile *Obj = static_cast<ObjectFile *>(BinOrErr->getBinary())) { + verbose("Verifying DWARF...", Opts.Verbose); + std::unique_ptr<DWARFContext> DICtx = DWARFContext::create(*Obj); + DIDumpOptions DumpOpts; + if (!DICtx->verify(Opts.Verbose ? outs() : nulls(), + DumpOpts.noImplicitRecursion())) + return createFileError(FileName, + createError("output verification failed")); + + return Error::success(); + } + } + + // The file "FileName" was created by this utility in the previous steps + // (i.e. it is already known that it should pass the isObject check). + // If the createBinary() function does not return an error, the isObject + // check should also be successful. + llvm_unreachable( + formatv("tool unexpectedly did not emit a supported object file: '{0}'", + FileName) + .str() + .c_str()); +} + +class raw_crc_ostream : public raw_ostream { +public: + explicit raw_crc_ostream(raw_ostream &O) : OS(O) { SetUnbuffered(); } + + void reserveExtraSpace(uint64_t ExtraSize) override { + OS.reserveExtraSpace(ExtraSize); + } + + uint32_t getCRC32() { return CRC32; } + +protected: + raw_ostream &OS; + uint32_t CRC32 = 0; + + /// See raw_ostream::write_impl. + void write_impl(const char *Ptr, size_t Size) override { + CRC32 = crc32( + CRC32, ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(Ptr), Size)); + OS.write(Ptr, Size); + } + + /// Return the current position within the stream, not counting the bytes + /// currently in the buffer. 
+ uint64_t current_pos() const override { return OS.tell(); } +}; + +static Expected<uint32_t> saveSeparateDebugInfo(const Options &Opts, + ObjectFile &InputFile) { + objcopy::ConfigManager Config; + std::string OutputFilename = Opts.getSeparateDebugFileName(); + Config.Common.InputFilename = Opts.InputFileName; + Config.Common.OutputFilename = OutputFilename; + Config.Common.OnlyKeepDebug = true; + uint32_t WrittenFileCRC32 = 0; + + if (Error Err = writeToOutput( + Config.Common.OutputFilename, [&](raw_ostream &OutFile) -> Error { + raw_crc_ostream CRCBuffer(OutFile); + if (Error Err = objcopy::executeObjcopyOnBinary(Config, InputFile, + CRCBuffer)) + return Err; + + WrittenFileCRC32 = CRCBuffer.getCRC32(); + return Error::success(); + })) + return std::move(Err); + + return WrittenFileCRC32; +} + +static Error saveNonDebugInfo(const Options &Opts, ObjectFile &InputFile, + uint32_t GnuDebugLinkCRC32) { + objcopy::ConfigManager Config; + Config.Common.InputFilename = Opts.InputFileName; + Config.Common.OutputFilename = Opts.OutputFileName; + Config.Common.StripDebug = true; + std::string SeparateDebugFileName = Opts.getSeparateDebugFileName(); + Config.Common.AddGnuDebugLink = sys::path::filename(SeparateDebugFileName); + Config.Common.GnuDebugLinkCRC32 = GnuDebugLinkCRC32; + + if (Error Err = writeToOutput( + Config.Common.OutputFilename, [&](raw_ostream &OutFile) -> Error { + if (Error Err = + objcopy::executeObjcopyOnBinary(Config, InputFile, OutFile)) + return Err; + + return Error::success(); + })) + return Err; + + return Error::success(); +} + +static Error splitDebugIntoSeparateFile(const Options &Opts, + ObjectFile &InputFile) { + Expected<uint32_t> SeparateDebugFileCRC32OrErr = + saveSeparateDebugInfo(Opts, InputFile); + if (!SeparateDebugFileCRC32OrErr) + return SeparateDebugFileCRC32OrErr.takeError(); + + if (Error Err = + saveNonDebugInfo(Opts, InputFile, *SeparateDebugFileCRC32OrErr)) + return Err; + + return Error::success(); +} + +using DebugInfoBits = SmallString<10000>; + +static Error addSectionsFromLinkedData(objcopy::ConfigManager &Config, + ObjectFile &InputFile, + DebugInfoBits &LinkedDebugInfoBits) { + if (dyn_cast<ELFObjectFile<ELF32LE>>(&InputFile)) { + Expected<ELFObjectFile<ELF32LE>> MemFile = ELFObjectFile<ELF32LE>::create( + MemoryBufferRef(LinkedDebugInfoBits, "")); + if (!MemFile) + return MemFile.takeError(); + + if (Error Err = setConfigToAddNewDebugSections(Config, *MemFile)) + return Err; + } else if (dyn_cast<ELFObjectFile<ELF64LE>>(&InputFile)) { + Expected<ELFObjectFile<ELF64LE>> MemFile = ELFObjectFile<ELF64LE>::create( + MemoryBufferRef(LinkedDebugInfoBits, "")); + if (!MemFile) + return MemFile.takeError(); + + if (Error Err = setConfigToAddNewDebugSections(Config, *MemFile)) + return Err; + } else if (dyn_cast<ELFObjectFile<ELF32BE>>(&InputFile)) { + Expected<ELFObjectFile<ELF32BE>> MemFile = ELFObjectFile<ELF32BE>::create( + MemoryBufferRef(LinkedDebugInfoBits, "")); + if (!MemFile) + return MemFile.takeError(); + + if (Error Err = setConfigToAddNewDebugSections(Config, *MemFile)) + return Err; + } else if (dyn_cast<ELFObjectFile<ELF64BE>>(&InputFile)) { + Expected<ELFObjectFile<ELF64BE>> MemFile = ELFObjectFile<ELF64BE>::create( + MemoryBufferRef(LinkedDebugInfoBits, "")); + if (!MemFile) + return MemFile.takeError(); + + if (Error Err = setConfigToAddNewDebugSections(Config, *MemFile)) + return Err; + } else + return createStringError(std::errc::invalid_argument, + "unsupported file format"); + + return Error::success(); +} + +static 
Expected<uint32_t> +saveSeparateLinkedDebugInfo(const Options &Opts, ObjectFile &InputFile, + DebugInfoBits LinkedDebugInfoBits) { + objcopy::ConfigManager Config; + std::string OutputFilename = Opts.getSeparateDebugFileName(); + Config.Common.InputFilename = Opts.InputFileName; + Config.Common.OutputFilename = OutputFilename; + Config.Common.StripDebug = true; + Config.Common.OnlyKeepDebug = true; + uint32_t WrittenFileCRC32 = 0; + + if (Error Err = + addSectionsFromLinkedData(Config, InputFile, LinkedDebugInfoBits)) + return std::move(Err); + + if (Error Err = writeToOutput( + Config.Common.OutputFilename, [&](raw_ostream &OutFile) -> Error { + raw_crc_ostream CRCBuffer(OutFile); + + if (Error Err = objcopy::executeObjcopyOnBinary(Config, InputFile, + CRCBuffer)) + return Err; + + WrittenFileCRC32 = CRCBuffer.getCRC32(); + return Error::success(); + })) + return std::move(Err); + + return WrittenFileCRC32; +} + +static Error saveSingleLinkedDebugInfo(const Options &Opts, + ObjectFile &InputFile, + DebugInfoBits LinkedDebugInfoBits) { + objcopy::ConfigManager Config; + + Config.Common.InputFilename = Opts.InputFileName; + Config.Common.OutputFilename = Opts.OutputFileName; + Config.Common.StripDebug = true; + if (Error Err = + addSectionsFromLinkedData(Config, InputFile, LinkedDebugInfoBits)) + return Err; + + if (Error Err = writeToOutput( + Config.Common.OutputFilename, [&](raw_ostream &OutFile) -> Error { + return objcopy::executeObjcopyOnBinary(Config, InputFile, OutFile); + })) + return Err; + + return Error::success(); +} + +static Error saveLinkedDebugInfo(const Options &Opts, ObjectFile &InputFile, + DebugInfoBits LinkedDebugInfoBits) { + if (Opts.BuildSeparateDebugFile) { + Expected<uint32_t> SeparateDebugFileCRC32OrErr = + saveSeparateLinkedDebugInfo(Opts, InputFile, + std::move(LinkedDebugInfoBits)); + if (!SeparateDebugFileCRC32OrErr) + return SeparateDebugFileCRC32OrErr.takeError(); + + if (Error Err = + saveNonDebugInfo(Opts, InputFile, *SeparateDebugFileCRC32OrErr)) + return Err; + } else { + if (Error Err = saveSingleLinkedDebugInfo(Opts, InputFile, + std::move(LinkedDebugInfoBits))) + return Err; + } + + return Error::success(); +} + +static Error saveCopyOfFile(const Options &Opts, ObjectFile &InputFile) { + objcopy::ConfigManager Config; + + Config.Common.InputFilename = Opts.InputFileName; + Config.Common.OutputFilename = Opts.OutputFileName; + + if (Error Err = writeToOutput( + Config.Common.OutputFilename, [&](raw_ostream &OutFile) -> Error { + return objcopy::executeObjcopyOnBinary(Config, InputFile, OutFile); + })) + return Err; + + return Error::success(); +} + +static Error applyCLOptions(const struct Options &Opts, ObjectFile &InputFile) { + if (Opts.DoGarbageCollection) { + verbose("Do garbage collection for debug info ...", Opts.Verbose); + + DebugInfoBits LinkedDebugInfo; + raw_svector_ostream OutStream(LinkedDebugInfo); + + if (linkDebugInfo(InputFile, Opts, OutStream)) { + if (Error Err = + saveLinkedDebugInfo(Opts, InputFile, std::move(LinkedDebugInfo))) + return Err; + + return Error::success(); + } + + return createStringError(std::errc::invalid_argument, + "possible broken debug info"); + } else if (Opts.BuildSeparateDebugFile) { + if (Error Err = splitDebugIntoSeparateFile(Opts, InputFile)) + return Err; + } else { + if (Error Err = saveCopyOfFile(Opts, InputFile)) + return Err; + } + + return Error::success(); +} + +} // end of namespace dwarfutil +} // end of namespace llvm + +int main(int Argc, char const *Argv[]) { + using namespace dwarfutil; + + 
InitLLVM X(Argc, Argv); + ToolName = Argv[0]; + + // Parse arguments. + DwarfutilOptTable T; + unsigned MAI; + unsigned MAC; + ArrayRef<const char *> ArgsArr = makeArrayRef(Argv + 1, Argc - 1); + opt::InputArgList Args = T.ParseArgs(ArgsArr, MAI, MAC); + + if (Args.hasArg(OPT_help) || Args.size() == 0) { + T.printHelp( + outs(), (ToolName + " [options] <input file> <output file>").c_str(), + "llvm-dwarfutil is a tool to copy and manipulate debug info", false); + return EXIT_SUCCESS; + } + + if (Args.hasArg(OPT_version)) { + cl::PrintVersionMessage(); + return EXIT_SUCCESS; + } + + Options Opts; + if (Error Err = validateAndSetOptions(Args, Opts)) + error(std::move(Err), dwarfutil::ToolName); + + InitializeAllTargets(); + InitializeAllTargetMCs(); + InitializeAllTargetInfos(); + InitializeAllAsmPrinters(); + InitializeAllAsmParsers(); + + ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr = + MemoryBuffer::getFileOrSTDIN(Opts.InputFileName); + if (BuffOrErr.getError()) + error(createFileError(Opts.InputFileName, BuffOrErr.getError())); + + Expected<std::unique_ptr<Binary>> BinOrErr = + object::createBinary(**BuffOrErr); + if (!BinOrErr) + error(createFileError(Opts.InputFileName, BinOrErr.takeError())); + + Expected<FilePermissionsApplier> PermsApplierOrErr = + FilePermissionsApplier::create(Opts.InputFileName); + if (!PermsApplierOrErr) + error(createFileError(Opts.InputFileName, PermsApplierOrErr.takeError())); + + if (!(*BinOrErr)->isObject()) + error(createFileError(Opts.InputFileName, + createError("unsupported input file"))); + + if (Error Err = + applyCLOptions(Opts, *static_cast<ObjectFile *>((*BinOrErr).get()))) + error(createFileError(Opts.InputFileName, std::move(Err))); + + BinOrErr->reset(); + BuffOrErr->reset(); + + if (Error Err = PermsApplierOrErr->apply(Opts.OutputFileName)) + error(std::move(Err)); + + if (Opts.BuildSeparateDebugFile) + if (Error Err = PermsApplierOrErr->apply(Opts.getSeparateDebugFileName())) + error(std::move(Err)); + + if (Opts.Verify) { + if (Error Err = verifyOutput(Opts)) + error(std::move(Err)); + } + + return EXIT_SUCCESS; +} diff --git a/llvm/tools/llvm-mc/llvm-mc.cpp b/llvm/tools/llvm-mc/llvm-mc.cpp index 3e737b9fbaa0..aa380d3fe9bc 100644 --- a/llvm/tools/llvm-mc/llvm-mc.cpp +++ b/llvm/tools/llvm-mc/llvm-mc.cpp @@ -77,9 +77,7 @@ static cl::opt<DebugCompressionType> CompressDebugSections( cl::desc("Choose DWARF debug sections compression:"), cl::values(clEnumValN(DebugCompressionType::None, "none", "No compression"), clEnumValN(DebugCompressionType::Z, "zlib", - "Use zlib compression"), - clEnumValN(DebugCompressionType::GNU, "zlib-gnu", - "Use zlib-gnu compression (deprecated)")), + "Use zlib compression")), cl::cat(MCCategory)); static cl::opt<bool> diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index 1245f9e18206..9e4fa7c0d9dd 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -1131,7 +1131,21 @@ static void emitPostInstructionInfo(formatted_raw_ostream &FOS, FOS.flush(); } -static void disassembleObject(const Target *TheTarget, const ObjectFile &Obj, +static void createFakeELFSections(ObjectFile &Obj) { + assert(Obj.isELF()); + if (auto *Elf32LEObj = dyn_cast<ELF32LEObjectFile>(&Obj)) + Elf32LEObj->createFakeSections(); + else if (auto *Elf64LEObj = dyn_cast<ELF64LEObjectFile>(&Obj)) + Elf64LEObj->createFakeSections(); + else if (auto *Elf32BEObj = dyn_cast<ELF32BEObjectFile>(&Obj)) + Elf32BEObj->createFakeSections(); + else if (auto 
*Elf64BEObj = cast<ELF64BEObjectFile>(&Obj)) + Elf64BEObj->createFakeSections(); + else + llvm_unreachable("Unsupported binary format"); +} + +static void disassembleObject(const Target *TheTarget, ObjectFile &Obj, MCContext &Ctx, MCDisassembler *PrimaryDisAsm, MCDisassembler *SecondaryDisAsm, const MCInstrAnalysis *MIA, MCInstPrinter *IP, @@ -1198,6 +1212,9 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile &Obj, if (Obj.isWasm()) addMissingWasmCodeSymbols(cast<WasmObjectFile>(Obj), AllSymbols); + if (Obj.isELF() && Obj.sections().empty()) + createFakeELFSections(Obj); + BumpPtrAllocator A; StringSaver Saver(A); addPltEntries(Obj, AllSymbols, Saver); @@ -1261,6 +1278,25 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile &Obj, LLVM_DEBUG(LVP.dump()); + std::unordered_map<uint64_t, BBAddrMap> AddrToBBAddrMap; + auto ReadBBAddrMap = [&](Optional<unsigned> SectionIndex = None) { + AddrToBBAddrMap.clear(); + if (const auto *Elf = dyn_cast<ELFObjectFileBase>(&Obj)) { + auto BBAddrMapsOrErr = Elf->readBBAddrMap(SectionIndex); + if (!BBAddrMapsOrErr) + reportWarning(toString(BBAddrMapsOrErr.takeError()), + Obj.getFileName()); + for (auto &FunctionBBAddrMap : *BBAddrMapsOrErr) + AddrToBBAddrMap.emplace(FunctionBBAddrMap.Addr, + std::move(FunctionBBAddrMap)); + } + }; + + // For non-relocatable objects, Read all LLVM_BB_ADDR_MAP sections into a + // single mapping, since they don't have any conflicts. + if (SymbolizeOperands && !Obj.isRelocatableObject()) + ReadBBAddrMap(); + for (const SectionRef &Section : ToolSectionFilter(Obj)) { if (FilterSections.empty() && !DisassembleAll && (!Section.isText() || Section.isVirtual())) @@ -1271,19 +1307,10 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile &Obj, if (!SectSize) continue; - std::unordered_map<uint64_t, BBAddrMap> AddrToBBAddrMap; - if (SymbolizeOperands) { - if (auto *Elf = dyn_cast<ELFObjectFileBase>(&Obj)) { - // Read the BB-address-map corresponding to this section, if present. - auto SectionBBAddrMapsOrErr = Elf->readBBAddrMap(Section.getIndex()); - if (!SectionBBAddrMapsOrErr) - reportWarning(toString(SectionBBAddrMapsOrErr.takeError()), - Obj.getFileName()); - for (auto &FunctionBBAddrMap : *SectionBBAddrMapsOrErr) - AddrToBBAddrMap.emplace(FunctionBBAddrMap.Addr, - std::move(FunctionBBAddrMap)); - } - } + // For relocatable object files, read the LLVM_BB_ADDR_MAP section + // corresponding to this section, if present. + if (SymbolizeOperands && Obj.isRelocatableObject()) + ReadBBAddrMap(Section.getIndex()); // Get the list of all the symbols in this section. 
SectionSymbolsTy &Symbols = AllSymbols[Section]; @@ -1688,7 +1715,7 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile &Obj, reportWarning("failed to disassemble missing symbol " + Sym, FileName); } -static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) { +static void disassembleObject(ObjectFile *Obj, bool InlineRelocs) { const Target *TheTarget = getTarget(Obj); // Package up features to be passed to target/subtarget @@ -1890,7 +1917,7 @@ static size_t getMaxSectionNameWidth(const ObjectFile &Obj) { return MaxWidth; } -void objdump::printSectionHeaders(const ObjectFile &Obj) { +void objdump::printSectionHeaders(ObjectFile &Obj) { size_t NameWidth = getMaxSectionNameWidth(Obj); size_t AddressWidth = 2 * Obj.getBytesInAddress(); bool HasLMAColumn = shouldDisplayLMA(Obj); @@ -1903,6 +1930,9 @@ void objdump::printSectionHeaders(const ObjectFile &Obj) { outs() << "Idx " << left_justify("Name", NameWidth) << " Size " << left_justify("VMA", AddressWidth) << " Type\n"; + if (Obj.isELF() && Obj.sections().empty()) + createFakeELFSections(Obj); + uint64_t Idx; for (const SectionRef &Section : ToolSectionFilter(Obj, &Idx)) { StringRef Name = unwrapOrError(Section.getName(), Obj.getFileName()); diff --git a/llvm/tools/llvm-objdump/llvm-objdump.h b/llvm/tools/llvm-objdump/llvm-objdump.h index dd9f58aa3308..c64c042d513e 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.h +++ b/llvm/tools/llvm-objdump/llvm-objdump.h @@ -124,7 +124,7 @@ SectionFilter ToolSectionFilter(llvm::object::ObjectFile const &O, bool isRelocAddressLess(object::RelocationRef A, object::RelocationRef B); void printRelocations(const object::ObjectFile *O); void printDynamicRelocations(const object::ObjectFile *O); -void printSectionHeaders(const object::ObjectFile &O); +void printSectionHeaders(object::ObjectFile &O); void printSectionContents(const object::ObjectFile *O); void printSymbolTable(const object::ObjectFile &O, StringRef ArchiveName, StringRef ArchitectureName = StringRef(), diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp index 9c6586483ef0..0c23d7c1435f 100644 --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -2471,9 +2471,10 @@ static int showHotFunctionList(const sampleprof::SampleProfileMap &Profiles, (ProfileTotalSample > 0) ? 
(Func.getTotalSamples() * 100.0) / ProfileTotalSample : 0; - PrintValues.emplace_back(HotFuncInfo( - Func.getContext().toString(), Func.getTotalSamples(), - TotalSamplePercent, FuncPair.second.second, Func.getEntrySamples())); + PrintValues.emplace_back( + HotFuncInfo(Func.getContext().toString(), Func.getTotalSamples(), + TotalSamplePercent, FuncPair.second.second, + Func.getHeadSamplesEstimate())); } dumpHotFunctionList(ColumnTitle, ColumnOffset, PrintValues, HotFuncCount, Profiles.size(), HotFuncSample, ProfileTotalSample, diff --git a/llvm/tools/llvm-size/llvm-size.cpp b/llvm/tools/llvm-size/llvm-size.cpp index ec9a4cde56b6..1c7484ba5496 100644 --- a/llvm/tools/llvm-size/llvm-size.cpp +++ b/llvm/tools/llvm-size/llvm-size.cpp @@ -868,8 +868,11 @@ int main(int argc, char **argv) { StringSaver Saver(A); SizeOptTable Tbl; ToolName = argv[0]; - opt::InputArgList Args = Tbl.parseArgs(argc, argv, OPT_UNKNOWN, Saver, - [&](StringRef Msg) { error(Msg); }); + opt::InputArgList Args = + Tbl.parseArgs(argc, argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) { + error(Msg); + exit(1); + }); if (Args.hasArg(OPT_help)) { Tbl.printHelp( outs(), diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp index b782c7a1720a..7ec70e42f1c1 100644 --- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp +++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp @@ -365,20 +365,15 @@ static SmallVector<uint8_t> parseBuildIDArg(const opt::InputArgList &Args, return BuildID; } -// Symbolize the markup from stdin and write the result to stdout. +// Symbolize markup from stdin and write the result to stdout. static void filterMarkup(const opt::InputArgList &Args) { - MarkupParser Parser; MarkupFilter Filter(outs(), parseColorArg(Args)); - for (std::string InputString; std::getline(std::cin, InputString);) { + std::string InputString; + while (std::getline(std::cin, InputString)) { InputString += '\n'; - Parser.parseLine(InputString); - Filter.beginLine(InputString); - while (Optional<MarkupNode> Element = Parser.nextNode()) - Filter.filter(*Element); + Filter.filter(InputString); } - Parser.flush(); - while (Optional<MarkupNode> Element = Parser.nextNode()) - Filter.filter(*Element); + Filter.finish(); } ExitOnError ExitOnErr; diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp index 1160412e37af..a02997f82bb3 100644 --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -352,32 +352,6 @@ static void AddOptimizationPasses(legacy::PassManagerBase &MPM, if (TM) TM->adjustPassManager(Builder); - switch (PGOKindFlag) { - case InstrGen: - Builder.EnablePGOInstrGen = true; - Builder.PGOInstrGen = ProfileFile; - break; - case InstrUse: - Builder.PGOInstrUse = ProfileFile; - break; - case SampleUse: - Builder.PGOSampleUse = ProfileFile; - break; - default: - break; - } - - switch (CSPGOKindFlag) { - case CSInstrGen: - Builder.EnablePGOCSInstrGen = true; - break; - case CSInstrUse: - Builder.EnablePGOCSInstrUse = true; - break; - default: - break; - } - Builder.populateFunctionPassManager(FPM); Builder.populateModulePassManager(MPM); } @@ -545,8 +519,6 @@ int main(int argc, char **argv) { initializeIndirectBrExpandPassPass(Registry); initializeInterleavedLoadCombinePass(Registry); initializeInterleavedAccessPass(Registry); - initializeEntryExitInstrumenterPass(Registry); - initializePostInlineEntryExitInstrumenterPass(Registry); initializeUnreachableBlockElimLegacyPassPass(Registry); initializeExpandReductionsPass(Registry); initializeExpandVectorPredicationPass(Registry); 
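The llvm-symbolizer hunk above drops the explicit MarkupParser loop in favor of a purely line-oriented filter: each stdin line is read with std::getline, the newline that getline strips is re-appended, the whole line is handed to Filter.filter(), and Filter.finish() is called once at end of input (taking over the role of the old Parser.flush() plus drain loop). As an illustration only, separate from the patch itself, here is a minimal standalone sketch of that read-filter-finish pattern; LineFilter is a hypothetical stand-in, not the real MarkupFilter API.

#include <iostream>
#include <string>

// Hypothetical stand-in for a line-oriented filter such as MarkupFilter:
// it consumes whole lines (newline included) and may buffer state that
// has to be flushed once the input ends.
class LineFilter {
public:
  explicit LineFilter(std::ostream &OS) : OS(OS) {}
  // Pass-through sketch; a real filter would rewrite markup found in Line.
  void filter(const std::string &Line) { OS << Line; }
  // Flush anything still buffered after the last line.
  void finish() { OS.flush(); }

private:
  std::ostream &OS;
};

int main() {
  LineFilter Filter(std::cout);
  std::string InputString;
  while (std::getline(std::cin, InputString)) {
    InputString += '\n'; // std::getline drops the delimiter; restore it
    Filter.filter(InputString);
  }
  Filter.finish(); // handle end-of-input exactly once, as the new code does
  return 0;
}

The main simplification the hunk buys is that end-of-input handling collapses into a single finish() call: the per-line parseLine/beginLine/nextNode steps and the trailing flush-and-drain of the old version are no longer spelled out at the call site.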
diff --git a/llvm/utils/TableGen/CTagsEmitter.cpp b/llvm/utils/TableGen/CTagsEmitter.cpp index ccb7f3300dde..fe62d6a9b67f 100644 --- a/llvm/utils/TableGen/CTagsEmitter.cpp +++ b/llvm/utils/TableGen/CTagsEmitter.cpp @@ -27,18 +27,22 @@ namespace { class Tag { private: - const std::string *Id; - SMLoc Loc; + StringRef Id; + StringRef BufferIdentifier; + unsigned Line; public: - Tag(const std::string &Name, const SMLoc Location) - : Id(&Name), Loc(Location) {} - int operator<(const Tag &B) const { return *Id < *B.Id; } - void emit(raw_ostream &OS) const { + Tag(StringRef Name, const SMLoc Location) : Id(Name) { const MemoryBuffer *CurMB = - SrcMgr.getMemoryBuffer(SrcMgr.FindBufferContainingLoc(Loc)); - auto BufferName = CurMB->getBufferIdentifier(); - std::pair<unsigned, unsigned> LineAndColumn = SrcMgr.getLineAndColumn(Loc); - OS << *Id << "\t" << BufferName << "\t" << LineAndColumn.first << "\n"; + SrcMgr.getMemoryBuffer(SrcMgr.FindBufferContainingLoc(Location)); + BufferIdentifier = CurMB->getBufferIdentifier(); + auto LineAndColumn = SrcMgr.getLineAndColumn(Location); + Line = LineAndColumn.first; + } + int operator<(const Tag &B) const { + return std::make_tuple(Id, BufferIdentifier, Line) < std::make_tuple(B.Id, B.BufferIdentifier, B.Line); + } + void emit(raw_ostream &OS) const { + OS << Id << "\t" << BufferIdentifier << "\t" << Line << "\n"; } }; @@ -67,8 +71,11 @@ void CTagsEmitter::run(raw_ostream &OS) { std::vector<Tag> Tags; // Collect tags. Tags.reserve(Classes.size() + Defs.size()); - for (const auto &C : Classes) + for (const auto &C : Classes) { Tags.push_back(Tag(C.first, locate(C.second.get()))); + for (SMLoc FwdLoc : C.second->getForwardDeclarationLocs()) + Tags.push_back(Tag(C.first, FwdLoc)); + } for (const auto &D : Defs) Tags.push_back(Tag(D.first, locate(D.second.get()))); // Emit tags. diff --git a/llvm/utils/TableGen/CallingConvEmitter.cpp b/llvm/utils/TableGen/CallingConvEmitter.cpp index 8f080cd250ab..e8ec90e9c078 100644 --- a/llvm/utils/TableGen/CallingConvEmitter.cpp +++ b/llvm/utils/TableGen/CallingConvEmitter.cpp @@ -149,7 +149,8 @@ void CallingConvEmitter::EmitAction(Record *Action, << "(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))\n" << IndentStr << " return false;\n"; DelegateToMap[CurrentAction].insert(CC->getName().str()); - } else if (Action->isSubClassOf("CCAssignToReg")) { + } else if (Action->isSubClassOf("CCAssignToReg") || + Action->isSubClassOf("CCAssignToRegAndStack")) { ListInit *RegList = Action->getValueAsListInit("RegList"); if (RegList->size() == 1) { std::string Name = getQualifiedName(RegList->getElementAsRecord(0)); @@ -178,6 +179,28 @@ void CallingConvEmitter::EmitAction(Record *Action, } O << IndentStr << " State.addLoc(CCValAssign::getReg(ValNo, ValVT, " << "Reg, LocVT, LocInfo));\n"; + if (Action->isSubClassOf("CCAssignToRegAndStack")) { + int Size = Action->getValueAsInt("Size"); + int Align = Action->getValueAsInt("Align"); + O << IndentStr << " (void)State.AllocateStack("; + if (Size) + O << Size << ", "; + else + O << "\n" + << IndentStr + << " State.getMachineFunction().getDataLayout()." + "getTypeAllocSize(EVT(LocVT).getTypeForEVT(State.getContext()))," + " "; + if (Align) + O << "Align(" << Align << ")"; + else + O << "\n" + << IndentStr + << " State.getMachineFunction().getDataLayout()." 
+ "getABITypeAlign(EVT(LocVT).getTypeForEVT(State.getContext()" + "))"; + O << ");\n"; + } O << IndentStr << " return false;\n"; O << IndentStr << "}\n"; } else if (Action->isSubClassOf("CCAssignToRegWithShadow")) { diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp index fd58e798b445..b9c563c62bbe 100644 --- a/llvm/utils/TableGen/DXILEmitter.cpp +++ b/llvm/utils/TableGen/DXILEmitter.cpp @@ -122,15 +122,14 @@ static std::string buildCategoryStr(StringSet<> &Cetegorys) { static void emitDXILEnums(std::vector<DXILOperationData> &DXILOps, raw_ostream &OS) { // Sort by Category + OpName. - std::sort(DXILOps.begin(), DXILOps.end(), - [](DXILOperationData &A, DXILOperationData &B) { - // Group by Category first. - if (A.Category == B.Category) - // Inside same Category, order by OpName. - return A.DXILOp < B.DXILOp; - else - return A.Category < B.Category; - }); + llvm::sort(DXILOps, [](DXILOperationData &A, DXILOperationData &B) { + // Group by Category first. + if (A.Category == B.Category) + // Inside same Category, order by OpName. + return A.DXILOp < B.DXILOp; + else + return A.Category < B.Category; + }); OS << "// Enumeration for operations specified by DXIL\n"; OS << "enum class OpCode : unsigned {\n"; @@ -160,20 +159,19 @@ static void emitDXILEnums(std::vector<DXILOperationData> &DXILOps, std::make_pair(It.getKey().str(), buildCategoryStr(It.second))); } // Sort by Category + ClassName. - std::sort(ClassVec.begin(), ClassVec.end(), - [](std::pair<std::string, std::string> &A, - std::pair<std::string, std::string> &B) { - StringRef ClassA = A.first; - StringRef CategoryA = A.second; - StringRef ClassB = B.first; - StringRef CategoryB = B.second; - // Group by Category first. - if (CategoryA == CategoryB) - // Inside same Category, order by ClassName. - return ClassA < ClassB; - else - return CategoryA < CategoryB; - }); + llvm::sort(ClassVec, [](std::pair<std::string, std::string> &A, + std::pair<std::string, std::string> &B) { + StringRef ClassA = A.first; + StringRef CategoryA = A.second; + StringRef ClassB = B.first; + StringRef CategoryB = B.second; + // Group by Category first. + if (CategoryA == CategoryB) + // Inside same Category, order by ClassName. + return ClassA < ClassB; + else + return CategoryA < CategoryB; + }); OS << "// Groups for DXIL operations with equivalent function templates\n"; OS << "enum class OpCodeClass : unsigned {\n"; @@ -266,10 +264,9 @@ static std::string getDXILOpClassName(StringRef DXILOpClass) { static void emitDXILOperationTable(std::vector<DXILOperationData> &DXILOps, raw_ostream &OS) { // Sort by DXILOpID. - std::sort(DXILOps.begin(), DXILOps.end(), - [](DXILOperationData &A, DXILOperationData &B) { - return A.DXILOpID < B.DXILOpID; - }); + llvm::sort(DXILOps, [](DXILOperationData &A, DXILOperationData &B) { + return A.DXILOpID < B.DXILOpID; + }); // Collect Names. 
SequenceToOffsetTable<std::string> OpClassStrings; diff --git a/llvm/utils/TableGen/DirectiveEmitter.cpp b/llvm/utils/TableGen/DirectiveEmitter.cpp index f3751591f3d9..f32fbe3e25cd 100644 --- a/llvm/utils/TableGen/DirectiveEmitter.cpp +++ b/llvm/utils/TableGen/DirectiveEmitter.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringSet.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" @@ -668,6 +669,85 @@ void GenerateFlangClauseParserKindMap(const DirectiveLanguage &DirLang, << " Parser clause\");\n"; } +bool compareClauseName(Record *R1, Record *R2) { + Clause C1{R1}; + Clause C2{R2}; + return (C1.getName() > C2.getName()); +} + +// Generate the parser for the clauses. +void GenerateFlangClausesParser(const DirectiveLanguage &DirLang, + raw_ostream &OS) { + std::vector<Record *> Clauses = DirLang.getClauses(); + // Sort clauses in reverse alphabetical order so with clauses with same + // beginning, the longer option is tried before. + llvm::sort(Clauses, compareClauseName); + IfDefScope Scope("GEN_FLANG_CLAUSES_PARSER", OS); + OS << "\n"; + unsigned index = 0; + unsigned lastClauseIndex = DirLang.getClauses().size() - 1; + OS << "TYPE_PARSER(\n"; + for (const auto &C : Clauses) { + Clause Clause{C}; + if (Clause.getAliases().empty()) { + OS << " \"" << Clause.getName() << "\""; + } else { + OS << " (" + << "\"" << Clause.getName() << "\"_tok"; + for (StringRef alias : Clause.getAliases()) { + OS << " || \"" << alias << "\"_tok"; + } + OS << ")"; + } + + OS << " >> construct<" << DirLang.getFlangClauseBaseClass() + << ">(construct<" << DirLang.getFlangClauseBaseClass() + << "::" << Clause.getFormattedParserClassName() << ">("; + if (Clause.getFlangClass().empty()) { + OS << "))"; + if (index != lastClauseIndex) + OS << " ||"; + OS << "\n"; + ++index; + continue; + } + + if (Clause.isValueOptional()) + OS << "maybe("; + OS << "parenthesized("; + + if (!Clause.getPrefix().empty()) + OS << "\"" << Clause.getPrefix() << ":\" >> "; + + // The common Flang parser are used directly. Their name is identical to + // the Flang class with first letter as lowercase. If the Flang class is + // not a common class, we assume there is a specific Parser<>{} with the + // Flang class name provided. + llvm::SmallString<128> Scratch; + StringRef Parser = + llvm::StringSwitch<StringRef>(Clause.getFlangClass()) + .Case("Name", "name") + .Case("ScalarIntConstantExpr", "scalarIntConstantExpr") + .Case("ScalarIntExpr", "scalarIntExpr") + .Case("ScalarLogicalExpr", "scalarLogicalExpr") + .Default(("Parser<" + Clause.getFlangClass() + ">{}") + .toStringRef(Scratch)); + OS << Parser; + if (!Clause.getPrefix().empty() && Clause.isPrefixOptional()) + OS << " || " << Parser; + OS << ")"; // close parenthesized(. + + if (Clause.isValueOptional()) // close maybe(. 
+ OS << ")"; + OS << "))"; + if (index != lastClauseIndex) + OS << " ||"; + OS << "\n"; + ++index; + } + OS << ")\n"; +} + // Generate the implementation section for the enumeration in the directive // language void EmitDirectivesFlangImpl(const DirectiveLanguage &DirLang, @@ -688,6 +768,8 @@ void EmitDirectivesFlangImpl(const DirectiveLanguage &DirLang, GenerateFlangClauseCheckPrototypes(DirLang, OS); GenerateFlangClauseParserKindMap(DirLang, OS); + + GenerateFlangClausesParser(DirLang, OS); } void GenerateClauseClassMacro(const DirectiveLanguage &DirLang, diff --git a/llvm/utils/TableGen/SearchableTableEmitter.cpp b/llvm/utils/TableGen/SearchableTableEmitter.cpp index ea849807de03..327c53e93a41 100644 --- a/llvm/utils/TableGen/SearchableTableEmitter.cpp +++ b/llvm/utils/TableGen/SearchableTableEmitter.cpp @@ -15,6 +15,7 @@ #include "CodeGenIntrinsics.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" @@ -650,8 +651,9 @@ void SearchableTableEmitter::collectTableEntries( SearchIndex Idx; std::copy(Table.Fields.begin(), Table.Fields.end(), std::back_inserter(Idx.Fields)); - std::sort(Table.Entries.begin(), Table.Entries.end(), - [&](Record *LHS, Record *RHS) { return compareBy(LHS, RHS, Idx); }); + llvm::sort(Table.Entries, [&](Record *LHS, Record *RHS) { + return compareBy(LHS, RHS, Idx); + }); } void SearchableTableEmitter::run(raw_ostream &OS) { diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index 61ec737f9394..6aeb495d44f2 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -841,7 +841,9 @@ extern unsigned __kmp_affinity_num_masks; extern void __kmp_affinity_bind_thread(int which); extern kmp_affin_mask_t *__kmp_affin_fullMask; +extern kmp_affin_mask_t *__kmp_affin_origMask; extern char *__kmp_cpuinfo_file; +extern bool __kmp_affin_reset; #endif /* KMP_AFFINITY_SUPPORTED */ @@ -3070,6 +3072,7 @@ static inline bool __kmp_is_hybrid_cpu() { return false; } extern volatile int __kmp_init_serial; extern volatile int __kmp_init_gtid; extern volatile int __kmp_init_common; +extern volatile int __kmp_need_register_serial; extern volatile int __kmp_init_middle; extern volatile int __kmp_init_parallel; #if KMP_USE_MONITOR @@ -3626,8 +3629,18 @@ static inline void __kmp_assign_root_init_mask() { r->r.r_affinity_assigned = TRUE; } } +static inline void __kmp_reset_root_init_mask(int gtid) { + kmp_info_t *th = __kmp_threads[gtid]; + kmp_root_t *r = th->th.th_root; + if (r->r.r_uber_thread == th && r->r.r_affinity_assigned) { + __kmp_set_system_affinity(__kmp_affin_origMask, FALSE); + KMP_CPU_COPY(th->th.th_affin_mask, __kmp_affin_origMask); + r->r.r_affinity_assigned = FALSE; + } +} #else /* KMP_AFFINITY_SUPPORTED */ #define __kmp_assign_root_init_mask() /* Nothing */ +static inline void __kmp_reset_root_init_mask(int gtid) {} #endif /* KMP_AFFINITY_SUPPORTED */ // No need for KMP_AFFINITY_SUPPORTED guard as only one field in the // format string is for affinity, so platforms that do not support diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp index d3c989e4d0c5..817b7ae88100 100644 --- a/openmp/runtime/src/kmp_affinity.cpp +++ b/openmp/runtime/src/kmp_affinity.cpp @@ -138,6 +138,18 @@ const char *__kmp_hw_get_core_type_string(kmp_hw_core_type_t type) { return "unknown"; } +#if KMP_AFFINITY_SUPPORTED +// If affinity is supported, check the affinity +// verbose and warning flags 
before printing warning +#define KMP_AFF_WARNING(...) \ + if (__kmp_affinity_verbose || \ + (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) { \ + KMP_WARNING(__VA_ARGS__); \ + } +#else +#define KMP_AFF_WARNING KMP_WARNING +#endif + //////////////////////////////////////////////////////////////////////////////// // kmp_hw_thread_t methods int kmp_hw_thread_t::compare_ids(const void *a, const void *b) { @@ -825,9 +837,9 @@ void kmp_topology_t::canonicalize() { } KMP_ASSERT(gran_type != KMP_HW_UNKNOWN); // Warn user what granularity setting will be used instead - KMP_WARNING(AffGranularityBad, "KMP_AFFINITY", - __kmp_hw_get_catalog_string(__kmp_affinity_gran), - __kmp_hw_get_catalog_string(gran_type)); + KMP_AFF_WARNING(AffGranularityBad, "KMP_AFFINITY", + __kmp_hw_get_catalog_string(__kmp_affinity_gran), + __kmp_hw_get_catalog_string(gran_type)); __kmp_affinity_gran = gran_type; } #if KMP_GROUP_AFFINITY @@ -843,8 +855,8 @@ void kmp_topology_t::canonicalize() { int proc_group_depth = get_level(KMP_HW_PROC_GROUP); if (gran_depth >= 0 && proc_group_depth >= 0 && gran_depth < proc_group_depth) { - KMP_WARNING(AffGranTooCoarseProcGroup, "KMP_AFFINITY", - __kmp_hw_get_catalog_string(__kmp_affinity_gran)); + KMP_AFF_WARNING(AffGranTooCoarseProcGroup, "KMP_AFFINITY", + __kmp_hw_get_catalog_string(__kmp_affinity_gran)); __kmp_affinity_gran = gran_type = KMP_HW_PROC_GROUP; } } @@ -966,16 +978,16 @@ bool kmp_topology_t::filter_hw_subset() { if (equivalent_type != KMP_HW_UNKNOWN) { __kmp_hw_subset->at(i).type = equivalent_type; } else { - KMP_WARNING(AffHWSubsetNotExistGeneric, - __kmp_hw_get_catalog_string(type)); + KMP_AFF_WARNING(AffHWSubsetNotExistGeneric, + __kmp_hw_get_catalog_string(type)); return false; } // Check to see if current layer has already been // specified either directly or through an equivalent type if (specified[equivalent_type] != KMP_HW_UNKNOWN) { - KMP_WARNING(AffHWSubsetEqvLayers, __kmp_hw_get_catalog_string(type), - __kmp_hw_get_catalog_string(specified[equivalent_type])); + KMP_AFF_WARNING(AffHWSubsetEqvLayers, __kmp_hw_get_catalog_string(type), + __kmp_hw_get_catalog_string(specified[equivalent_type])); return false; } specified[equivalent_type] = type; @@ -985,8 +997,8 @@ bool kmp_topology_t::filter_hw_subset() { if (max_count < 0 || (num != kmp_hw_subset_t::USE_ALL && num + offset > max_count)) { bool plural = (num > 1); - KMP_WARNING(AffHWSubsetManyGeneric, - __kmp_hw_get_catalog_string(type, plural)); + KMP_AFF_WARNING(AffHWSubsetManyGeneric, + __kmp_hw_get_catalog_string(type, plural)); return false; } @@ -1008,21 +1020,21 @@ bool kmp_topology_t::filter_hw_subset() { if ((using_core_effs || using_core_types) && !__kmp_is_hybrid_cpu()) { if (item.num_attrs == 1) { if (using_core_effs) { - KMP_WARNING(AffHWSubsetIgnoringAttr, "efficiency"); + KMP_AFF_WARNING(AffHWSubsetIgnoringAttr, "efficiency"); } else { - KMP_WARNING(AffHWSubsetIgnoringAttr, "core_type"); + KMP_AFF_WARNING(AffHWSubsetIgnoringAttr, "core_type"); } using_core_effs = false; using_core_types = false; } else { - KMP_WARNING(AffHWSubsetAttrsNonHybrid); + KMP_AFF_WARNING(AffHWSubsetAttrsNonHybrid); return false; } } // Check if using both core types and core efficiencies together if (using_core_types && using_core_effs) { - KMP_WARNING(AffHWSubsetIncompat, "core_type", "efficiency"); + KMP_AFF_WARNING(AffHWSubsetIncompat, "core_type", "efficiency"); return false; } @@ -1058,7 +1070,7 @@ bool kmp_topology_t::filter_hw_subset() { (num != kmp_hw_subset_t::USE_ALL && num + offset > 
max_count)) { kmp_str_buf_t buf; __kmp_hw_get_catalog_core_string(item.attr[j], &buf, num > 0); - KMP_WARNING(AffHWSubsetManyGeneric, buf.str); + KMP_AFF_WARNING(AffHWSubsetManyGeneric, buf.str); __kmp_str_buf_free(&buf); return false; } @@ -1080,8 +1092,8 @@ bool kmp_topology_t::filter_hw_subset() { } kmp_str_buf_t buf; __kmp_hw_get_catalog_core_string(other_attr, &buf, item.num[j] > 0); - KMP_WARNING(AffHWSubsetIncompat, - __kmp_hw_get_catalog_string(KMP_HW_CORE), buf.str); + KMP_AFF_WARNING(AffHWSubsetIncompat, + __kmp_hw_get_catalog_string(KMP_HW_CORE), buf.str); __kmp_str_buf_free(&buf); return false; } @@ -1093,7 +1105,7 @@ bool kmp_topology_t::filter_hw_subset() { kmp_str_buf_t buf; __kmp_hw_get_catalog_core_string(item.attr[j], &buf, item.num[j] > 0); - KMP_WARNING(AffHWSubsetAttrRepeat, buf.str); + KMP_AFF_WARNING(AffHWSubsetAttrRepeat, buf.str); __kmp_str_buf_free(&buf); return false; } @@ -1201,7 +1213,7 @@ bool kmp_topology_t::filter_hw_subset() { // One last check that we shouldn't allow filtering entire machine if (num_filtered == num_hw_threads) { - KMP_WARNING(AffHWSubsetAllFiltered); + KMP_AFF_WARNING(AffHWSubsetAllFiltered); __kmp_free(filtered); return false; } @@ -1536,6 +1548,8 @@ int __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) { // internal topology object and set the layer ids for it. Each routine // returns a boolean on whether it was successful at doing so. kmp_affin_mask_t *__kmp_affin_fullMask = NULL; +// Original mask is a subset of full mask in multiple processor groups topology +kmp_affin_mask_t *__kmp_affin_origMask = NULL; #if KMP_USE_HWLOC static inline bool __kmp_hwloc_is_cache_type(hwloc_obj_t obj) { @@ -3353,10 +3367,7 @@ static kmp_affin_mask_t *__kmp_create_masks(unsigned *maxIndex, KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels); } if (__kmp_affinity_gran_levels >= (int)depth) { - if (__kmp_affinity_verbose || - (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffThreadsMayMigrate); - } + KMP_AFF_WARNING(AffThreadsMayMigrate); } // Run through the table, forming the masks for all threads on each core. @@ -3443,11 +3454,7 @@ static int nextNewMask; { \ if (((_osId) > _maxOsId) || \ (!KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \ - if (__kmp_affinity_verbose || \ - (__kmp_affinity_warnings && \ - (__kmp_affinity_type != affinity_none))) { \ - KMP_WARNING(AffIgnoreInvalidProcID, _osId); \ - } \ + KMP_AFF_WARNING(AffIgnoreInvalidProcID, _osId); \ } else { \ ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \ } \ @@ -3498,11 +3505,7 @@ static void __kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks, // Copy the mask for that osId to the sum (union) mask. if ((num > maxOsId) || (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) { - if (__kmp_affinity_verbose || - (__kmp_affinity_warnings && - (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffIgnoreInvalidProcID, num); - } + KMP_AFF_WARNING(AffIgnoreInvalidProcID, num); KMP_CPU_ZERO(sumMask); } else { KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num)); @@ -3534,11 +3537,7 @@ static void __kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks, // Add the mask for that osId to the sum mask. 
if ((num > maxOsId) || (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) { - if (__kmp_affinity_verbose || - (__kmp_affinity_warnings && - (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffIgnoreInvalidProcID, num); - } + KMP_AFF_WARNING(AffIgnoreInvalidProcID, num); } else { KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num)); setSize++; @@ -3695,11 +3694,7 @@ static void __kmp_process_subplace_list(const char **scan, if (**scan == '}' || **scan == ',') { if ((start > maxOsId) || (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) { - if (__kmp_affinity_verbose || - (__kmp_affinity_warnings && - (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffIgnoreInvalidProcID, start); - } + KMP_AFF_WARNING(AffIgnoreInvalidProcID, start); } else { KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start)); (*setSize)++; @@ -3728,11 +3723,7 @@ static void __kmp_process_subplace_list(const char **scan, for (i = 0; i < count; i++) { if ((start > maxOsId) || (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) { - if (__kmp_affinity_verbose || - (__kmp_affinity_warnings && - (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffIgnoreInvalidProcID, start); - } + KMP_AFF_WARNING(AffIgnoreInvalidProcID, start); break; // don't proliferate warnings for large count } else { KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start)); @@ -3779,11 +3770,7 @@ static void __kmp_process_subplace_list(const char **scan, for (i = 0; i < count; i++) { if ((start > maxOsId) || (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) { - if (__kmp_affinity_verbose || - (__kmp_affinity_warnings && - (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffIgnoreInvalidProcID, start); - } + KMP_AFF_WARNING(AffIgnoreInvalidProcID, start); break; // don't proliferate warnings for large count } else { KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start)); @@ -3825,10 +3812,7 @@ static void __kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask, KMP_ASSERT(num >= 0); if ((num > maxOsId) || (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) { - if (__kmp_affinity_verbose || - (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffIgnoreInvalidProcID, num); - } + KMP_AFF_WARNING(AffIgnoreInvalidProcID, num); } else { KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num)); (*setSize)++; @@ -3945,11 +3929,8 @@ void __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks, (!KMP_CPU_ISSET(j, __kmp_affin_fullMask)) || (!KMP_CPU_ISSET(j + stride, KMP_CPU_INDEX(osId2Mask, j + stride)))) { - if ((__kmp_affinity_verbose || - (__kmp_affinity_warnings && - (__kmp_affinity_type != affinity_none))) && - i < count - 1) { - KMP_WARNING(AffIgnoreInvalidProcID, j + stride); + if (i < count - 1) { + KMP_AFF_WARNING(AffIgnoreInvalidProcID, j + stride); } continue; } @@ -4072,8 +4053,13 @@ static void __kmp_aux_affinity_initialize(void) { if (__kmp_affin_fullMask == NULL) { KMP_CPU_ALLOC(__kmp_affin_fullMask); } + if (__kmp_affin_origMask == NULL) { + KMP_CPU_ALLOC(__kmp_affin_origMask); + } if (KMP_AFFINITY_CAPABLE()) { __kmp_get_system_affinity(__kmp_affin_fullMask, TRUE); + // Make a copy before possible expanding to the entire machine mask + __kmp_affin_origMask->copy(__kmp_affin_fullMask); if (__kmp_affinity_respect_mask) { // Count the number of available processors. 
unsigned i; @@ -4085,11 +4071,7 @@ static void __kmp_aux_affinity_initialize(void) { __kmp_avail_proc++; } if (__kmp_avail_proc > __kmp_xproc) { - if (__kmp_affinity_verbose || - (__kmp_affinity_warnings && - (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(ErrorInitializeAffinity); - } + KMP_AFF_WARNING(ErrorInitializeAffinity); __kmp_affinity_type = affinity_none; KMP_AFFINITY_DISABLE(); return; @@ -4111,6 +4093,10 @@ static void __kmp_aux_affinity_initialize(void) { __kmp_avail_proc = __kmp_affinity_entire_machine_mask(__kmp_affin_fullMask); #if KMP_OS_WINDOWS + if (__kmp_num_proc_groups <= 1) { + // Copy expanded full mask if topology has single processor group + __kmp_affin_origMask->copy(__kmp_affin_fullMask); + } // Set the process affinity mask since threads' affinity // masks must be subset of process mask in Windows* OS __kmp_affin_fullMask->set_process_affinity(true); @@ -4254,10 +4240,8 @@ static void __kmp_aux_affinity_initialize(void) { // Early exit if topology could not be created if (!__kmp_topology) { - if (KMP_AFFINITY_CAPABLE() && - (__kmp_affinity_verbose || - (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none)))) { - KMP_WARNING(ErrorInitializeAffinity); + if (KMP_AFFINITY_CAPABLE()) { + KMP_AFF_WARNING(ErrorInitializeAffinity); } if (nPackages > 0 && nCoresPerPkg > 0 && __kmp_nThreadsPerCore > 0 && __kmp_ncores > 0) { @@ -4283,6 +4267,13 @@ static void __kmp_aux_affinity_initialize(void) { if (__kmp_affinity_verbose) __kmp_topology->print("KMP_AFFINITY"); bool filtered = __kmp_topology->filter_hw_subset(); + if (filtered) { +#if KMP_OS_WINDOWS + // Copy filtered full mask if topology has single processor group + if (__kmp_num_proc_groups <= 1) +#endif + __kmp_affin_origMask->copy(__kmp_affin_fullMask); + } if (filtered && __kmp_affinity_verbose) __kmp_topology->print("KMP_HW_SUBSET"); machine_hierarchy.init(__kmp_topology->get_num_hw_threads()); @@ -4321,10 +4312,7 @@ static void __kmp_aux_affinity_initialize(void) { __kmp_affinity_proclist, osId2Mask, maxIndex); } if (__kmp_affinity_num_masks == 0) { - if (__kmp_affinity_verbose || - (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffNoValidProcID); - } + KMP_AFF_WARNING(AffNoValidProcID); __kmp_affinity_type = affinity_none; __kmp_create_affinity_none_places(); return; @@ -4374,9 +4362,7 @@ static void __kmp_aux_affinity_initialize(void) { case affinity_balanced: if (depth <= 1) { - if (__kmp_affinity_verbose || __kmp_affinity_warnings) { - KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY"); - } + KMP_AFF_WARNING(AffBalancedNotAvail, "KMP_AFFINITY"); __kmp_affinity_type = affinity_none; __kmp_create_affinity_none_places(); return; @@ -4393,9 +4379,7 @@ static void __kmp_aux_affinity_initialize(void) { int nproc = ncores * maxprocpercore; if ((nproc < 2) || (nproc < __kmp_avail_proc)) { - if (__kmp_affinity_verbose || __kmp_affinity_warnings) { - KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY"); - } + KMP_AFF_WARNING(AffBalancedNotAvail, "KMP_AFFINITY"); __kmp_affinity_type = affinity_none; return; } @@ -4506,6 +4490,10 @@ void __kmp_affinity_uninitialize(void) { KMP_CPU_FREE(__kmp_affin_fullMask); __kmp_affin_fullMask = NULL; } + if (__kmp_affin_origMask != NULL) { + KMP_CPU_FREE(__kmp_affin_origMask); + __kmp_affin_origMask = NULL; + } __kmp_affinity_num_masks = 0; __kmp_affinity_type = affinity_default; __kmp_affinity_num_places = 0; diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp index b7bcc4c94148..c932d450c84e 
100644 --- a/openmp/runtime/src/kmp_csupport.cpp +++ b/openmp/runtime/src/kmp_csupport.cpp @@ -632,6 +632,11 @@ void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { "team %p\n", global_tid, this_thr->th.th_task_team, this_thr->th.th_team)); } +#if KMP_AFFINITY_SUPPORTED + if (this_thr->th.th_team->t.t_level == 0 && __kmp_affin_reset) { + __kmp_reset_root_init_mask(global_tid); + } +#endif } else { if (__kmp_tasking_mode != tskm_immediate_exec) { KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting " @@ -2021,6 +2026,11 @@ void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) { } __kmp_assign_root_init_mask(); gtid = __kmp_get_gtid(); +#if KMP_AFFINITY_SUPPORTED + if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) { + __kmp_reset_root_init_mask(gtid); + } +#endif __kmp_aux_display_affinity(gtid, format); } @@ -2034,6 +2044,11 @@ size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size, } __kmp_assign_root_init_mask(); gtid = __kmp_get_gtid(); +#if KMP_AFFINITY_SUPPORTED + if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) { + __kmp_reset_root_init_mask(gtid); + } +#endif __kmp_str_buf_init(&capture_buf); num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf); if (buffer && buf_size) { diff --git a/openmp/runtime/src/kmp_ftn_entry.h b/openmp/runtime/src/kmp_ftn_entry.h index 048fcf9d0397..8dbd3ce97690 100644 --- a/openmp/runtime/src/kmp_ftn_entry.h +++ b/openmp/runtime/src/kmp_ftn_entry.h @@ -238,6 +238,10 @@ int FTN_STDCALL FTN_GET_AFFINITY(void **mask) { __kmp_middle_initialize(); } __kmp_assign_root_init_mask(); + int gtid = __kmp_get_gtid(); + if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) { + __kmp_reset_root_init_mask(gtid); + } return __kmp_aux_get_affinity(mask); #endif } @@ -358,9 +362,13 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_MAX_THREADS)(void) { if (!TCR_4(__kmp_init_middle)) { __kmp_middle_initialize(); } - __kmp_assign_root_init_mask(); gtid = __kmp_entry_gtid(); thread = __kmp_threads[gtid]; +#if KMP_AFFINITY_SUPPORTED + if (thread->th.th_team->t.t_level == 0 && !__kmp_affin_reset) { + __kmp_assign_root_init_mask(); + } +#endif // return thread -> th.th_team -> t.t_current_task[ // thread->th.th_info.ds.ds_tid ] -> icvs.nproc; return thread->th.th_current_task->td_icvs.nproc; @@ -509,6 +517,11 @@ void FTN_STDCALL KMP_EXPAND_NAME_IF_APPEND(FTN_DISPLAY_AFFINITY)( } __kmp_assign_root_init_mask(); gtid = __kmp_get_gtid(); +#if KMP_AFFINITY_SUPPORTED + if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) { + __kmp_reset_root_init_mask(gtid); + } +#endif ConvertedString cformat(format, size); __kmp_aux_display_affinity(gtid, cformat.get()); #endif @@ -537,6 +550,11 @@ size_t FTN_STDCALL KMP_EXPAND_NAME_IF_APPEND(FTN_CAPTURE_AFFINITY)( } __kmp_assign_root_init_mask(); gtid = __kmp_get_gtid(); +#if KMP_AFFINITY_SUPPORTED + if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) { + __kmp_reset_root_init_mask(gtid); + } +#endif __kmp_str_buf_init(&capture_buf); ConvertedString cformat(format, for_size); num_required = __kmp_aux_capture_affinity(gtid, cformat.get(), &capture_buf); @@ -612,7 +630,16 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_NUM_PROCS)(void) { if (!TCR_4(__kmp_init_middle)) { __kmp_middle_initialize(); } - __kmp_assign_root_init_mask(); +#if KMP_AFFINITY_SUPPORTED + if (!__kmp_affin_reset) { + // only bind root here if its affinity reset is not requested + int gtid = 
__kmp_entry_gtid(); + kmp_info_t *thread = __kmp_threads[gtid]; + if (thread->th.th_team->t.t_level == 0) { + __kmp_assign_root_init_mask(); + } + } +#endif return __kmp_avail_proc; #endif } @@ -802,9 +829,16 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_NUM_PLACES)(void) { if (!TCR_4(__kmp_init_middle)) { __kmp_middle_initialize(); } - __kmp_assign_root_init_mask(); if (!KMP_AFFINITY_CAPABLE()) return 0; + if (!__kmp_affin_reset) { + // only bind root here if its affinity reset is not requested + int gtid = __kmp_entry_gtid(); + kmp_info_t *thread = __kmp_threads[gtid]; + if (thread->th.th_team->t.t_level == 0) { + __kmp_assign_root_init_mask(); + } + } return __kmp_affinity_num_masks; #endif } @@ -818,9 +852,16 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_PLACE_NUM_PROCS)(int place_num) { if (!TCR_4(__kmp_init_middle)) { __kmp_middle_initialize(); } - __kmp_assign_root_init_mask(); if (!KMP_AFFINITY_CAPABLE()) return 0; + if (!__kmp_affin_reset) { + // only bind root here if its affinity reset is not requested + int gtid = __kmp_entry_gtid(); + kmp_info_t *thread = __kmp_threads[gtid]; + if (thread->th.th_team->t.t_level == 0) { + __kmp_assign_root_init_mask(); + } + } if (place_num < 0 || place_num >= (int)__kmp_affinity_num_masks) return 0; kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks, place_num); @@ -844,9 +885,16 @@ void FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_PLACE_PROC_IDS)(int place_num, if (!TCR_4(__kmp_init_middle)) { __kmp_middle_initialize(); } - __kmp_assign_root_init_mask(); if (!KMP_AFFINITY_CAPABLE()) return; + if (!__kmp_affin_reset) { + // only bind root here if its affinity reset is not requested + int gtid = __kmp_entry_gtid(); + kmp_info_t *thread = __kmp_threads[gtid]; + if (thread->th.th_team->t.t_level == 0) { + __kmp_assign_root_init_mask(); + } + } if (place_num < 0 || place_num >= (int)__kmp_affinity_num_masks) return; kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks, place_num); @@ -870,11 +918,13 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_PLACE_NUM)(void) { if (!TCR_4(__kmp_init_middle)) { __kmp_middle_initialize(); } - __kmp_assign_root_init_mask(); if (!KMP_AFFINITY_CAPABLE()) return -1; gtid = __kmp_entry_gtid(); thread = __kmp_thread_from_gtid(gtid); + if (thread->th.th_team->t.t_level == 0 && !__kmp_affin_reset) { + __kmp_assign_root_init_mask(); + } if (thread->th.th_current_place < 0) return -1; return thread->th.th_current_place; @@ -890,11 +940,13 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_PARTITION_NUM_PLACES)(void) { if (!TCR_4(__kmp_init_middle)) { __kmp_middle_initialize(); } - __kmp_assign_root_init_mask(); if (!KMP_AFFINITY_CAPABLE()) return 0; gtid = __kmp_entry_gtid(); thread = __kmp_thread_from_gtid(gtid); + if (thread->th.th_team->t.t_level == 0 && !__kmp_affin_reset) { + __kmp_assign_root_init_mask(); + } first_place = thread->th.th_first_place; last_place = thread->th.th_last_place; if (first_place < 0 || last_place < 0) @@ -917,11 +969,13 @@ KMP_EXPAND_NAME(FTN_GET_PARTITION_PLACE_NUMS)(int *place_nums) { if (!TCR_4(__kmp_init_middle)) { __kmp_middle_initialize(); } - __kmp_assign_root_init_mask(); if (!KMP_AFFINITY_CAPABLE()) return; gtid = __kmp_entry_gtid(); thread = __kmp_thread_from_gtid(gtid); + if (thread->th.th_team->t.t_level == 0 && !__kmp_affin_reset) { + __kmp_assign_root_init_mask(); + } first_place = thread->th.th_first_place; last_place = thread->th.th_last_place; if (first_place < 0 || last_place < 0) diff --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp index 
99a6e88e25f8..3fd536416de7 100644 --- a/openmp/runtime/src/kmp_global.cpp +++ b/openmp/runtime/src/kmp_global.cpp @@ -44,6 +44,7 @@ tsc_tick_count __kmp_stats_start_time; volatile int __kmp_init_serial = FALSE; volatile int __kmp_init_gtid = FALSE; volatile int __kmp_init_common = FALSE; +volatile int __kmp_need_register_serial = TRUE; volatile int __kmp_init_middle = FALSE; volatile int __kmp_init_parallel = FALSE; volatile int __kmp_init_hidden_helper = FALSE; @@ -284,6 +285,7 @@ kmp_affin_mask_t *__kmp_affinity_masks = NULL; unsigned __kmp_affinity_num_masks = 0; char *__kmp_cpuinfo_file = NULL; +bool __kmp_affin_reset = 0; #endif /* KMP_AFFINITY_SUPPORTED */ diff --git a/openmp/runtime/src/kmp_os.h b/openmp/runtime/src/kmp_os.h index 5db97bb42139..02efaa1b2613 100644 --- a/openmp/runtime/src/kmp_os.h +++ b/openmp/runtime/src/kmp_os.h @@ -345,6 +345,9 @@ extern "C" { // Use a function like macro to imply that it must be followed by a semicolon #if __cplusplus > 201402L && __has_cpp_attribute(fallthrough) #define KMP_FALLTHROUGH() [[fallthrough]] +// icc cannot properly tell this attribute is absent so force off +#elif KMP_COMPILER_ICC +#define KMP_FALLTHROUGH() ((void)0) #elif __has_cpp_attribute(clang::fallthrough) #define KMP_FALLTHROUGH() [[clang::fallthrough]] #elif __has_attribute(fallthrough) || __GNUC__ >= 7 diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index 2b339fd38d63..b8d470528798 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -2641,6 +2641,11 @@ void __kmp_join_call(ident_t *loc, int gtid __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); +#if KMP_AFFINITY_SUPPORTED + if (master_th->th.th_team->t.t_level == 0 && __kmp_affin_reset) { + __kmp_reset_root_init_mask(gtid); + } +#endif #if OMPT_SUPPORT int flags = OMPT_INVOKER(fork_context) | @@ -6965,10 +6970,12 @@ static void __kmp_do_serial_initialize(void) { /* Initialize internal memory allocator */ __kmp_init_allocator(); - /* Register the library startup via an environment variable and check to see - whether another copy of the library is already registered. */ - - __kmp_register_library_startup(); + /* Register the library startup via an environment variable or via mapped + shared memory file and check to see whether another copy of the library is + already registered. Since forked child process is often terminated, we + postpone the registration till middle initialization in the child */ + if (__kmp_need_register_serial) + __kmp_register_library_startup(); /* TODO reinitialization of library */ if (TCR_4(__kmp_global.g.g_done)) { @@ -7255,6 +7262,12 @@ static void __kmp_do_middle_initialize(void) { KA_TRACE(10, ("__kmp_middle_initialize: enter\n")); + if (UNLIKELY(!__kmp_need_register_serial)) { + // We are in a forked child process. The registration was skipped during + // serial initialization in __kmp_atfork_child handler. Do it here. + __kmp_register_library_startup(); + } + // Save the previous value for the __kmp_dflt_team_nth so that // we can avoid some reinitialization if it hasn't changed. 
prev_dflt_team_nth = __kmp_dflt_team_nth; diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp index 49b23a866a58..97b20d4f8664 100644 --- a/openmp/runtime/src/kmp_settings.cpp +++ b/openmp/runtime/src/kmp_settings.cpp @@ -2169,6 +2169,7 @@ static void __kmp_parse_affinity_env(char const *name, char const *value, int respect = 0; int gran = 0; int dups = 0; + int reset = 0; bool set = false; KMP_ASSERT(value != NULL); @@ -2224,6 +2225,7 @@ static void __kmp_parse_affinity_env(char const *name, char const *value, #define set_respect(val) _set_param(respect, *out_respect, val) #define set_dups(val) _set_param(dups, *out_dups, val) #define set_proclist(val) _set_param(proclist, *out_proclist, val) +#define set_reset(val) _set_param(reset, __kmp_affin_reset, val) #define set_gran(val, levels) \ { \ @@ -2293,6 +2295,12 @@ static void __kmp_parse_affinity_env(char const *name, char const *value, } else if (__kmp_match_str("norespect", buf, CCAST(const char **, &next))) { set_respect(FALSE); buf = next; + } else if (__kmp_match_str("reset", buf, CCAST(const char **, &next))) { + set_reset(TRUE); + buf = next; + } else if (__kmp_match_str("noreset", buf, CCAST(const char **, &next))) { + set_reset(FALSE); + buf = next; } else if (__kmp_match_str("duplicates", buf, CCAST(const char **, &next)) || __kmp_match_str("dups", buf, CCAST(const char **, &next))) { @@ -2433,6 +2441,7 @@ static void __kmp_parse_affinity_env(char const *name, char const *value, #undef set_warnings #undef set_respect #undef set_granularity +#undef set_reset __kmp_str_free(&buffer); @@ -2564,6 +2573,11 @@ static void __kmp_stg_print_affinity(kmp_str_buf_t *buffer, char const *name, } else { __kmp_str_buf_print(buffer, "%s,", "norespect"); } + if (__kmp_affin_reset) { + __kmp_str_buf_print(buffer, "%s,", "reset"); + } else { + __kmp_str_buf_print(buffer, "%s,", "noreset"); + } __kmp_str_buf_print(buffer, "granularity=%s,", __kmp_hw_get_keyword(__kmp_affinity_gran, false)); } diff --git a/openmp/runtime/src/z_Linux_util.cpp b/openmp/runtime/src/z_Linux_util.cpp index 5cd6ad6a0339..91edf0254a77 100644 --- a/openmp/runtime/src/z_Linux_util.cpp +++ b/openmp/runtime/src/z_Linux_util.cpp @@ -1297,7 +1297,13 @@ static void __kmp_atfork_child(void) { __kmp_itt_reset(); // reset ITT's global state #endif /* USE_ITT_BUILD */ - __kmp_serial_initialize(); + { + // Child process often get terminated without any use of OpenMP. That might + // cause mapped shared memory file to be left unattended. Thus we postpone + // library registration till middle initialization in the child process. + __kmp_need_register_serial = FALSE; + __kmp_serial_initialize(); + } /* This is necessary to make sure no stale data is left around */ /* AC: customers complain that we use unsafe routines in the atfork |