author | Dimitry Andric <dim@FreeBSD.org> | 2024-01-11 18:24:21 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2024-01-11 18:24:21 +0000 |
commit | 950076cd18f3fa9d789b4add9d405898efff09a5 (patch) | |
tree | 2454649366290c6292cc2d94dde042f71bc1e144 | |
parent | aca2e42c67292825f835f094eb0c4df5ce6013db (diff) | |
Vendor import of llvm-project main llvmorg-18-init-16864-g3b3ee1f53424
(vendor/llvm-project/llvmorg-18-init-16864-g3b3ee1f53424)
241 files changed, 5209 insertions, 2318 deletions
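One user-visible addition in this import is Clang's `counted_by` attribute for C flexible array members, together with the bounds-sanitizer and `__builtin_dynamic_object_size` support wired up in the hunks below. A minimal usage sketch, mirroring the documentation added in clang/include/clang/Basic/AttrDocs.td further down; the struct and function names here are illustrative only:

```c
#include <stdlib.h>

/* 'counted_by(count)' declares that 'elems' has 'count' elements, which the
   array-bounds sanitizer and __builtin_dynamic_object_size() can then use. */
struct buf {
  size_t count;
  int elems[] __attribute__((counted_by(count)));
};

struct buf *buf_alloc(size_t n) {
  struct buf *p = malloc(sizeof(struct buf) + n * sizeof(int));
  if (p)
    p->count = n; /* keep the count in sync with the allocation */
  return p;
}
```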
diff --git a/clang/include/clang/AST/DeclBase.h b/clang/include/clang/AST/DeclBase.h index 10dcbdb262d8..5b1038582bc6 100644 --- a/clang/include/clang/AST/DeclBase.h +++ b/clang/include/clang/AST/DeclBase.h @@ -19,6 +19,7 @@ #include "clang/AST/SelectorLocationsKind.h" #include "clang/Basic/IdentifierTable.h" #include "clang/Basic/LLVM.h" +#include "clang/Basic/LangOptions.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/Specifiers.h" #include "llvm/ADT/ArrayRef.h" @@ -488,6 +489,15 @@ public: // Return true if this is a FileContext Decl. bool isFileContextDecl() const; + /// Whether it resembles a flexible array member. This is a static member + /// because we want to be able to call it with a nullptr. That allows us to + /// perform non-Decl specific checks based on the object's type and strict + /// flex array level. + static bool isFlexibleArrayMemberLike( + ASTContext &Context, const Decl *D, QualType Ty, + LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel, + bool IgnoreTemplateOrMacroSubstitution); + ASTContext &getASTContext() const LLVM_READONLY; /// Helper to get the language options from the ASTContext. diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h index 984a4d8bab5e..648f5f946408 100644 --- a/clang/include/clang/AST/DeclCXX.h +++ b/clang/include/clang/AST/DeclCXX.h @@ -1425,6 +1425,9 @@ public: /// (C++11 [class]p6). bool isTriviallyCopyable() const; + /// Determine whether this class is considered trivially copyable per + bool isTriviallyCopyConstructible() const; + /// Determine whether this class is considered trivial. /// /// C++11 [class]p6: diff --git a/clang/include/clang/AST/Stmt.h b/clang/include/clang/AST/Stmt.h index da7b37ce0e12..e1fde24e6477 100644 --- a/clang/include/clang/AST/Stmt.h +++ b/clang/include/clang/AST/Stmt.h @@ -1631,8 +1631,10 @@ public: SourceLocation RB); // Build an empty compound statement with a location. - explicit CompoundStmt(SourceLocation Loc) - : Stmt(CompoundStmtClass), LBraceLoc(Loc), RBraceLoc(Loc) { + explicit CompoundStmt(SourceLocation Loc) : CompoundStmt(Loc, Loc) {} + + CompoundStmt(SourceLocation Loc, SourceLocation EndLoc) + : Stmt(CompoundStmtClass), LBraceLoc(Loc), RBraceLoc(EndLoc) { CompoundStmtBits.NumStmts = 0; CompoundStmtBits.HasFPFeatures = 0; } diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index 9e9f896ebef7..d4e5310fb3ab 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -917,6 +917,9 @@ public: /// Return true if this is a trivially copyable type (C++0x [basic.types]p9) bool isTriviallyCopyableType(const ASTContext &Context) const; + /// Return true if this is a trivially copyable type + bool isTriviallyCopyConstructibleType(const ASTContext &Context) const; + /// Return true if this is a trivially relocatable type. bool isTriviallyRelocatableType(const ASTContext &Context) const; diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index d5eabaad4889..a03b0e44e15f 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -4372,3 +4372,21 @@ def CodeAlign: StmtAttr { static constexpr int MaximumAlignment = 4096; }]; } + +def CountedBy : InheritableAttr { + let Spellings = [Clang<"counted_by">]; + let Subjects = SubjectList<[Field]>; + let Args = [IdentifierArgument<"CountedByField">]; + let Documentation = [CountedByDocs]; + let LangOpts = [COnly]; + // FIXME: This is ugly. 
Let using a DeclArgument would be nice, but a Decl + // isn't yet available due to the fact that we're still parsing the + // structure. Maybe that code could be changed sometime in the future. + code AdditionalMembers = [{ + private: + SourceRange CountedByFieldLoc; + public: + SourceRange getCountedByFieldLoc() const { return CountedByFieldLoc; } + void setCountedByFieldLoc(SourceRange Loc) { CountedByFieldLoc = Loc; } + }]; +} diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 5416a0cbdd07..2e8d7752c975 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -7749,3 +7749,81 @@ but do not pass them to the underlying coroutine or pass them by value. .. _`CRT`: https://clang.llvm.org/docs/AttributeReference.html#coro-return-type }]; } + +def CountedByDocs : Documentation { + let Category = DocCatField; + let Content = [{ +Clang supports the ``counted_by`` attribute on the flexible array member of a +structure in C. The argument for the attribute is the name of a field member +holding the count of elements in the flexible array. This information can be +used to improve the results of the array bound sanitizer and the +``__builtin_dynamic_object_size`` builtin. The ``count`` field member must be +within the same non-anonymous, enclosing struct as the flexible array member. + +This example specifies that the flexible array member ``array`` has the number +of elements allocated for it in ``count``: + +.. code-block:: c + + struct bar; + + struct foo { + size_t count; + char other; + struct bar *array[] __attribute__((counted_by(count))); + }; + +This establishes a relationship between ``array`` and ``count``. Specifically, +``array`` must have at least ``count`` number of elements available. It's the +user's responsibility to ensure that this relationship is maintained through +changes to the structure. + +In the following example, the allocated array erroneously has fewer elements +than what's specified by ``p->count``. This would result in an out-of-bounds +access not being detected. + +.. code-block:: c + + #define SIZE_INCR 42 + + struct foo *p; + + void foo_alloc(size_t count) { + p = malloc(MAX(sizeof(struct foo), + offsetof(struct foo, array[0]) + count * sizeof(struct bar *))); + p->count = count + SIZE_INCR; + } + +The next example updates ``p->count``, but breaks the relationship requirement +that ``p->array`` must have at least ``p->count`` number of elements available: + +.. code-block:: c + + #define SIZE_INCR 42 + + struct foo *p; + + void foo_alloc(size_t count) { + p = malloc(MAX(sizeof(struct foo), + offsetof(struct foo, array[0]) + count * sizeof(struct bar *))); + p->count = count; + } + + void use_foo(int index, int val) { + p->count += SIZE_INCR + 1; /* 'count' is now larger than the number of elements of 'array'. */ + p->array[index] = val; /* The sanitizer can't properly check this access. */ + } + +In this example, an update to ``p->count`` maintains the relationship +requirement: + +.. 
code-block:: c + + void use_foo(int index, int val) { + if (p->count == 0) + return; + --p->count; + p->array[index] = val; + } + }]; +} diff --git a/clang/include/clang/Basic/DiagnosticFrontendKinds.td b/clang/include/clang/Basic/DiagnosticFrontendKinds.td index 568000106a84..85ecfdf9de62 100644 --- a/clang/include/clang/Basic/DiagnosticFrontendKinds.td +++ b/clang/include/clang/Basic/DiagnosticFrontendKinds.td @@ -167,7 +167,7 @@ def err_verify_no_such_marker : Error< def err_verify_missing_start : Error< "cannot find start ('{{') of expected %0">; def err_verify_missing_end : Error< - "cannot find end ('}}') of expected %0">; + "cannot find end ('%1') of expected %0">; def err_verify_invalid_content : Error< "invalid expected %0: %1">; def err_verify_missing_regex : Error< diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index a97182cad5d5..1a79892e4003 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -2253,6 +2253,8 @@ def warn_cxx17_compat_aggregate_init_paren_list : Warning< def err_reference_bind_to_bitfield : Error< "%select{non-const|volatile}0 reference cannot bind to " "bit-field%select{| %1}2">; +def err_reference_bind_to_bitfield_in_cce : Error< + "reference cannot bind to bit-field in converted constant expression">; def err_reference_bind_to_vector_element : Error< "%select{non-const|volatile}0 reference cannot bind to vector element">; def err_reference_bind_to_matrix_element : Error< @@ -6439,6 +6441,19 @@ def warn_superclass_variable_sized_type_not_at_end : Warning< "field %0 can overwrite instance variable %1 with variable sized type %2" " in superclass %3">, InGroup<ObjCFlexibleArray>; +def err_flexible_array_count_not_in_same_struct : Error< + "'counted_by' field %0 isn't within the same struct as the flexible array">; +def err_counted_by_attr_not_on_flexible_array_member : Error< + "'counted_by' only applies to C99 flexible array members">; +def err_counted_by_attr_refers_to_flexible_array : Error< + "'counted_by' cannot refer to the flexible array %0">; +def err_counted_by_must_be_in_structure : Error< + "field %0 in 'counted_by' not inside structure">; +def err_flexible_array_counted_by_attr_field_not_integer : Error< + "field %0 in 'counted_by' must be a non-boolean integer type">; +def note_flexible_array_counted_by_attr_field : Note< + "field %0 declared here">; + let CategoryName = "ARC Semantic Issue" in { // ARC-mode diagnostics. 
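The `counted_by` diagnostics added in the DiagnosticSemaKinds.td hunk above reject malformed uses of the attribute. A sketch of the kind of code each one targets, constructed here for illustration (not taken from the patch or its tests); the comments paraphrase the new diagnostic wording:

```c
#include <stddef.h>

struct ok {
  size_t n;
  int fam[] __attribute__((counted_by(n)));    /* accepted */
};

struct not_fam {
  size_t n;
  int fixed[4] __attribute__((counted_by(n))); /* error: 'counted_by' only applies
                                                  to C99 flexible array members */
  int fam[];
};

struct self_ref {
  size_t n;
  int fam[] __attribute__((counted_by(fam)));  /* error: 'counted_by' cannot refer
                                                  to the flexible array 'fam' */
};

struct bool_count {
  _Bool n;
  int fam[] __attribute__((counted_by(n)));    /* error: field 'n' in 'counted_by'
                                                  must be a non-boolean integer type */
};
```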
diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 21abc346cf17..0428b70c6020 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -457,6 +457,7 @@ ENUM_LANGOPT(SignReturnAddressKey, SignReturnAddressKeyKind, 1, SignReturnAddres "Key used for return address signing") LANGOPT(BranchTargetEnforcement, 1, 0, "Branch-target enforcement enabled") LANGOPT(BranchProtectionPAuthLR, 1, 0, "Use PC as a diversifier using PAuthLR NOP instructions.") +LANGOPT(GuardedControlStack, 1, 0, "Guarded control stack enabled") LANGOPT(SpeculativeLoadHardening, 1, 0, "Speculative load hardening enabled") diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index ac3c324c6c29..3eb23ebdacf0 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -1373,6 +1373,7 @@ public: LangOptions::SignReturnAddressKeyKind::AKey; bool BranchTargetEnforcement = false; bool BranchProtectionPAuthLR = false; + bool GuardedControlStack = false; }; /// Determine if the Architecture in this TargetInfo supports branch diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 7f80fb0386cc..6f35e25617ad 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -454,11 +454,11 @@ let TargetGuard = "sve,bf16" in { let TargetGuard = "sve2p1" in { // Contiguous truncating store from quadword (single vector). - def SVST1UWQ : MInst<"svst1uwq[_{d}]", "vPcd", "iUif", [IsStore], MemEltTyInt32, "aarch64_sve_st1uwq">; - def SVST1UWQ_VNUM : MInst<"svst1uwq_vnum[_{d}]", "vPcld", "iUif", [IsStore], MemEltTyInt32, "aarch64_sve_st1uwq">; + def SVST1UWQ : MInst<"svst1wq[_{d}]", "vPcd", "iUif", [IsStore], MemEltTyInt32, "aarch64_sve_st1wq">; + def SVST1UWQ_VNUM : MInst<"svst1wq_vnum[_{d}]", "vPcld", "iUif", [IsStore], MemEltTyInt32, "aarch64_sve_st1wq">; - def SVST1UDQ : MInst<"svst1udq[_{d}]", "vPcd", "lUld", [IsStore], MemEltTyInt64, "aarch64_sve_st1udq">; - def SVST1UDQ_VNUM : MInst<"svst1udq_vnum[_{d}]", "vPcld", "lUld", [IsStore], MemEltTyInt64, "aarch64_sve_st1udq">; + def SVST1UDQ : MInst<"svst1dq[_{d}]", "vPcd", "lUld", [IsStore], MemEltTyInt64, "aarch64_sve_st1dq">; + def SVST1UDQ_VNUM : MInst<"svst1dq_vnum[_{d}]", "vPcld", "lUld", [IsStore], MemEltTyInt64, "aarch64_sve_st1dq">; // Store one vector (vector base + scalar offset) def SVST1Q_SCATTER_U64BASE_OFFSET : MInst<"svst1q_scatter[_{2}base]_offset[_{d}]", "vPgld", "cUcsUsiUilUlfhdb", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_scalar_offset">; @@ -2040,12 +2040,12 @@ let TargetGuard = "sve2p1|sme2" in { } let TargetGuard = "sve2p1" in { -def SVDOT_X2_S : SInst<"svdot[_{d}_{2}_{3}]", "ddhh", "i", MergeNone, "aarch64_sve_sdot_x2", [], []>; -def SVDOT_X2_U : SInst<"svdot[_{d}_{2}_{3}]", "ddhh", "Ui", MergeNone, "aarch64_sve_udot_x2", [], []>; -def SVDOT_X2_F : SInst<"svdot[_{d}_{2}_{3}]", "ddhh", "f", MergeNone, "aarch64_sve_fdot_x2", [], []>; -def SVDOT_LANE_X2_S : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "i", MergeNone, "aarch64_sve_sdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>; -def SVDOT_LANE_X2_U : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "Ui", MergeNone, "aarch64_sve_udot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>; -def SVDOT_LANE_X2_F : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "f", MergeNone, "aarch64_sve_fdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>; +def SVDOT_X2_S : 
SInst<"svdot[_{d}_{2}]", "ddhh", "i", MergeNone, "aarch64_sve_sdot_x2", [], []>; +def SVDOT_X2_U : SInst<"svdot[_{d}_{2}]", "ddhh", "Ui", MergeNone, "aarch64_sve_udot_x2", [], []>; +def SVDOT_X2_F : SInst<"svdot[_{d}_{2}]", "ddhh", "f", MergeNone, "aarch64_sve_fdot_x2", [], []>; +def SVDOT_LANE_X2_S : SInst<"svdot_lane[_{d}_{2}]", "ddhhi", "i", MergeNone, "aarch64_sve_sdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>; +def SVDOT_LANE_X2_U : SInst<"svdot_lane[_{d}_{2}]", "ddhhi", "Ui", MergeNone, "aarch64_sve_udot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>; +def SVDOT_LANE_X2_F : SInst<"svdot_lane[_{d}_{2}]", "ddhhi", "f", MergeNone, "aarch64_sve_fdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>; } let TargetGuard = "sve2p1|sme2" in { @@ -2208,7 +2208,7 @@ let TargetGuard = "sve2p1" in { def SVTBLQ : SInst<"svtblq[_{d}]", "ddu", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_tblq">; def SVTBXQ : SInst<"svtbxq[_{d}]", "dddu", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_tbxq">; // EXTQ - def EXTQ : SInst<"svextq_lane[_{d}]", "dddk", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_extq_lane", [], [ImmCheck<2, ImmCheck0_15>]>; + def EXTQ : SInst<"svextq[_{d}]", "dddk", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_extq", [], [ImmCheck<2, ImmCheck0_15>]>; // PMOV // Move to Pred multiclass PMOV_TO_PRED<string name, string types, string intrinsic, list<FlagType> flags=[], ImmCheckType immCh > { diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 84648c6d5500..7f97d6b6faa3 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4267,7 +4267,7 @@ def iquote : JoinedOrSeparate<["-"], "iquote">, Group<clang_i_Group>, Visibility<[ClangOption, CC1Option]>, HelpText<"Add directory to QUOTE include search path">, MetaVarName<"<directory>">; def isysroot : JoinedOrSeparate<["-"], "isysroot">, Group<clang_i_Group>, - Visibility<[ClangOption, CC1Option]>, + Visibility<[ClangOption, CC1Option, FlangOption]>, HelpText<"Set the system root directory (usually /)">, MetaVarName<"<dir>">, MarshallingInfoString<HeaderSearchOpts<"Sysroot">, [{"/"}]>; def isystem : JoinedOrSeparate<["-"], "isystem">, Group<clang_i_Group>, @@ -4585,11 +4585,13 @@ let Flags = [TargetSpecific] in { def menable_experimental_extensions : Flag<["-"], "menable-experimental-extensions">, Group<m_Group>, HelpText<"Enable use of experimental RISC-V extensions.">; def mrvv_vector_bits_EQ : Joined<["-"], "mrvv-vector-bits=">, Group<m_Group>, - HelpText<"Specify the size in bits of an RVV vector register. Defaults to " - "the vector length agnostic value of \"scalable\". Accepts power of " - "2 values between 64 and 65536. Also accepts \"zvl\" " - "to use the value implied by -march/-mcpu. Value will be reflected " - "in __riscv_v_fixed_vlen preprocessor define (RISC-V only)">; + Visibility<[ClangOption, FlangOption]>, + HelpText<"Specify the size in bits of an RVV vector register">, + DocBrief<"Defaults to the vector length agnostic value of \"scalable\". " + "Accepts power of 2 values between 64 and 65536. Also accepts " + "\"zvl\" to use the value implied by -march/-mcpu. 
On Clang, value " + "will be reflected in __riscv_v_fixed_vlen preprocessor define " + "(RISC-V only)">; def munaligned_access : Flag<["-"], "munaligned-access">, Group<m_Group>, HelpText<"Allow memory accesses to be unaligned (AArch32/AArch64/LoongArch/RISC-V only)">; @@ -5197,7 +5199,7 @@ def nohipwrapperinc : Flag<["-"], "nohipwrapperinc">, Group<IncludePath_Group>, HelpText<"Do not include the default HIP wrapper headers and include paths">; def : Flag<["-"], "nocudainc">, Alias<nogpuinc>; def nogpulib : Flag<["-"], "nogpulib">, MarshallingInfoFlag<LangOpts<"NoGPULib">>, - Visibility<[ClangOption, CC1Option]>, + Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>, HelpText<"Do not link device library for CUDA/HIP device compilation">; def : Flag<["-"], "nocudalib">, Alias<nogpulib>; def gpulibc : Flag<["-"], "gpulibc">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>, @@ -7010,6 +7012,8 @@ def mbranch_target_enforce : Flag<["-"], "mbranch-target-enforce">, MarshallingInfoFlag<LangOpts<"BranchTargetEnforcement">>; def mbranch_protection_pauth_lr : Flag<["-"], "mbranch-protection-pauth-lr">, MarshallingInfoFlag<LangOpts<"BranchProtectionPAuthLR">>; +def mguarded_control_stack : Flag<["-"], "mguarded-control-stack">, + MarshallingInfoFlag<LangOpts<"GuardedControlStack">>; def fno_dllexport_inlines : Flag<["-"], "fno-dllexport-inlines">, MarshallingInfoNegativeFlag<LangOpts<"DllExportInlines">>; def cfguard_no_checks : Flag<["-"], "cfguard-no-checks">, diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h index 8604dea689f9..59b645ecab71 100644 --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -225,6 +225,22 @@ struct FormatStyle { /// bbb = 2; /// \endcode bool AlignCompound; + /// Only for ``AlignConsecutiveDeclarations``. Whether function pointers are + /// aligned. + /// \code + /// true: + /// unsigned i; + /// int &r; + /// int *p; + /// int (*f)(); + /// + /// false: + /// unsigned i; + /// int &r; + /// int *p; + /// int (*f)(); + /// \endcode + bool AlignFunctionPointers; /// Only for ``AlignConsecutiveAssignments``. Whether short assignment /// operators are left-padded to the same length as long ones in order to /// put all assignment operators to the right of the left hand side. @@ -247,7 +263,9 @@ struct FormatStyle { bool operator==(const AlignConsecutiveStyle &R) const { return Enabled == R.Enabled && AcrossEmptyLines == R.AcrossEmptyLines && AcrossComments == R.AcrossComments && - AlignCompound == R.AlignCompound && PadOperators == R.PadOperators; + AlignCompound == R.AlignCompound && + AlignFunctionPointers == R.AlignFunctionPointers && + PadOperators == R.PadOperators; } bool operator!=(const AlignConsecutiveStyle &R) const { return !(*this == R); diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index 2dbe090bd093..186dbb770858 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -234,6 +234,26 @@ class Parser : public CodeCompletionHandler { /// Parsing OpenACC directive mode. bool OpenACCDirectiveParsing = false; + /// Currently parsing a situation where an OpenACC array section could be + /// legal, such as a 'var-list'. + bool AllowOpenACCArraySections = false; + + /// RAII object to set reset OpenACC parsing a context where Array Sections + /// are allowed. 
+ class OpenACCArraySectionRAII { + Parser &P; + + public: + OpenACCArraySectionRAII(Parser &P) : P(P) { + assert(!P.AllowOpenACCArraySections); + P.AllowOpenACCArraySections = true; + } + ~OpenACCArraySectionRAII() { + assert(P.AllowOpenACCArraySections); + P.AllowOpenACCArraySections = false; + } + }; + /// When true, we are directly inside an Objective-C message /// send expression. /// @@ -3546,8 +3566,8 @@ private: ExprResult ParseOpenACCIDExpression(); /// Parses the variable list for the `cache` construct. void ParseOpenACCCacheVarList(); - /// Parses a single variable in a variable list for the 'cache' construct. - bool ParseOpenACCCacheVar(); + /// Parses a single variable in a variable list for OpenACC. + bool ParseOpenACCVar(); bool ParseOpenACCWaitArgument(); private: diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index edaee4c4b66d..cf2d4fbe6d3b 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -4799,6 +4799,8 @@ public: bool CheckAlwaysInlineAttr(const Stmt *OrigSt, const Stmt *CurSt, const AttributeCommonInfo &A); + bool CheckCountedByAttr(Scope *Scope, const FieldDecl *FD); + /// Adjust the calling convention of a method to be the ABI default if it /// wasn't specified explicitly. This handles method types formed from /// function type typedefs and typename template arguments. @@ -5642,6 +5644,7 @@ public: CorrectionCandidateCallback &CCC, TemplateArgumentListInfo *ExplicitTemplateArgs = nullptr, ArrayRef<Expr *> Args = std::nullopt, + DeclContext *LookupCtx = nullptr, TypoExpr **Out = nullptr); DeclResult LookupIvarInObjCMethod(LookupResult &Lookup, Scope *S, diff --git a/clang/include/clang/Sema/TypoCorrection.h b/clang/include/clang/Sema/TypoCorrection.h index e0f8d152dbe5..09de164297e7 100644 --- a/clang/include/clang/Sema/TypoCorrection.h +++ b/clang/include/clang/Sema/TypoCorrection.h @@ -282,7 +282,7 @@ class CorrectionCandidateCallback { public: static const unsigned InvalidDistance = TypoCorrection::InvalidDistance; - explicit CorrectionCandidateCallback(IdentifierInfo *Typo = nullptr, + explicit CorrectionCandidateCallback(const IdentifierInfo *Typo = nullptr, NestedNameSpecifier *TypoNNS = nullptr) : Typo(Typo), TypoNNS(TypoNNS) {} @@ -319,7 +319,7 @@ public: /// this method. virtual std::unique_ptr<CorrectionCandidateCallback> clone() = 0; - void setTypoName(IdentifierInfo *II) { Typo = II; } + void setTypoName(const IdentifierInfo *II) { Typo = II; } void setTypoNNS(NestedNameSpecifier *NNS) { TypoNNS = NNS; } // Flags for context-dependent keywords. 
WantFunctionLikeCasts is only @@ -345,13 +345,13 @@ protected: candidate.getCorrectionSpecifier() == TypoNNS; } - IdentifierInfo *Typo; + const IdentifierInfo *Typo; NestedNameSpecifier *TypoNNS; }; class DefaultFilterCCC final : public CorrectionCandidateCallback { public: - explicit DefaultFilterCCC(IdentifierInfo *Typo = nullptr, + explicit DefaultFilterCCC(const IdentifierInfo *Typo = nullptr, NestedNameSpecifier *TypoNNS = nullptr) : CorrectionCandidateCallback(Typo, TypoNNS) {} @@ -365,6 +365,10 @@ public: template <class C> class DeclFilterCCC final : public CorrectionCandidateCallback { public: + explicit DeclFilterCCC(const IdentifierInfo *Typo = nullptr, + NestedNameSpecifier *TypoNNS = nullptr) + : CorrectionCandidateCallback(Typo, TypoNNS) {} + bool ValidateCandidate(const TypoCorrection &candidate) override { return candidate.getCorrectionDeclAs<C>(); } diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index b60dcfaabfd1..d9cefcaa84d7 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -1318,6 +1318,13 @@ void ASTContext::InitBuiltinTypes(const TargetInfo &Target, InitBuiltinType(OMPArrayShapingTy, BuiltinType::OMPArrayShaping); InitBuiltinType(OMPIteratorTy, BuiltinType::OMPIterator); } + // Placeholder type for OpenACC array sections. + if (LangOpts.OpenACC) { + // FIXME: Once we implement OpenACC array sections in Sema, this will either + // be combined with the OpenMP type, or given its own type. In the meantime, + // just use the OpenMP type so that parsing can work. + InitBuiltinType(OMPArraySectionTy, BuiltinType::OMPArraySection); + } if (LangOpts.MatrixTypes) InitBuiltinType(IncompleteMatrixIdxTy, BuiltinType::IncompleteMatrixIdx); diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index 5e5570bb42a1..b762d6a4cd38 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -5929,15 +5929,22 @@ ExpectedDecl ASTNodeImporter::VisitClassTemplateDecl(ClassTemplateDecl *D) { if (ToD) return ToD; - bool IsFriendTemplate = D->getFriendObjectKind() != Decl::FOK_None; - bool IsDependentContext = DC != LexicalDC ? LexicalDC->isDependentContext() - : DC->isDependentContext(); - bool DependentFriend = IsFriendTemplate && IsDependentContext; + // Should check if a declaration is friend in a dependent context. + // Such templates are not linked together in a declaration chain. + // The ASTImporter strategy is to map existing forward declarations to + // imported ones only if strictly necessary, otherwise import these as new + // forward declarations. In case of the "dependent friend" declarations, new + // declarations are created, but not linked in a declaration chain. + auto IsDependentFriend = [](ClassTemplateDecl *TD) { + return TD->getFriendObjectKind() != Decl::FOK_None && + TD->getLexicalDeclContext()->isDependentContext(); + }; + bool DependentFriend = IsDependentFriend(D); ClassTemplateDecl *FoundByLookup = nullptr; // We may already have a template of the same name; try to find and match it. - if (!DependentFriend && !DC->isFunctionOrMethod()) { + if (!DC->isFunctionOrMethod()) { SmallVector<NamedDecl *, 4> ConflictingDecls; auto FoundDecls = Importer.findDeclsInToCtx(DC, Name); for (auto *FoundDecl : FoundDecls) { @@ -5953,10 +5960,13 @@ ExpectedDecl ASTNodeImporter::VisitClassTemplateDecl(ClassTemplateDecl *D) { // FIXME: sufficient conditon for 'IgnoreTemplateParmDepth'? 
bool IgnoreTemplateParmDepth = - FoundTemplate->getFriendObjectKind() != Decl::FOK_None && - !D->specializations().empty(); + (FoundTemplate->getFriendObjectKind() != Decl::FOK_None) != + (D->getFriendObjectKind() != Decl::FOK_None); if (IsStructuralMatch(D, FoundTemplate, /*Complain=*/true, IgnoreTemplateParmDepth)) { + if (DependentFriend || IsDependentFriend(FoundTemplate)) + continue; + ClassTemplateDecl *TemplateWithDef = getTemplateDefinition(FoundTemplate); if (D->isThisDeclarationADefinition() && TemplateWithDef) @@ -9030,6 +9040,10 @@ class AttrImporter { public: AttrImporter(ASTImporter &I) : Importer(I), NImporter(I) {} + // Useful for accessing the imported attribute. + template <typename T> T *castAttrAs() { return cast<T>(ToAttr); } + template <typename T> const T *castAttrAs() const { return cast<T>(ToAttr); } + // Create an "importer" for an attribute parameter. // Result of the 'value()' of that object is to be passed to the function // 'importAttr', in the order that is expected by the attribute class. @@ -9243,6 +9257,15 @@ Expected<Attr *> ASTImporter::Import(const Attr *FromAttr) { From->args_size()); break; } + case attr::CountedBy: { + AI.cloneAttr(FromAttr); + const auto *CBA = cast<CountedByAttr>(FromAttr); + Expected<SourceRange> SR = Import(CBA->getCountedByFieldLoc()).get(); + if (!SR) + return SR.takeError(); + AI.castAttrAs<CountedByAttr>()->setCountedByFieldLoc(SR.get()); + break; + } default: { // The default branch works for attributes that have no arguments to import. diff --git a/clang/lib/AST/DeclBase.cpp b/clang/lib/AST/DeclBase.cpp index b1733c2d052a..8163f9bdaf8d 100644 --- a/clang/lib/AST/DeclBase.cpp +++ b/clang/lib/AST/DeclBase.cpp @@ -29,7 +29,6 @@ #include "clang/AST/Type.h" #include "clang/Basic/IdentifierTable.h" #include "clang/Basic/LLVM.h" -#include "clang/Basic/LangOptions.h" #include "clang/Basic/Module.h" #include "clang/Basic/ObjCRuntime.h" #include "clang/Basic/PartialDiagnostic.h" @@ -411,6 +410,79 @@ bool Decl::isFileContextDecl() const { return DC && DC->isFileContext(); } +bool Decl::isFlexibleArrayMemberLike( + ASTContext &Ctx, const Decl *D, QualType Ty, + LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel, + bool IgnoreTemplateOrMacroSubstitution) { + // For compatibility with existing code, we treat arrays of length 0 or + // 1 as flexible array members. + const auto *CAT = Ctx.getAsConstantArrayType(Ty); + if (CAT) { + using FAMKind = LangOptions::StrictFlexArraysLevelKind; + + llvm::APInt Size = CAT->getSize(); + if (StrictFlexArraysLevel == FAMKind::IncompleteOnly) + return false; + + // GCC extension, only allowed to represent a FAM. + if (Size.isZero()) + return true; + + if (StrictFlexArraysLevel == FAMKind::ZeroOrIncomplete && Size.uge(1)) + return false; + + if (StrictFlexArraysLevel == FAMKind::OneZeroOrIncomplete && Size.uge(2)) + return false; + } else if (!Ctx.getAsIncompleteArrayType(Ty)) { + return false; + } + + if (const auto *OID = dyn_cast_if_present<ObjCIvarDecl>(D)) + return OID->getNextIvar() == nullptr; + + const auto *FD = dyn_cast_if_present<FieldDecl>(D); + if (!FD) + return false; + + if (CAT) { + // GCC treats an array memeber of a union as an FAM if the size is one or + // zero. + llvm::APInt Size = CAT->getSize(); + if (FD->getParent()->isUnion() && (Size.isZero() || Size.isOne())) + return true; + } + + // Don't consider sizes resulting from macro expansions or template argument + // substitution to form C89 tail-padded arrays. 
+ if (IgnoreTemplateOrMacroSubstitution) { + TypeSourceInfo *TInfo = FD->getTypeSourceInfo(); + while (TInfo) { + TypeLoc TL = TInfo->getTypeLoc(); + + // Look through typedefs. + if (TypedefTypeLoc TTL = TL.getAsAdjusted<TypedefTypeLoc>()) { + const TypedefNameDecl *TDL = TTL.getTypedefNameDecl(); + TInfo = TDL->getTypeSourceInfo(); + continue; + } + + if (auto CTL = TL.getAs<ConstantArrayTypeLoc>()) { + if (const Expr *SizeExpr = + dyn_cast_if_present<IntegerLiteral>(CTL.getSizeExpr()); + !SizeExpr || SizeExpr->getExprLoc().isMacroID()) + return false; + } + + break; + } + } + + // Test that the field is the last in the structure. + RecordDecl::field_iterator FI( + DeclContext::decl_iterator(const_cast<FieldDecl *>(FD))); + return ++FI == FD->getParent()->field_end(); +} + TranslationUnitDecl *Decl::getTranslationUnitDecl() { if (auto *TUD = dyn_cast<TranslationUnitDecl>(this)) return TUD; diff --git a/clang/lib/AST/DeclCXX.cpp b/clang/lib/AST/DeclCXX.cpp index c944862fcefe..98b0a6dc28ea 100644 --- a/clang/lib/AST/DeclCXX.cpp +++ b/clang/lib/AST/DeclCXX.cpp @@ -587,6 +587,19 @@ bool CXXRecordDecl::isTriviallyCopyable() const { return true; } +bool CXXRecordDecl::isTriviallyCopyConstructible() const { + + // A trivially copy constructible class is a class that: + // -- has no non-trivial copy constructors, + if (hasNonTrivialCopyConstructor()) + return false; + // -- has a trivial destructor. + if (!hasTrivialDestructor()) + return false; + + return true; +} + void CXXRecordDecl::markedVirtualFunctionPure() { // C++ [class.abstract]p2: // A class is abstract if it has at least one pure virtual function. diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index a90f92d07f86..b125fc676da8 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -205,85 +205,22 @@ bool Expr::isKnownToHaveBooleanValue(bool Semantic) const { } bool Expr::isFlexibleArrayMemberLike( - ASTContext &Context, + ASTContext &Ctx, LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel, bool IgnoreTemplateOrMacroSubstitution) const { - - // For compatibility with existing code, we treat arrays of length 0 or - // 1 as flexible array members. - const auto *CAT = Context.getAsConstantArrayType(getType()); - if (CAT) { - llvm::APInt Size = CAT->getSize(); - - using FAMKind = LangOptions::StrictFlexArraysLevelKind; - - if (StrictFlexArraysLevel == FAMKind::IncompleteOnly) - return false; - - // GCC extension, only allowed to represent a FAM. - if (Size == 0) - return true; - - if (StrictFlexArraysLevel == FAMKind::ZeroOrIncomplete && Size.uge(1)) - return false; - - if (StrictFlexArraysLevel == FAMKind::OneZeroOrIncomplete && Size.uge(2)) - return false; - } else if (!Context.getAsIncompleteArrayType(getType())) - return false; - const Expr *E = IgnoreParens(); + const Decl *D = nullptr; - const NamedDecl *ND = nullptr; - if (const auto *DRE = dyn_cast<DeclRefExpr>(E)) - ND = DRE->getDecl(); - else if (const auto *ME = dyn_cast<MemberExpr>(E)) - ND = ME->getMemberDecl(); + if (const auto *ME = dyn_cast<MemberExpr>(E)) + D = ME->getMemberDecl(); + else if (const auto *DRE = dyn_cast<DeclRefExpr>(E)) + D = DRE->getDecl(); else if (const auto *IRE = dyn_cast<ObjCIvarRefExpr>(E)) - return IRE->getDecl()->getNextIvar() == nullptr; - - if (!ND) - return false; + D = IRE->getDecl(); - // A flexible array member must be the last member in the class. - // FIXME: If the base type of the member expr is not FD->getParent(), - // this should not be treated as a flexible array member access. 
- if (const auto *FD = dyn_cast<FieldDecl>(ND)) { - // GCC treats an array memeber of a union as an FAM if the size is one or - // zero. - if (CAT) { - llvm::APInt Size = CAT->getSize(); - if (FD->getParent()->isUnion() && (Size.isZero() || Size.isOne())) - return true; - } - - // Don't consider sizes resulting from macro expansions or template argument - // substitution to form C89 tail-padded arrays. - if (IgnoreTemplateOrMacroSubstitution) { - TypeSourceInfo *TInfo = FD->getTypeSourceInfo(); - while (TInfo) { - TypeLoc TL = TInfo->getTypeLoc(); - // Look through typedefs. - if (TypedefTypeLoc TTL = TL.getAsAdjusted<TypedefTypeLoc>()) { - const TypedefNameDecl *TDL = TTL.getTypedefNameDecl(); - TInfo = TDL->getTypeSourceInfo(); - continue; - } - if (ConstantArrayTypeLoc CTL = TL.getAs<ConstantArrayTypeLoc>()) { - const Expr *SizeExpr = dyn_cast<IntegerLiteral>(CTL.getSizeExpr()); - if (!SizeExpr || SizeExpr->getExprLoc().isMacroID()) - return false; - } - break; - } - } - - RecordDecl::field_iterator FI( - DeclContext::decl_iterator(const_cast<FieldDecl *>(FD))); - return ++FI == FD->getParent()->field_end(); - } - - return false; + return Decl::isFlexibleArrayMemberLike(Ctx, D, E->getType(), + StrictFlexArraysLevel, + IgnoreTemplateOrMacroSubstitution); } const ValueDecl * diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp index e6b3097a80d8..7f8bbe787324 100644 --- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp +++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp @@ -114,6 +114,8 @@ bool ByteCodeExprGen<Emitter>::VisitCastExpr(const CastExpr *CE) { } case CK_FloatingCast: { + if (DiscardResult) + return this->discard(SubExpr); if (!this->visit(SubExpr)) return false; const auto *TargetSemantics = &Ctx.getFloatSemantics(CE->getType()); @@ -121,6 +123,8 @@ bool ByteCodeExprGen<Emitter>::VisitCastExpr(const CastExpr *CE) { } case CK_IntegralToFloating: { + if (DiscardResult) + return this->discard(SubExpr); std::optional<PrimType> FromT = classify(SubExpr->getType()); if (!FromT) return false; @@ -135,6 +139,9 @@ bool ByteCodeExprGen<Emitter>::VisitCastExpr(const CastExpr *CE) { case CK_FloatingToBoolean: case CK_FloatingToIntegral: { + if (DiscardResult) + return this->discard(SubExpr); + std::optional<PrimType> ToT = classify(CE->getType()); if (!ToT) diff --git a/clang/lib/AST/Interp/Descriptor.cpp b/clang/lib/AST/Interp/Descriptor.cpp index 59a952135a2d..b330e54baf33 100644 --- a/clang/lib/AST/Interp/Descriptor.cpp +++ b/clang/lib/AST/Interp/Descriptor.cpp @@ -275,8 +275,8 @@ Descriptor::Descriptor(const DeclTy &D, const Descriptor *Elem, MetadataSize MD, } /// Unknown-size arrays of composite elements. -Descriptor::Descriptor(const DeclTy &D, Descriptor *Elem, bool IsTemporary, - UnknownSize) +Descriptor::Descriptor(const DeclTy &D, const Descriptor *Elem, + bool IsTemporary, UnknownSize) : Source(D), ElemSize(Elem->getAllocSize() + sizeof(InlineDescriptor)), Size(UnknownSizeMark), MDSize(0), AllocSize(alignof(void *) + sizeof(InitMapPtr)), ElemDesc(Elem), @@ -286,7 +286,7 @@ Descriptor::Descriptor(const DeclTy &D, Descriptor *Elem, bool IsTemporary, } /// Composite records. 
-Descriptor::Descriptor(const DeclTy &D, Record *R, MetadataSize MD, +Descriptor::Descriptor(const DeclTy &D, const Record *R, MetadataSize MD, bool IsConst, bool IsTemporary, bool IsMutable) : Source(D), ElemSize(std::max<size_t>(alignof(void *), R->getFullSize())), Size(ElemSize), MDSize(MD.value_or(0)), AllocSize(Size + MDSize), diff --git a/clang/lib/AST/Interp/Descriptor.h b/clang/lib/AST/Interp/Descriptor.h index 8135f3d12f70..580c200f9095 100644 --- a/clang/lib/AST/Interp/Descriptor.h +++ b/clang/lib/AST/Interp/Descriptor.h @@ -100,7 +100,7 @@ public: static constexpr MetadataSize InlineDescMD = sizeof(InlineDescriptor); /// Pointer to the record, if block contains records. - Record *const ElemRecord = nullptr; + const Record *const ElemRecord = nullptr; /// Descriptor of the array element. const Descriptor *const ElemDesc = nullptr; /// Flag indicating if the block is mutable. @@ -135,10 +135,11 @@ public: unsigned NumElems, bool IsConst, bool IsTemporary, bool IsMutable); /// Allocates a descriptor for an array of composites of unknown size. - Descriptor(const DeclTy &D, Descriptor *Elem, bool IsTemporary, UnknownSize); + Descriptor(const DeclTy &D, const Descriptor *Elem, bool IsTemporary, + UnknownSize); /// Allocates a descriptor for a record. - Descriptor(const DeclTy &D, Record *R, MetadataSize MD, bool IsConst, + Descriptor(const DeclTy &D, const Record *R, MetadataSize MD, bool IsConst, bool IsTemporary, bool IsMutable); Descriptor(const DeclTy &D, MetadataSize MD); diff --git a/clang/lib/AST/Interp/Interp.cpp b/clang/lib/AST/Interp/Interp.cpp index 21ea2503b94b..9de0926b9dba 100644 --- a/clang/lib/AST/Interp/Interp.cpp +++ b/clang/lib/AST/Interp/Interp.cpp @@ -134,6 +134,18 @@ void cleanupAfterFunctionCall(InterpState &S, CodePtr OpPC) { if (CurFunc->isUnevaluatedBuiltin()) return; + // Some builtin functions require us to only look at the call site, since + // the classified parameter types do not match. + if (CurFunc->isBuiltin()) { + const auto *CE = + cast<CallExpr>(S.Current->Caller->getExpr(S.Current->getRetPC())); + for (int32_t I = CE->getNumArgs() - 1; I >= 0; --I) { + const Expr *A = CE->getArg(I); + popArg(S, A); + } + return; + } + if (S.Current->Caller && CurFunc->isVariadic()) { // CallExpr we're look for is at the return PC of the current function, i.e. // in the caller. 
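As context for the Decl::isFlexibleArrayMemberLike() logic moved into DeclBase.cpp above, here is a small sketch of which trailing arrays each strictness level still treats as flexible-array-member-like, assuming the usual mapping of the StrictFlexArraysLevelKind values onto -fstrict-flex-arrays=0..3; the struct names are illustrative:

```c
/* -fstrict-flex-arrays=3 (IncompleteOnly):      only s0::a counts.
   -fstrict-flex-arrays=2 (ZeroOrIncomplete):    s0::a and s1::a count.
   -fstrict-flex-arrays=1 (OneZeroOrIncomplete): s0::a, s1::a and s2::a count.
   -fstrict-flex-arrays=0 (default):             any trailing array, including s3::a. */
struct s0 { int n; int a[]; };
struct s1 { int n; int a[0]; }; /* GCC zero-length-array extension */
struct s2 { int n; int a[1]; };
struct s3 { int n; int a[8]; };
```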
diff --git a/clang/lib/AST/Interp/InterpBuiltin.cpp b/clang/lib/AST/Interp/InterpBuiltin.cpp index b55b1569a259..754ca96b0c64 100644 --- a/clang/lib/AST/Interp/InterpBuiltin.cpp +++ b/clang/lib/AST/Interp/InterpBuiltin.cpp @@ -164,6 +164,8 @@ static bool retPrimValue(InterpState &S, CodePtr OpPC, APValue &Result, case X: \ return Ret<X>(S, OpPC, Result); switch (*T) { + RET_CASE(PT_Ptr); + RET_CASE(PT_FnPtr); RET_CASE(PT_Float); RET_CASE(PT_Bool); RET_CASE(PT_Sint8); @@ -613,15 +615,34 @@ static bool interp__builtin_ffs(InterpState &S, CodePtr OpPC, return true; } +static bool interp__builtin_addressof(InterpState &S, CodePtr OpPC, + const InterpFrame *Frame, + const Function *Func, + const CallExpr *Call) { + PrimType PtrT = + S.getContext().classify(Call->getArg(0)->getType()).value_or(PT_Ptr); + + if (PtrT == PT_FnPtr) { + const FunctionPointer &Arg = S.Stk.peek<FunctionPointer>(); + S.Stk.push<FunctionPointer>(Arg); + } else if (PtrT == PT_Ptr) { + const Pointer &Arg = S.Stk.peek<Pointer>(); + S.Stk.push<Pointer>(Arg); + } else { + assert(false && "Unsupported pointer type passed to __builtin_addressof()"); + } + return true; +} + bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F, const CallExpr *Call) { InterpFrame *Frame = S.Current; APValue Dummy; - QualType ReturnType = Call->getCallReturnType(S.getCtx()); - std::optional<PrimType> ReturnT = S.getContext().classify(ReturnType); + std::optional<PrimType> ReturnT = S.getContext().classify(Call->getType()); + // If classify failed, we assume void. - assert(ReturnT || ReturnType->isVoidType()); + assert(ReturnT || Call->getType()->isVoidType()); switch (F->getBuiltinID()) { case Builtin::BI__builtin_is_constant_evaluated: @@ -820,6 +841,12 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F, if (!interp__builtin_ffs(S, OpPC, Frame, F, Call)) return false; break; + case Builtin::BIaddressof: + case Builtin::BI__addressof: + case Builtin::BI__builtin_addressof: + if (!interp__builtin_addressof(S, OpPC, Frame, F, Call)) + return false; + break; default: return false; diff --git a/clang/lib/AST/Interp/Program.cpp b/clang/lib/AST/Interp/Program.cpp index 52e13398163e..1daefab4dcda 100644 --- a/clang/lib/AST/Interp/Program.cpp +++ b/clang/lib/AST/Interp/Program.cpp @@ -315,14 +315,14 @@ Descriptor *Program::createDescriptor(const DeclTy &D, const Type *Ty, bool IsConst, bool IsTemporary, bool IsMutable, const Expr *Init) { // Classes and structures. - if (auto *RT = Ty->getAs<RecordType>()) { - if (auto *Record = getOrCreateRecord(RT->getDecl())) + if (const auto *RT = Ty->getAs<RecordType>()) { + if (const auto *Record = getOrCreateRecord(RT->getDecl())) return allocateDescriptor(D, Record, MDSize, IsConst, IsTemporary, IsMutable); } // Arrays. - if (auto ArrayType = Ty->getAsArrayTypeUnsafe()) { + if (const auto ArrayType = Ty->getAsArrayTypeUnsafe()) { QualType ElemTy = ArrayType->getElementType(); // Array of well-known bounds. if (auto CAT = dyn_cast<ConstantArrayType>(ArrayType)) { @@ -338,7 +338,7 @@ Descriptor *Program::createDescriptor(const DeclTy &D, const Type *Ty, } else { // Arrays of composites. In this case, the array is a list of pointers, // followed by the actual elements. 
- Descriptor *ElemDesc = createDescriptor( + const Descriptor *ElemDesc = createDescriptor( D, ElemTy.getTypePtr(), std::nullopt, IsConst, IsTemporary); if (!ElemDesc) return nullptr; @@ -358,8 +358,8 @@ Descriptor *Program::createDescriptor(const DeclTy &D, const Type *Ty, return allocateDescriptor(D, *T, IsTemporary, Descriptor::UnknownSize{}); } else { - Descriptor *Desc = createDescriptor(D, ElemTy.getTypePtr(), MDSize, - IsConst, IsTemporary); + const Descriptor *Desc = createDescriptor(D, ElemTy.getTypePtr(), + MDSize, IsConst, IsTemporary); if (!Desc) return nullptr; return allocateDescriptor(D, Desc, IsTemporary, @@ -369,14 +369,14 @@ Descriptor *Program::createDescriptor(const DeclTy &D, const Type *Ty, } // Atomic types. - if (auto *AT = Ty->getAs<AtomicType>()) { + if (const auto *AT = Ty->getAs<AtomicType>()) { const Type *InnerTy = AT->getValueType().getTypePtr(); return createDescriptor(D, InnerTy, MDSize, IsConst, IsTemporary, IsMutable); } // Complex types - represented as arrays of elements. - if (auto *CT = Ty->getAs<ComplexType>()) { + if (const auto *CT = Ty->getAs<ComplexType>()) { PrimType ElemTy = *Ctx.classify(CT->getElementType()); return allocateDescriptor(D, ElemTy, MDSize, 2, IsConst, IsTemporary, IsMutable); diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index a894d3289eb1..b419fc8836b0 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -2604,19 +2604,22 @@ bool QualType::isTrivialType(const ASTContext &Context) const { return false; } -bool QualType::isTriviallyCopyableType(const ASTContext &Context) const { - if ((*this)->isArrayType()) - return Context.getBaseElementType(*this).isTriviallyCopyableType(Context); +static bool isTriviallyCopyableTypeImpl(const QualType &type, + const ASTContext &Context, + bool IsCopyConstructible) { + if (type->isArrayType()) + return isTriviallyCopyableTypeImpl(Context.getBaseElementType(type), + Context, IsCopyConstructible); - if (hasNonTrivialObjCLifetime()) + if (type.hasNonTrivialObjCLifetime()) return false; // C++11 [basic.types]p9 - See Core 2094 // Scalar types, trivially copyable class types, arrays of such types, and // cv-qualified versions of these types are collectively - // called trivially copyable types. + // called trivially copy constructible types. - QualType CanonicalType = getCanonicalType(); + QualType CanonicalType = type.getCanonicalType(); if (CanonicalType->isDependentType()) return false; @@ -2634,16 +2637,29 @@ bool QualType::isTriviallyCopyableType(const ASTContext &Context) const { if (const auto *RT = CanonicalType->getAs<RecordType>()) { if (const auto *ClassDecl = dyn_cast<CXXRecordDecl>(RT->getDecl())) { - if (!ClassDecl->isTriviallyCopyable()) return false; + if (IsCopyConstructible) { + return ClassDecl->isTriviallyCopyConstructible(); + } else { + return ClassDecl->isTriviallyCopyable(); + } } - return true; } - // No other types can match. 
return false; } +bool QualType::isTriviallyCopyableType(const ASTContext &Context) const { + return isTriviallyCopyableTypeImpl(*this, Context, + /*IsCopyConstructible=*/false); +} + +bool QualType::isTriviallyCopyConstructibleType( + const ASTContext &Context) const { + return isTriviallyCopyableTypeImpl(*this, Context, + /*IsCopyConstructible=*/true); +} + bool QualType::isTriviallyRelocatableType(const ASTContext &Context) const { QualType BaseElementType = Context.getBaseElementType(*this); diff --git a/clang/lib/Analysis/ExprMutationAnalyzer.cpp b/clang/lib/Analysis/ExprMutationAnalyzer.cpp index 624a643cc60e..c0de9277ff86 100644 --- a/clang/lib/Analysis/ExprMutationAnalyzer.cpp +++ b/clang/lib/Analysis/ExprMutationAnalyzer.cpp @@ -15,6 +15,81 @@ namespace clang { using namespace ast_matchers; +// Check if result of Source expression could be a Target expression. +// Checks: +// - Implicit Casts +// - Binary Operators +// - ConditionalOperator +// - BinaryConditionalOperator +static bool canExprResolveTo(const Expr *Source, const Expr *Target) { + + const auto IgnoreDerivedToBase = [](const Expr *E, auto Matcher) { + if (Matcher(E)) + return true; + if (const auto *Cast = dyn_cast<ImplicitCastExpr>(E)) { + if ((Cast->getCastKind() == CK_DerivedToBase || + Cast->getCastKind() == CK_UncheckedDerivedToBase) && + Matcher(Cast->getSubExpr())) + return true; + } + return false; + }; + + const auto EvalCommaExpr = [](const Expr *E, auto Matcher) { + const Expr *Result = E; + while (const auto *BOComma = + dyn_cast_or_null<BinaryOperator>(Result->IgnoreParens())) { + if (!BOComma->isCommaOp()) + break; + Result = BOComma->getRHS(); + } + + return Result != E && Matcher(Result); + }; + + // The 'ConditionalOperatorM' matches on `<anything> ? <expr> : <expr>`. + // This matching must be recursive because `<expr>` can be anything resolving + // to the `InnerMatcher`, for example another conditional operator. + // The edge-case `BaseClass &b = <cond> ? DerivedVar1 : DerivedVar2;` + // is handled, too. The implicit cast happens outside of the conditional. + // This is matched by `IgnoreDerivedToBase(canResolveToExpr(InnerMatcher))` + // below. 
+ const auto ConditionalOperatorM = [Target](const Expr *E) { + if (const auto *OP = dyn_cast<ConditionalOperator>(E)) { + if (const auto *TE = OP->getTrueExpr()->IgnoreParens()) + if (canExprResolveTo(TE, Target)) + return true; + if (const auto *FE = OP->getFalseExpr()->IgnoreParens()) + if (canExprResolveTo(FE, Target)) + return true; + } + return false; + }; + + const auto ElvisOperator = [Target](const Expr *E) { + if (const auto *OP = dyn_cast<BinaryConditionalOperator>(E)) { + if (const auto *TE = OP->getTrueExpr()->IgnoreParens()) + if (canExprResolveTo(TE, Target)) + return true; + if (const auto *FE = OP->getFalseExpr()->IgnoreParens()) + if (canExprResolveTo(FE, Target)) + return true; + } + return false; + }; + + const Expr *SourceExprP = Source->IgnoreParens(); + return IgnoreDerivedToBase(SourceExprP, + [&](const Expr *E) { + return E == Target || ConditionalOperatorM(E) || + ElvisOperator(E); + }) || + EvalCommaExpr(SourceExprP, [&](const Expr *E) { + return IgnoreDerivedToBase( + E->IgnoreParens(), [&](const Expr *EE) { return EE == Target; }); + }); +} + namespace { AST_MATCHER_P(LambdaExpr, hasCaptureInit, const Expr *, E) { @@ -27,56 +102,14 @@ AST_MATCHER_P(CXXForRangeStmt, hasRangeStmt, return InnerMatcher.matches(*Range, Finder, Builder); } -AST_MATCHER_P(Expr, maybeEvalCommaExpr, ast_matchers::internal::Matcher<Expr>, - InnerMatcher) { - const Expr *Result = &Node; - while (const auto *BOComma = - dyn_cast_or_null<BinaryOperator>(Result->IgnoreParens())) { - if (!BOComma->isCommaOp()) - break; - Result = BOComma->getRHS(); - } - return InnerMatcher.matches(*Result, Finder, Builder); -} - -AST_MATCHER_P(Stmt, canResolveToExpr, ast_matchers::internal::Matcher<Stmt>, - InnerMatcher) { +AST_MATCHER_P(Stmt, canResolveToExpr, const Stmt *, Inner) { auto *Exp = dyn_cast<Expr>(&Node); - if (!Exp) { - return stmt().matches(Node, Finder, Builder); - } - - auto DerivedToBase = [](const ast_matchers::internal::Matcher<Expr> &Inner) { - return implicitCastExpr(anyOf(hasCastKind(CK_DerivedToBase), - hasCastKind(CK_UncheckedDerivedToBase)), - hasSourceExpression(Inner)); - }; - auto IgnoreDerivedToBase = - [&DerivedToBase](const ast_matchers::internal::Matcher<Expr> &Inner) { - return ignoringParens(expr(anyOf(Inner, DerivedToBase(Inner)))); - }; - - // The 'ConditionalOperator' matches on `<anything> ? <expr> : <expr>`. - // This matching must be recursive because `<expr>` can be anything resolving - // to the `InnerMatcher`, for example another conditional operator. - // The edge-case `BaseClass &b = <cond> ? DerivedVar1 : DerivedVar2;` - // is handled, too. The implicit cast happens outside of the conditional. - // This is matched by `IgnoreDerivedToBase(canResolveToExpr(InnerMatcher))` - // below. 
- auto const ConditionalOperator = conditionalOperator(anyOf( - hasTrueExpression(ignoringParens(canResolveToExpr(InnerMatcher))), - hasFalseExpression(ignoringParens(canResolveToExpr(InnerMatcher))))); - auto const ElvisOperator = binaryConditionalOperator(anyOf( - hasTrueExpression(ignoringParens(canResolveToExpr(InnerMatcher))), - hasFalseExpression(ignoringParens(canResolveToExpr(InnerMatcher))))); - - auto const ComplexMatcher = ignoringParens( - expr(anyOf(IgnoreDerivedToBase(InnerMatcher), - maybeEvalCommaExpr(IgnoreDerivedToBase(InnerMatcher)), - IgnoreDerivedToBase(ConditionalOperator), - IgnoreDerivedToBase(ElvisOperator)))); - - return ComplexMatcher.matches(*Exp, Finder, Builder); + if (!Exp) + return true; + auto *Target = dyn_cast<Expr>(Inner); + if (!Target) + return false; + return canExprResolveTo(Exp, Target); } // Similar to 'hasAnyArgument', but does not work because 'InitListExpr' does @@ -121,6 +154,12 @@ AST_MATCHER_P(GenericSelectionExpr, hasControllingExpr, return InnerMatcher.matches(*Node.getControllingExpr(), Finder, Builder); } +template <typename T> +ast_matchers::internal::Matcher<T> +findFirst(const ast_matchers::internal::Matcher<T> &Matcher) { + return anyOf(Matcher, hasDescendant(Matcher)); +} + const auto nonConstReferenceType = [] { return hasUnqualifiedDesugaredType( referenceType(pointee(unless(isConstQualified())))); @@ -220,8 +259,8 @@ bool ExprMutationAnalyzer::isUnevaluated(const Stmt *Exp, const Stmt &Stm, return selectFirst<Stmt>( NodeID<Expr>::value, match( - findAll( - stmt(canResolveToExpr(equalsNode(Exp)), + findFirst( + stmt(canResolveToExpr(Exp), anyOf( // `Exp` is part of the underlying expression of // decltype/typeof if it has an ancestor of @@ -275,44 +314,41 @@ const Stmt *ExprMutationAnalyzer::findDeclPointeeMutation( const Stmt *ExprMutationAnalyzer::findDirectMutation(const Expr *Exp) { // LHS of any assignment operators. - const auto AsAssignmentLhs = binaryOperator( - isAssignmentOperator(), hasLHS(canResolveToExpr(equalsNode(Exp)))); + const auto AsAssignmentLhs = + binaryOperator(isAssignmentOperator(), hasLHS(canResolveToExpr(Exp))); // Operand of increment/decrement operators. const auto AsIncDecOperand = unaryOperator(anyOf(hasOperatorName("++"), hasOperatorName("--")), - hasUnaryOperand(canResolveToExpr(equalsNode(Exp)))); + hasUnaryOperand(canResolveToExpr(Exp))); // Invoking non-const member function. // A member function is assumed to be non-const when it is unresolved. const auto NonConstMethod = cxxMethodDecl(unless(isConst())); const auto AsNonConstThis = expr(anyOf( - cxxMemberCallExpr(on(canResolveToExpr(equalsNode(Exp))), - unless(isConstCallee())), + cxxMemberCallExpr(on(canResolveToExpr(Exp)), unless(isConstCallee())), cxxOperatorCallExpr(callee(NonConstMethod), - hasArgument(0, canResolveToExpr(equalsNode(Exp)))), + hasArgument(0, canResolveToExpr(Exp))), // In case of a templated type, calling overloaded operators is not // resolved and modelled as `binaryOperator` on a dependent type. // Such instances are considered a modification, because they can modify // in different instantiations of the template. - binaryOperator( - hasEitherOperand(ignoringImpCasts(canResolveToExpr(equalsNode(Exp)))), - isTypeDependent()), + binaryOperator(isTypeDependent(), + hasEitherOperand(ignoringImpCasts(canResolveToExpr(Exp)))), // Within class templates and member functions the member expression might // not be resolved. In that case, the `callExpr` is considered to be a // modification. 
- callExpr( - callee(expr(anyOf(unresolvedMemberExpr(hasObjectExpression( - canResolveToExpr(equalsNode(Exp)))), - cxxDependentScopeMemberExpr(hasObjectExpression( - canResolveToExpr(equalsNode(Exp)))))))), + callExpr(callee(expr(anyOf( + unresolvedMemberExpr(hasObjectExpression(canResolveToExpr(Exp))), + cxxDependentScopeMemberExpr( + hasObjectExpression(canResolveToExpr(Exp))))))), // Match on a call to a known method, but the call itself is type // dependent (e.g. `vector<T> v; v.push(T{});` in a templated function). - callExpr(allOf(isTypeDependent(), - callee(memberExpr(hasDeclaration(NonConstMethod), - hasObjectExpression(canResolveToExpr( - equalsNode(Exp))))))))); + callExpr(allOf( + isTypeDependent(), + callee(memberExpr(hasDeclaration(NonConstMethod), + hasObjectExpression(canResolveToExpr(Exp)))))))); // Taking address of 'Exp'. // We're assuming 'Exp' is mutated as soon as its address is taken, though in @@ -322,11 +358,10 @@ const Stmt *ExprMutationAnalyzer::findDirectMutation(const Expr *Exp) { unaryOperator(hasOperatorName("&"), // A NoOp implicit cast is adding const. unless(hasParent(implicitCastExpr(hasCastKind(CK_NoOp)))), - hasUnaryOperand(canResolveToExpr(equalsNode(Exp)))); - const auto AsPointerFromArrayDecay = - castExpr(hasCastKind(CK_ArrayToPointerDecay), - unless(hasParent(arraySubscriptExpr())), - has(canResolveToExpr(equalsNode(Exp)))); + hasUnaryOperand(canResolveToExpr(Exp))); + const auto AsPointerFromArrayDecay = castExpr( + hasCastKind(CK_ArrayToPointerDecay), + unless(hasParent(arraySubscriptExpr())), has(canResolveToExpr(Exp))); // Treat calling `operator->()` of move-only classes as taking address. // These are typically smart pointers with unique ownership so we treat // mutation of pointee as mutation of the smart pointer itself. @@ -334,7 +369,7 @@ const Stmt *ExprMutationAnalyzer::findDirectMutation(const Expr *Exp) { hasOverloadedOperatorName("->"), callee( cxxMethodDecl(ofClass(isMoveOnly()), returns(nonConstPointerType()))), - argumentCountIs(1), hasArgument(0, canResolveToExpr(equalsNode(Exp)))); + argumentCountIs(1), hasArgument(0, canResolveToExpr(Exp))); // Used as non-const-ref argument when calling a function. // An argument is assumed to be non-const-ref when the function is unresolved. @@ -342,8 +377,8 @@ const Stmt *ExprMutationAnalyzer::findDirectMutation(const Expr *Exp) { // findFunctionArgMutation which has additional smarts for handling forwarding // references. 
const auto NonConstRefParam = forEachArgumentWithParamType( - anyOf(canResolveToExpr(equalsNode(Exp)), - memberExpr(hasObjectExpression(canResolveToExpr(equalsNode(Exp))))), + anyOf(canResolveToExpr(Exp), + memberExpr(hasObjectExpression(canResolveToExpr(Exp)))), nonConstReferenceType()); const auto NotInstantiated = unless(hasDeclaration(isInstantiated())); const auto TypeDependentCallee = @@ -354,19 +389,17 @@ const Stmt *ExprMutationAnalyzer::findDirectMutation(const Expr *Exp) { const auto AsNonConstRefArg = anyOf( callExpr(NonConstRefParam, NotInstantiated), cxxConstructExpr(NonConstRefParam, NotInstantiated), - callExpr(TypeDependentCallee, - hasAnyArgument(canResolveToExpr(equalsNode(Exp)))), - cxxUnresolvedConstructExpr( - hasAnyArgument(canResolveToExpr(equalsNode(Exp)))), + callExpr(TypeDependentCallee, hasAnyArgument(canResolveToExpr(Exp))), + cxxUnresolvedConstructExpr(hasAnyArgument(canResolveToExpr(Exp))), // Previous False Positive in the following Code: // `template <typename T> void f() { int i = 42; new Type<T>(i); }` // Where the constructor of `Type` takes its argument as reference. // The AST does not resolve in a `cxxConstructExpr` because it is // type-dependent. - parenListExpr(hasDescendant(expr(canResolveToExpr(equalsNode(Exp))))), + parenListExpr(hasDescendant(expr(canResolveToExpr(Exp)))), // If the initializer is for a reference type, there is no cast for // the variable. Values are cast to RValue first. - initListExpr(hasAnyInit(expr(canResolveToExpr(equalsNode(Exp)))))); + initListExpr(hasAnyInit(expr(canResolveToExpr(Exp))))); // Captured by a lambda by reference. // If we're initializing a capture with 'Exp' directly then we're initializing @@ -380,76 +413,72 @@ const Stmt *ExprMutationAnalyzer::findDirectMutation(const Expr *Exp) { // For returning by const-ref there will be an ImplicitCastExpr <NoOp> (for // adding const.) const auto AsNonConstRefReturn = - returnStmt(hasReturnValue(canResolveToExpr(equalsNode(Exp)))); + returnStmt(hasReturnValue(canResolveToExpr(Exp))); // It is used as a non-const-reference for initalizing a range-for loop. - const auto AsNonConstRefRangeInit = cxxForRangeStmt( - hasRangeInit(declRefExpr(allOf(canResolveToExpr(equalsNode(Exp)), - hasType(nonConstReferenceType()))))); + const auto AsNonConstRefRangeInit = cxxForRangeStmt(hasRangeInit(declRefExpr( + allOf(canResolveToExpr(Exp), hasType(nonConstReferenceType()))))); const auto Matches = match( - traverse(TK_AsIs, - findAll(stmt(anyOf(AsAssignmentLhs, AsIncDecOperand, - AsNonConstThis, AsAmpersandOperand, - AsPointerFromArrayDecay, AsOperatorArrowThis, - AsNonConstRefArg, AsLambdaRefCaptureInit, - AsNonConstRefReturn, AsNonConstRefRangeInit)) - .bind("stmt"))), + traverse( + TK_AsIs, + findFirst(stmt(anyOf(AsAssignmentLhs, AsIncDecOperand, AsNonConstThis, + AsAmpersandOperand, AsPointerFromArrayDecay, + AsOperatorArrowThis, AsNonConstRefArg, + AsLambdaRefCaptureInit, AsNonConstRefReturn, + AsNonConstRefRangeInit)) + .bind("stmt"))), Stm, Context); return selectFirst<Stmt>("stmt", Matches); } const Stmt *ExprMutationAnalyzer::findMemberMutation(const Expr *Exp) { // Check whether any member of 'Exp' is mutated. 
- const auto MemberExprs = - match(findAll(expr(anyOf(memberExpr(hasObjectExpression( - canResolveToExpr(equalsNode(Exp)))), - cxxDependentScopeMemberExpr(hasObjectExpression( - canResolveToExpr(equalsNode(Exp)))), - binaryOperator(hasOperatorName(".*"), - hasLHS(equalsNode(Exp))))) - .bind(NodeID<Expr>::value)), - Stm, Context); + const auto MemberExprs = match( + findAll(expr(anyOf(memberExpr(hasObjectExpression(canResolveToExpr(Exp))), + cxxDependentScopeMemberExpr( + hasObjectExpression(canResolveToExpr(Exp))), + binaryOperator(hasOperatorName(".*"), + hasLHS(equalsNode(Exp))))) + .bind(NodeID<Expr>::value)), + Stm, Context); return findExprMutation(MemberExprs); } const Stmt *ExprMutationAnalyzer::findArrayElementMutation(const Expr *Exp) { // Check whether any element of an array is mutated. - const auto SubscriptExprs = - match(findAll(arraySubscriptExpr( - anyOf(hasBase(canResolveToExpr(equalsNode(Exp))), - hasBase(implicitCastExpr( - allOf(hasCastKind(CK_ArrayToPointerDecay), - hasSourceExpression(canResolveToExpr( - equalsNode(Exp)))))))) - .bind(NodeID<Expr>::value)), - Stm, Context); + const auto SubscriptExprs = match( + findAll(arraySubscriptExpr( + anyOf(hasBase(canResolveToExpr(Exp)), + hasBase(implicitCastExpr(allOf( + hasCastKind(CK_ArrayToPointerDecay), + hasSourceExpression(canResolveToExpr(Exp))))))) + .bind(NodeID<Expr>::value)), + Stm, Context); return findExprMutation(SubscriptExprs); } const Stmt *ExprMutationAnalyzer::findCastMutation(const Expr *Exp) { // If the 'Exp' is explicitly casted to a non-const reference type the // 'Exp' is considered to be modified. - const auto ExplicitCast = match( - findAll( - stmt(castExpr(hasSourceExpression(canResolveToExpr(equalsNode(Exp))), - explicitCastExpr( - hasDestinationType(nonConstReferenceType())))) - .bind("stmt")), - Stm, Context); + const auto ExplicitCast = + match(findFirst(stmt(castExpr(hasSourceExpression(canResolveToExpr(Exp)), + explicitCastExpr(hasDestinationType( + nonConstReferenceType())))) + .bind("stmt")), + Stm, Context); if (const auto *CastStmt = selectFirst<Stmt>("stmt", ExplicitCast)) return CastStmt; // If 'Exp' is casted to any non-const reference type, check the castExpr. const auto Casts = match( - findAll( - expr(castExpr(hasSourceExpression(canResolveToExpr(equalsNode(Exp))), - anyOf(explicitCastExpr( - hasDestinationType(nonConstReferenceType())), - implicitCastExpr(hasImplicitDestinationType( - nonConstReferenceType()))))) - .bind(NodeID<Expr>::value)), + findAll(expr(castExpr(hasSourceExpression(canResolveToExpr(Exp)), + anyOf(explicitCastExpr(hasDestinationType( + nonConstReferenceType())), + implicitCastExpr(hasImplicitDestinationType( + nonConstReferenceType()))))) + .bind(NodeID<Expr>::value)), Stm, Context); if (const Stmt *S = findExprMutation(Casts)) @@ -458,7 +487,7 @@ const Stmt *ExprMutationAnalyzer::findCastMutation(const Expr *Exp) { const auto Calls = match(findAll(callExpr(callee(namedDecl( hasAnyName("::std::move", "::std::forward"))), - hasArgument(0, canResolveToExpr(equalsNode(Exp)))) + hasArgument(0, canResolveToExpr(Exp))) .bind("expr")), Stm, Context); return findExprMutation(Calls); @@ -473,16 +502,16 @@ const Stmt *ExprMutationAnalyzer::findRangeLoopMutation(const Expr *Exp) { // array is considered modified if the loop-variable is a non-const reference. 
const auto DeclStmtToNonRefToArray = declStmt(hasSingleDecl(varDecl(hasType( hasUnqualifiedDesugaredType(referenceType(pointee(arrayType()))))))); - const auto RefToArrayRefToElements = - match(findAll(stmt(cxxForRangeStmt( - hasLoopVariable( - varDecl(anyOf(hasType(nonConstReferenceType()), - hasType(nonConstPointerType()))) - .bind(NodeID<Decl>::value)), - hasRangeStmt(DeclStmtToNonRefToArray), - hasRangeInit(canResolveToExpr(equalsNode(Exp))))) - .bind("stmt")), - Stm, Context); + const auto RefToArrayRefToElements = match( + findFirst(stmt(cxxForRangeStmt( + hasLoopVariable( + varDecl(anyOf(hasType(nonConstReferenceType()), + hasType(nonConstPointerType()))) + .bind(NodeID<Decl>::value)), + hasRangeStmt(DeclStmtToNonRefToArray), + hasRangeInit(canResolveToExpr(Exp)))) + .bind("stmt")), + Stm, Context); if (const auto *BadRangeInitFromArray = selectFirst<Stmt>("stmt", RefToArrayRefToElements)) @@ -505,12 +534,12 @@ const Stmt *ExprMutationAnalyzer::findRangeLoopMutation(const Expr *Exp) { hasSingleDecl(varDecl(hasType(hasUnqualifiedDesugaredType(referenceType( pointee(hasDeclaration(cxxRecordDecl(HasAnyNonConstIterator))))))))); - const auto RefToContainerBadIterators = - match(findAll(stmt(cxxForRangeStmt(allOf( - hasRangeStmt(DeclStmtToNonConstIteratorContainer), - hasRangeInit(canResolveToExpr(equalsNode(Exp)))))) - .bind("stmt")), - Stm, Context); + const auto RefToContainerBadIterators = match( + findFirst(stmt(cxxForRangeStmt(allOf( + hasRangeStmt(DeclStmtToNonConstIteratorContainer), + hasRangeInit(canResolveToExpr(Exp))))) + .bind("stmt")), + Stm, Context); if (const auto *BadIteratorsContainer = selectFirst<Stmt>("stmt", RefToContainerBadIterators)) @@ -522,7 +551,7 @@ const Stmt *ExprMutationAnalyzer::findRangeLoopMutation(const Expr *Exp) { match(findAll(cxxForRangeStmt( hasLoopVariable(varDecl(hasType(nonConstReferenceType())) .bind(NodeID<Decl>::value)), - hasRangeInit(canResolveToExpr(equalsNode(Exp))))), + hasRangeInit(canResolveToExpr(Exp)))), Stm, Context); return findDeclMutation(LoopVars); } @@ -531,31 +560,29 @@ const Stmt *ExprMutationAnalyzer::findReferenceMutation(const Expr *Exp) { // Follow non-const reference returned by `operator*()` of move-only classes. // These are typically smart pointers with unique ownership so we treat // mutation of pointee as mutation of the smart pointer itself. - const auto Ref = - match(findAll(cxxOperatorCallExpr( - hasOverloadedOperatorName("*"), - callee(cxxMethodDecl(ofClass(isMoveOnly()), - returns(nonConstReferenceType()))), - argumentCountIs(1), - hasArgument(0, canResolveToExpr(equalsNode(Exp)))) - .bind(NodeID<Expr>::value)), - Stm, Context); + const auto Ref = match( + findAll(cxxOperatorCallExpr( + hasOverloadedOperatorName("*"), + callee(cxxMethodDecl(ofClass(isMoveOnly()), + returns(nonConstReferenceType()))), + argumentCountIs(1), hasArgument(0, canResolveToExpr(Exp))) + .bind(NodeID<Expr>::value)), + Stm, Context); if (const Stmt *S = findExprMutation(Ref)) return S; // If 'Exp' is bound to a non-const reference, check all declRefExpr to that. const auto Refs = match( stmt(forEachDescendant( - varDecl( - hasType(nonConstReferenceType()), - hasInitializer(anyOf(canResolveToExpr(equalsNode(Exp)), - memberExpr(hasObjectExpression( - canResolveToExpr(equalsNode(Exp)))))), - hasParent(declStmt().bind("stmt")), - // Don't follow the reference in range statement, we've - // handled that separately. 
- unless(hasParent(declStmt(hasParent( - cxxForRangeStmt(hasRangeStmt(equalsBoundNode("stmt")))))))) + varDecl(hasType(nonConstReferenceType()), + hasInitializer(anyOf( + canResolveToExpr(Exp), + memberExpr(hasObjectExpression(canResolveToExpr(Exp))))), + hasParent(declStmt().bind("stmt")), + // Don't follow the reference in range statement, we've + // handled that separately. + unless(hasParent(declStmt(hasParent(cxxForRangeStmt( + hasRangeStmt(equalsBoundNode("stmt")))))))) .bind(NodeID<Decl>::value))), Stm, Context); return findDeclMutation(Refs); @@ -563,7 +590,7 @@ const Stmt *ExprMutationAnalyzer::findReferenceMutation(const Expr *Exp) { const Stmt *ExprMutationAnalyzer::findFunctionArgMutation(const Expr *Exp) { const auto NonConstRefParam = forEachArgumentWithParam( - canResolveToExpr(equalsNode(Exp)), + canResolveToExpr(Exp), parmVarDecl(hasType(nonConstReferenceType())).bind("parm")); const auto IsInstantiated = hasDeclaration(isInstantiated()); const auto FuncDecl = hasDeclaration(functionDecl().bind("func")); diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index 2f8395cb8932..9ebaf4d40cd7 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -226,6 +226,7 @@ bool AArch64TargetInfo::validateBranchProtection(StringRef Spec, StringRef, BPI.BranchTargetEnforcement = PBP.BranchTargetEnforcement; BPI.BranchProtectionPAuthLR = PBP.BranchProtectionPAuthLR; + BPI.GuardedControlStack = PBP.GuardedControlStack; return true; } @@ -532,6 +533,9 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts, if (Opts.BranchTargetEnforcement) Builder.defineMacro("__ARM_FEATURE_BTI_DEFAULT", "1"); + if (Opts.GuardedControlStack) + Builder.defineMacro("__ARM_FEATURE_GCS_DEFAULT", "1"); + if (HasLS64) Builder.defineMacro("__ARM_FEATURE_LS64", "1"); @@ -544,6 +548,9 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts, if (HasD128) Builder.defineMacro("__ARM_FEATURE_SYSREG128", "1"); + if (HasGCS) + Builder.defineMacro("__ARM_FEATURE_GCS", "1"); + if (*ArchInfo == llvm::AArch64::ARMV8_1A) getTargetDefinesARMV81A(Opts, Builder); else if (*ArchInfo == llvm::AArch64::ARMV8_2A) diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp index daaa8639ae83..fb312b6cf26e 100644 --- a/clang/lib/Basic/Targets/RISCV.cpp +++ b/clang/lib/Basic/Targets/RISCV.cpp @@ -163,9 +163,8 @@ void RISCVTargetInfo::getTargetDefines(const LangOptions &Opts, auto ExtName = Extension.first; auto ExtInfo = Extension.second; - Builder.defineMacro( - Twine("__riscv_", ExtName), - Twine(getVersionValue(ExtInfo.MajorVersion, ExtInfo.MinorVersion))); + Builder.defineMacro(Twine("__riscv_", ExtName), + Twine(getVersionValue(ExtInfo.Major, ExtInfo.Minor))); } if (ISAInfo->hasExtension("m") || ISAInfo->hasExtension("zmmul")) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index f71dbf1729a1..998fcc3af581 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -25,6 +25,7 @@ #include "clang/AST/Attr.h" #include "clang/AST/Decl.h" #include "clang/AST/OSLog.h" +#include "clang/AST/OperationKinds.h" #include "clang/Basic/TargetBuiltins.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/TargetOptions.h" @@ -818,6 +819,238 @@ CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type, return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true); } +const FieldDecl *CodeGenFunction::FindFlexibleArrayMemberField( + ASTContext 
&Ctx, const RecordDecl *RD, StringRef Name, uint64_t &Offset) { + const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel = + getLangOpts().getStrictFlexArraysLevel(); + unsigned FieldNo = 0; + bool IsUnion = RD->isUnion(); + + for (const Decl *D : RD->decls()) { + if (const auto *Field = dyn_cast<FieldDecl>(D); + Field && (Name.empty() || Field->getNameAsString() == Name) && + Decl::isFlexibleArrayMemberLike( + Ctx, Field, Field->getType(), StrictFlexArraysLevel, + /*IgnoreTemplateOrMacroSubstitution=*/true)) { + const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD); + Offset += Layout.getFieldOffset(FieldNo); + return Field; + } + + if (const auto *Record = dyn_cast<RecordDecl>(D)) + if (const FieldDecl *Field = + FindFlexibleArrayMemberField(Ctx, Record, Name, Offset)) { + const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD); + Offset += Layout.getFieldOffset(FieldNo); + return Field; + } + + if (!IsUnion && isa<FieldDecl>(D)) + ++FieldNo; + } + + return nullptr; +} + +static unsigned CountCountedByAttrs(const RecordDecl *RD) { + unsigned Num = 0; + + for (const Decl *D : RD->decls()) { + if (const auto *FD = dyn_cast<FieldDecl>(D); + FD && FD->hasAttr<CountedByAttr>()) { + return ++Num; + } + + if (const auto *Rec = dyn_cast<RecordDecl>(D)) + Num += CountCountedByAttrs(Rec); + } + + return Num; +} + +llvm::Value * +CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type, + llvm::IntegerType *ResType) { + // The code generated here calculates the size of a struct with a flexible + // array member that uses the counted_by attribute. There are two instances + // we handle: + // + // struct s { + // unsigned long flags; + // int count; + // int array[] __attribute__((counted_by(count))); + // } + // + // 1) bdos of the flexible array itself: + // + // __builtin_dynamic_object_size(p->array, 1) == + // p->count * sizeof(*p->array) + // + // 2) bdos of a pointer into the flexible array: + // + // __builtin_dynamic_object_size(&p->array[42], 1) == + // (p->count - 42) * sizeof(*p->array) + // + // 2) bdos of the whole struct, including the flexible array: + // + // __builtin_dynamic_object_size(p, 1) == + // max(sizeof(struct s), + // offsetof(struct s, array) + p->count * sizeof(*p->array)) + // + ASTContext &Ctx = getContext(); + const Expr *Base = E->IgnoreParenImpCasts(); + const Expr *Idx = nullptr; + + if (const auto *UO = dyn_cast<UnaryOperator>(Base); + UO && UO->getOpcode() == UO_AddrOf) { + Expr *SubExpr = UO->getSubExpr()->IgnoreParenImpCasts(); + if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(SubExpr)) { + Base = ASE->getBase()->IgnoreParenImpCasts(); + Idx = ASE->getIdx()->IgnoreParenImpCasts(); + + if (const auto *IL = dyn_cast<IntegerLiteral>(Idx)) { + int64_t Val = IL->getValue().getSExtValue(); + if (Val < 0) + return getDefaultBuiltinObjectSizeResult(Type, ResType); + + if (Val == 0) + // The index is 0, so we don't need to take it into account. + Idx = nullptr; + } + } else { + // Potential pointer to another element in the struct. + Base = SubExpr; + } + } + + // Get the flexible array member Decl. + const RecordDecl *OuterRD = nullptr; + std::string FAMName; + if (const auto *ME = dyn_cast<MemberExpr>(Base)) { + // Check if \p Base is referencing the FAM itself. + const ValueDecl *VD = ME->getMemberDecl(); + OuterRD = VD->getDeclContext()->getOuterLexicalRecordContext(); + FAMName = VD->getNameAsString(); + } else if (const auto *DRE = dyn_cast<DeclRefExpr>(Base)) { + // Check if we're pointing to the whole struct. 
+ QualType Ty = DRE->getDecl()->getType(); + if (Ty->isPointerType()) + Ty = Ty->getPointeeType(); + OuterRD = Ty->getAsRecordDecl(); + + // If we have a situation like this: + // + // struct union_of_fams { + // int flags; + // union { + // signed char normal_field; + // struct { + // int count1; + // int arr1[] __counted_by(count1); + // }; + // struct { + // signed char count2; + // int arr2[] __counted_by(count2); + // }; + // }; + // }; + // + // We don't konw which 'count' to use in this scenario: + // + // size_t get_size(struct union_of_fams *p) { + // return __builtin_dynamic_object_size(p, 1); + // } + // + // Instead of calculating a wrong number, we give up. + if (OuterRD && CountCountedByAttrs(OuterRD) > 1) + return nullptr; + } + + if (!OuterRD) + return nullptr; + + uint64_t Offset = 0; + const FieldDecl *FAMDecl = + FindFlexibleArrayMemberField(Ctx, OuterRD, FAMName, Offset); + Offset = Ctx.toCharUnitsFromBits(Offset).getQuantity(); + + if (!FAMDecl || !FAMDecl->hasAttr<CountedByAttr>()) + // No flexible array member found or it doesn't have the "counted_by" + // attribute. + return nullptr; + + const FieldDecl *CountedByFD = FindCountedByField(FAMDecl); + if (!CountedByFD) + // Can't find the field referenced by the "counted_by" attribute. + return nullptr; + + // Build a load of the counted_by field. + bool IsSigned = CountedByFD->getType()->isSignedIntegerType(); + Value *CountedByInst = EmitCountedByFieldExpr(Base, FAMDecl, CountedByFD); + if (!CountedByInst) + return getDefaultBuiltinObjectSizeResult(Type, ResType); + + CountedByInst = Builder.CreateIntCast(CountedByInst, ResType, IsSigned); + + // Build a load of the index and subtract it from the count. + Value *IdxInst = nullptr; + if (Idx) { + if (Idx->HasSideEffects(getContext())) + // We can't have side-effects. + return getDefaultBuiltinObjectSizeResult(Type, ResType); + + bool IdxSigned = Idx->getType()->isSignedIntegerType(); + IdxInst = EmitAnyExprToTemp(Idx).getScalarVal(); + IdxInst = Builder.CreateIntCast(IdxInst, ResType, IdxSigned); + + // We go ahead with the calculation here. If the index turns out to be + // negative, we'll catch it at the end. + CountedByInst = + Builder.CreateSub(CountedByInst, IdxInst, "", !IsSigned, IsSigned); + } + + // Calculate how large the flexible array member is in bytes. + const ArrayType *ArrayTy = Ctx.getAsArrayType(FAMDecl->getType()); + CharUnits Size = Ctx.getTypeSizeInChars(ArrayTy->getElementType()); + llvm::Constant *ElemSize = + llvm::ConstantInt::get(ResType, Size.getQuantity(), IsSigned); + Value *FAMSize = + Builder.CreateMul(CountedByInst, ElemSize, "", !IsSigned, IsSigned); + FAMSize = Builder.CreateIntCast(FAMSize, ResType, IsSigned); + Value *Res = FAMSize; + + if (const auto *DRE = dyn_cast<DeclRefExpr>(Base)) { + // The whole struct is specificed in the __bdos. + const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(OuterRD); + + // Get the offset of the FAM. + llvm::Constant *FAMOffset = ConstantInt::get(ResType, Offset, IsSigned); + Value *OffsetAndFAMSize = + Builder.CreateAdd(FAMOffset, Res, "", !IsSigned, IsSigned); + + // Get the full size of the struct. + llvm::Constant *SizeofStruct = + ConstantInt::get(ResType, Layout.getSize().getQuantity(), IsSigned); + + // max(sizeof(struct s), + // offsetof(struct s, array) + p->count * sizeof(*p->array)) + Res = IsSigned + ? 
Builder.CreateBinaryIntrinsic(llvm::Intrinsic::smax, + OffsetAndFAMSize, SizeofStruct) + : Builder.CreateBinaryIntrinsic(llvm::Intrinsic::umax, + OffsetAndFAMSize, SizeofStruct); + } + + // A negative \p IdxInst or \p CountedByInst means that the index lands + // outside of the flexible array member. If that's the case, we want to + // return 0. + Value *Cmp = Builder.CreateIsNotNeg(CountedByInst); + if (IdxInst) + Cmp = Builder.CreateAnd(Builder.CreateIsNotNeg(IdxInst), Cmp); + + return Builder.CreateSelect(Cmp, Res, ConstantInt::get(ResType, 0, IsSigned)); +} + /// Returns a Value corresponding to the size of the given expression. /// This Value may be either of the following: /// - A llvm::Argument (if E is a param with the pass_object_size attribute on @@ -850,6 +1083,13 @@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type, } } + if (IsDynamic) { + // Emit special code for a flexible array member with the "counted_by" + // attribute. + if (Value *V = emitFlexibleArrayMemberSize(E, Type, ResType)) + return V; + } + // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't // evaluate E for side-effects. In either case, we shouldn't lower to // @llvm.objectsize. @@ -9681,8 +9921,8 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E, bool IsQuadStore = false; switch (IntrinsicID) { - case Intrinsic::aarch64_sve_st1uwq: - case Intrinsic::aarch64_sve_st1udq: + case Intrinsic::aarch64_sve_st1wq: + case Intrinsic::aarch64_sve_st1dq: AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1); PredTy = llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1); diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 51a43b5f85b3..13677cf150ae 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -2612,6 +2612,8 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, if (IRFunctionArgs.hasSRetArg()) { llvm::AttrBuilder SRETAttrs(getLLVMContext()); SRETAttrs.addStructRetAttr(getTypes().ConvertTypeForMem(RetTy)); + SRETAttrs.addAttribute(llvm::Attribute::Writable); + SRETAttrs.addAttribute(llvm::Attribute::DeadOnUnwind); hasUsedSRet = true; if (RetAI.getInReg()) SRETAttrs.addAttribute(llvm::Attribute::InReg); diff --git a/clang/lib/CodeGen/CGException.cpp b/clang/lib/CodeGen/CGException.cpp index 0d507da5c1ba..56a246eb65e0 100644 --- a/clang/lib/CodeGen/CGException.cpp +++ b/clang/lib/CodeGen/CGException.cpp @@ -156,7 +156,9 @@ static const EHPersonality &getObjCPersonality(const TargetInfo &Target, case ObjCRuntime::WatchOS: return EHPersonality::NeXT_ObjC; case ObjCRuntime::GNUstep: - if (L.ObjCRuntime.getVersion() >= VersionTuple(1, 7)) + if (T.isOSCygMing()) + return EHPersonality::GNU_CPlusPlus_SEH; + else if (L.ObjCRuntime.getVersion() >= VersionTuple(1, 7)) return EHPersonality::GNUstep_ObjC; [[fallthrough]]; case ObjCRuntime::GCC: @@ -210,7 +212,8 @@ static const EHPersonality &getObjCXXPersonality(const TargetInfo &Target, return getObjCPersonality(Target, L); case ObjCRuntime::GNUstep: - return EHPersonality::GNU_ObjCXX; + return Target.getTriple().isOSCygMing() ? EHPersonality::GNU_CPlusPlus_SEH + : EHPersonality::GNU_ObjCXX; // The GCC runtime's personality function inherently doesn't support // mixed EH. Use the ObjC personality just to avoid returning null. 
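For reference, the three cases spelled out in the emitFlexibleArrayMemberSize comments above correspond to the following C usage. This is an illustrative sketch, not part of the patch: it assumes a compiler that accepts the counted_by attribute, and the struct simply mirrors the example given in the patch's own comments.

  #include <stddef.h>

  struct s {
    unsigned long flags;
    int count;
    int array[] __attribute__((counted_by(count)));
  };

  size_t array_bytes(struct s *p) {
    /* Case 1: bdos of the flexible array itself:
       p->count * sizeof(*p->array) */
    return __builtin_dynamic_object_size(p->array, 1);
  }

  size_t tail_bytes(struct s *p) {
    /* Case 2: bdos of a pointer into the flexible array:
       (p->count - 42) * sizeof(*p->array) */
    return __builtin_dynamic_object_size(&p->array[42], 1);
  }

  size_t whole_struct_bytes(struct s *p) {
    /* Case 3: bdos of the whole struct, including the flexible array:
       max(sizeof(struct s),
           offsetof(struct s, array) + p->count * sizeof(*p->array)) */
    return __builtin_dynamic_object_size(p, 1);
  }

The results are computed from a load of the count field (plus the index and struct layout), which is why the patch bails out to the default result whenever the index has side effects or the count cannot be identified unambiguously.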
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 3f277725d9e7..d12e85b48d0b 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -26,10 +26,12 @@ #include "clang/AST/Attr.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/NSAPI.h" +#include "clang/AST/StmtVisitor.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/SourceManager.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Intrinsics.h" @@ -925,16 +927,21 @@ static llvm::Value *getArrayIndexingBound(CodeGenFunction &CGF, if (CE->getCastKind() == CK_ArrayToPointerDecay && !CE->getSubExpr()->isFlexibleArrayMemberLike(CGF.getContext(), StrictFlexArraysLevel)) { + CodeGenFunction::SanitizerScope SanScope(&CGF); + IndexedType = CE->getSubExpr()->getType(); const ArrayType *AT = IndexedType->castAsArrayTypeUnsafe(); if (const auto *CAT = dyn_cast<ConstantArrayType>(AT)) return CGF.Builder.getInt(CAT->getSize()); - else if (const auto *VAT = dyn_cast<VariableArrayType>(AT)) + + if (const auto *VAT = dyn_cast<VariableArrayType>(AT)) return CGF.getVLASize(VAT).NumElts; // Ignore pass_object_size here. It's not applicable on decayed pointers. } } + CodeGenFunction::SanitizerScope SanScope(&CGF); + QualType EltTy{Base->getType()->getPointeeOrArrayElementType(), 0}; if (llvm::Value *POS = CGF.LoadPassedObjectSize(Base, EltTy)) { IndexedType = Base->getType(); @@ -944,22 +951,248 @@ static llvm::Value *getArrayIndexingBound(CodeGenFunction &CGF, return nullptr; } +namespace { + +/// \p StructAccessBase returns the base \p Expr of a field access. It returns +/// either a \p DeclRefExpr, representing the base pointer to the struct, i.e.: +/// +/// p in p-> a.b.c +/// +/// or a \p MemberExpr, if the \p MemberExpr has the \p RecordDecl we're +/// looking for: +/// +/// struct s { +/// struct s *ptr; +/// int count; +/// char array[] __attribute__((counted_by(count))); +/// }; +/// +/// If we have an expression like \p p->ptr->array[index], we want the +/// \p MemberExpr for \p p->ptr instead of \p p. +class StructAccessBase + : public ConstStmtVisitor<StructAccessBase, const Expr *> { + const RecordDecl *ExpectedRD; + + bool IsExpectedRecordDecl(const Expr *E) const { + QualType Ty = E->getType(); + if (Ty->isPointerType()) + Ty = Ty->getPointeeType(); + return ExpectedRD == Ty->getAsRecordDecl(); + } + +public: + StructAccessBase(const RecordDecl *ExpectedRD) : ExpectedRD(ExpectedRD) {} + + //===--------------------------------------------------------------------===// + // Visitor Methods + //===--------------------------------------------------------------------===// + + // NOTE: If we build C++ support for counted_by, then we'll have to handle + // horrors like this: + // + // struct S { + // int x, y; + // int blah[] __attribute__((counted_by(x))); + // } s; + // + // int foo(int index, int val) { + // int (S::*IHatePMDs)[] = &S::blah; + // (s.*IHatePMDs)[index] = val; + // } + + const Expr *Visit(const Expr *E) { + return ConstStmtVisitor<StructAccessBase, const Expr *>::Visit(E); + } + + const Expr *VisitStmt(const Stmt *S) { return nullptr; } + + // These are the types we expect to return (in order of most to least + // likely): + // + // 1. DeclRefExpr - This is the expression for the base of the structure. + // It's exactly what we want to build an access to the \p counted_by + // field. + // 2. 
MemberExpr - This is the expression that has the same \p RecordDecl + // as the flexble array member's lexical enclosing \p RecordDecl. This + // allows us to catch things like: "p->p->array" + // 3. CompoundLiteralExpr - This is for people who create something + // heretical like (struct foo has a flexible array member): + // + // (struct foo){ 1, 2 }.blah[idx]; + const Expr *VisitDeclRefExpr(const DeclRefExpr *E) { + return IsExpectedRecordDecl(E) ? E : nullptr; + } + const Expr *VisitMemberExpr(const MemberExpr *E) { + if (IsExpectedRecordDecl(E) && E->isArrow()) + return E; + const Expr *Res = Visit(E->getBase()); + return !Res && IsExpectedRecordDecl(E) ? E : Res; + } + const Expr *VisitCompoundLiteralExpr(const CompoundLiteralExpr *E) { + return IsExpectedRecordDecl(E) ? E : nullptr; + } + const Expr *VisitCallExpr(const CallExpr *E) { + return IsExpectedRecordDecl(E) ? E : nullptr; + } + + const Expr *VisitArraySubscriptExpr(const ArraySubscriptExpr *E) { + if (IsExpectedRecordDecl(E)) + return E; + return Visit(E->getBase()); + } + const Expr *VisitCastExpr(const CastExpr *E) { + return Visit(E->getSubExpr()); + } + const Expr *VisitParenExpr(const ParenExpr *E) { + return Visit(E->getSubExpr()); + } + const Expr *VisitUnaryAddrOf(const UnaryOperator *E) { + return Visit(E->getSubExpr()); + } + const Expr *VisitUnaryDeref(const UnaryOperator *E) { + return Visit(E->getSubExpr()); + } +}; + +} // end anonymous namespace + +using RecIndicesTy = + SmallVector<std::pair<const RecordDecl *, llvm::Value *>, 8>; + +static bool getGEPIndicesToField(CodeGenFunction &CGF, const RecordDecl *RD, + const FieldDecl *FD, RecIndicesTy &Indices) { + const CGRecordLayout &Layout = CGF.CGM.getTypes().getCGRecordLayout(RD); + int64_t FieldNo = -1; + for (const Decl *D : RD->decls()) { + if (const auto *Field = dyn_cast<FieldDecl>(D)) { + FieldNo = Layout.getLLVMFieldNo(Field); + if (FD == Field) { + Indices.emplace_back(std::make_pair(RD, CGF.Builder.getInt32(FieldNo))); + return true; + } + } + + if (const auto *Record = dyn_cast<RecordDecl>(D)) { + ++FieldNo; + if (getGEPIndicesToField(CGF, Record, FD, Indices)) { + if (RD->isUnion()) + FieldNo = 0; + Indices.emplace_back(std::make_pair(RD, CGF.Builder.getInt32(FieldNo))); + return true; + } + } + } + + return false; +} + +/// This method is typically called in contexts where we can't generate +/// side-effects, like in __builtin_dynamic_object_size. When finding +/// expressions, only choose those that have either already been emitted or can +/// be loaded without side-effects. +/// +/// - \p FAMDecl: the \p Decl for the flexible array member. It may not be +/// within the top-level struct. +/// - \p CountDecl: must be within the same non-anonymous struct as \p FAMDecl. +llvm::Value *CodeGenFunction::EmitCountedByFieldExpr( + const Expr *Base, const FieldDecl *FAMDecl, const FieldDecl *CountDecl) { + const RecordDecl *RD = CountDecl->getParent()->getOuterLexicalRecordContext(); + + // Find the base struct expr (i.e. p in p->a.b.c.d). 
+ const Expr *StructBase = StructAccessBase(RD).Visit(Base); + if (!StructBase || StructBase->HasSideEffects(getContext())) + return nullptr; + + llvm::Value *Res = nullptr; + if (const auto *DRE = dyn_cast<DeclRefExpr>(StructBase)) { + Res = EmitDeclRefLValue(DRE).getPointer(*this); + Res = Builder.CreateAlignedLoad(ConvertType(DRE->getType()), Res, + getPointerAlign(), "dre.load"); + } else if (const MemberExpr *ME = dyn_cast<MemberExpr>(StructBase)) { + LValue LV = EmitMemberExpr(ME); + Address Addr = LV.getAddress(*this); + Res = Addr.getPointer(); + } else if (StructBase->getType()->isPointerType()) { + LValueBaseInfo BaseInfo; + TBAAAccessInfo TBAAInfo; + Address Addr = EmitPointerWithAlignment(StructBase, &BaseInfo, &TBAAInfo); + Res = Addr.getPointer(); + } else { + return nullptr; + } + + llvm::Value *Zero = Builder.getInt32(0); + RecIndicesTy Indices; + + getGEPIndicesToField(*this, RD, CountDecl, Indices); + + for (auto I = Indices.rbegin(), E = Indices.rend(); I != E; ++I) + Res = Builder.CreateInBoundsGEP( + ConvertType(QualType(I->first->getTypeForDecl(), 0)), Res, + {Zero, I->second}, "..counted_by.gep"); + + return Builder.CreateAlignedLoad(ConvertType(CountDecl->getType()), Res, + getIntAlign(), "..counted_by.load"); +} + +const FieldDecl *CodeGenFunction::FindCountedByField(const FieldDecl *FD) { + if (!FD || !FD->hasAttr<CountedByAttr>()) + return nullptr; + + const auto *CBA = FD->getAttr<CountedByAttr>(); + if (!CBA) + return nullptr; + + auto GetNonAnonStructOrUnion = + [](const RecordDecl *RD) -> const RecordDecl * { + while (RD && RD->isAnonymousStructOrUnion()) { + const auto *R = dyn_cast<RecordDecl>(RD->getDeclContext()); + if (!R) + return nullptr; + RD = R; + } + return RD; + }; + const RecordDecl *EnclosingRD = GetNonAnonStructOrUnion(FD->getParent()); + if (!EnclosingRD) + return nullptr; + + DeclarationName DName(CBA->getCountedByField()); + DeclContext::lookup_result Lookup = EnclosingRD->lookup(DName); + + if (Lookup.empty()) + return nullptr; + + const NamedDecl *ND = Lookup.front(); + if (const auto *IFD = dyn_cast<IndirectFieldDecl>(ND)) + ND = IFD->getAnonField(); + + return dyn_cast<FieldDecl>(ND); +} + void CodeGenFunction::EmitBoundsCheck(const Expr *E, const Expr *Base, llvm::Value *Index, QualType IndexType, bool Accessed) { assert(SanOpts.has(SanitizerKind::ArrayBounds) && "should not be called unless adding bounds checks"); - SanitizerScope SanScope(this); - const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel = - getLangOpts().getStrictFlexArraysLevel(); - + getLangOpts().getStrictFlexArraysLevel(); QualType IndexedType; llvm::Value *Bound = getArrayIndexingBound(*this, Base, IndexedType, StrictFlexArraysLevel); + + EmitBoundsCheckImpl(E, Bound, Index, IndexType, IndexedType, Accessed); +} + +void CodeGenFunction::EmitBoundsCheckImpl(const Expr *E, llvm::Value *Bound, + llvm::Value *Index, + QualType IndexType, + QualType IndexedType, bool Accessed) { if (!Bound) return; + SanitizerScope SanScope(this); + bool IndexSigned = IndexType->isSignedIntegerOrEnumerationType(); llvm::Value *IndexVal = Builder.CreateIntCast(Index, SizeTy, IndexSigned); llvm::Value *BoundVal = Builder.CreateIntCast(Bound, SizeTy, false); @@ -975,7 +1208,6 @@ void CodeGenFunction::EmitBoundsCheck(const Expr *E, const Expr *Base, SanitizerHandler::OutOfBounds, StaticData, Index); } - CodeGenFunction::ComplexPairTy CodeGenFunction:: EmitComplexPrePostIncDec(const UnaryOperator *E, LValue LV, bool isInc, bool isPre) { @@ -3823,6 +4055,61 @@ static Address 
emitArraySubscriptGEP(CodeGenFunction &CGF, Address addr, return Address(eltPtr, CGF.ConvertTypeForMem(eltType), eltAlign); } +/// The offset of a field from the beginning of the record. +static bool getFieldOffsetInBits(CodeGenFunction &CGF, const RecordDecl *RD, + const FieldDecl *FD, int64_t &Offset) { + ASTContext &Ctx = CGF.getContext(); + const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD); + unsigned FieldNo = 0; + + for (const Decl *D : RD->decls()) { + if (const auto *Record = dyn_cast<RecordDecl>(D)) + if (getFieldOffsetInBits(CGF, Record, FD, Offset)) { + Offset += Layout.getFieldOffset(FieldNo); + return true; + } + + if (const auto *Field = dyn_cast<FieldDecl>(D)) + if (FD == Field) { + Offset += Layout.getFieldOffset(FieldNo); + return true; + } + + if (isa<FieldDecl>(D)) + ++FieldNo; + } + + return false; +} + +/// Returns the relative offset difference between \p FD1 and \p FD2. +/// \code +/// offsetof(struct foo, FD1) - offsetof(struct foo, FD2) +/// \endcode +/// Both fields must be within the same struct. +static std::optional<int64_t> getOffsetDifferenceInBits(CodeGenFunction &CGF, + const FieldDecl *FD1, + const FieldDecl *FD2) { + const RecordDecl *FD1OuterRec = + FD1->getParent()->getOuterLexicalRecordContext(); + const RecordDecl *FD2OuterRec = + FD2->getParent()->getOuterLexicalRecordContext(); + + if (FD1OuterRec != FD2OuterRec) + // Fields must be within the same RecordDecl. + return std::optional<int64_t>(); + + int64_t FD1Offset = 0; + if (!getFieldOffsetInBits(CGF, FD1OuterRec, FD1, FD1Offset)) + return std::optional<int64_t>(); + + int64_t FD2Offset = 0; + if (!getFieldOffsetInBits(CGF, FD2OuterRec, FD2, FD2Offset)) + return std::optional<int64_t>(); + + return std::make_optional<int64_t>(FD1Offset - FD2Offset); +} + LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, bool Accessed) { // The index must always be an integer, which is not an aggregate. Emit it @@ -3950,6 +4237,47 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, ArrayLV = EmitLValue(Array); auto *Idx = EmitIdxAfterBase(/*Promote*/true); + if (SanOpts.has(SanitizerKind::ArrayBounds)) { + // If the array being accessed has a "counted_by" attribute, generate + // bounds checking code. The "count" field is at the top level of the + // struct or in an anonymous struct, that's also at the top level. Future + // expansions may allow the "count" to reside at any place in the struct, + // but the value of "counted_by" will be a "simple" path to the count, + // i.e. "a.b.count", so we shouldn't need the full force of EmitLValue or + // similar to emit the correct GEP. + const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel = + getLangOpts().getStrictFlexArraysLevel(); + + if (const auto *ME = dyn_cast<MemberExpr>(Array); + ME && + ME->isFlexibleArrayMemberLike(getContext(), StrictFlexArraysLevel) && + ME->getMemberDecl()->hasAttr<CountedByAttr>()) { + const FieldDecl *FAMDecl = dyn_cast<FieldDecl>(ME->getMemberDecl()); + if (const FieldDecl *CountFD = FindCountedByField(FAMDecl)) { + if (std::optional<int64_t> Diff = + getOffsetDifferenceInBits(*this, CountFD, FAMDecl)) { + CharUnits OffsetDiff = CGM.getContext().toCharUnitsFromBits(*Diff); + + // Create a GEP with a byte offset between the FAM and count and + // use that to load the count value. 
+ Addr = Builder.CreatePointerBitCastOrAddrSpaceCast( + ArrayLV.getAddress(*this), Int8PtrTy, Int8Ty); + + llvm::Type *CountTy = ConvertType(CountFD->getType()); + llvm::Value *Res = Builder.CreateInBoundsGEP( + Int8Ty, Addr.getPointer(), + Builder.getInt32(OffsetDiff.getQuantity()), ".counted_by.gep"); + Res = Builder.CreateAlignedLoad(CountTy, Res, getIntAlign(), + ".counted_by.load"); + + // Now emit the bounds checking. + EmitBoundsCheckImpl(E, Res, Idx, E->getIdx()->getType(), + Array->getType(), Accessed); + } + } + } + } + // Propagate the alignment from the array itself to the result. QualType arrayType = Array->getType(); Addr = emitArraySubscriptGEP( diff --git a/clang/lib/CodeGen/CGObjCGNU.cpp b/clang/lib/CodeGen/CGObjCGNU.cpp index 9443fecf9b79..cd1a0b6a130f 100644 --- a/clang/lib/CodeGen/CGObjCGNU.cpp +++ b/clang/lib/CodeGen/CGObjCGNU.cpp @@ -168,6 +168,8 @@ protected: /// Does the current target use SEH-based exceptions? False implies /// Itanium-style DWARF unwinding. bool usesSEHExceptions; + /// Does the current target uses C++-based exceptions? + bool usesCxxExceptions; /// Helper to check if we are targeting a specific runtime version or later. bool isRuntime(ObjCRuntime::Kind kind, unsigned major, unsigned minor=0) { @@ -819,12 +821,18 @@ class CGObjCGNUstep : public CGObjCGNU { SlotLookupSuperFn.init(&CGM, "objc_slot_lookup_super", SlotTy, PtrToObjCSuperTy, SelectorTy); // If we're in ObjC++ mode, then we want to make - if (usesSEHExceptions) { - llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext); - // void objc_exception_rethrow(void) - ExceptionReThrowFn.init(&CGM, "objc_exception_rethrow", VoidTy); + llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext); + if (usesCxxExceptions) { + // void *__cxa_begin_catch(void *e) + EnterCatchFn.init(&CGM, "__cxa_begin_catch", PtrTy, PtrTy); + // void __cxa_end_catch(void) + ExitCatchFn.init(&CGM, "__cxa_end_catch", VoidTy); + // void objc_exception_rethrow(void*) + ExceptionReThrowFn.init(&CGM, "__cxa_rethrow", PtrTy); + } else if (usesSEHExceptions) { + // void objc_exception_rethrow(void) + ExceptionReThrowFn.init(&CGM, "objc_exception_rethrow", VoidTy); } else if (CGM.getLangOpts().CPlusPlus) { - llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext); // void *__cxa_begin_catch(void *e) EnterCatchFn.init(&CGM, "__cxa_begin_catch", PtrTy, PtrTy); // void __cxa_end_catch(void) @@ -833,7 +841,6 @@ class CGObjCGNUstep : public CGObjCGNU { ExceptionReThrowFn.init(&CGM, "_Unwind_Resume_or_Rethrow", VoidTy, PtrTy); } else if (R.getVersion() >= VersionTuple(1, 7)) { - llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext); // id objc_begin_catch(void *e) EnterCatchFn.init(&CGM, "objc_begin_catch", IdTy, PtrTy); // void objc_end_catch(void) @@ -841,7 +848,6 @@ class CGObjCGNUstep : public CGObjCGNU { // void _Unwind_Resume_or_Rethrow(void*) ExceptionReThrowFn.init(&CGM, "objc_exception_rethrow", VoidTy, PtrTy); } - llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext); SetPropertyAtomic.init(&CGM, "objc_setProperty_atomic", VoidTy, IdTy, SelectorTy, IdTy, PtrDiffTy); SetPropertyAtomicCopy.init(&CGM, "objc_setProperty_atomic_copy", VoidTy, @@ -2126,6 +2132,9 @@ CGObjCGNU::CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion, msgSendMDKind = VMContext.getMDKindID("GNUObjCMessageSend"); usesSEHExceptions = cgm.getContext().getTargetInfo().getTriple().isWindowsMSVCEnvironment(); + usesCxxExceptions = + cgm.getContext().getTargetInfo().getTriple().isOSCygMing() && + isRuntime(ObjCRuntime::GNUstep, 2); CodeGenTypes &Types = 
CGM.getTypes(); IntTy = cast<llvm::IntegerType>( @@ -2212,7 +2221,10 @@ CGObjCGNU::CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion, // void objc_exception_throw(id); ExceptionThrowFn.init(&CGM, "objc_exception_throw", VoidTy, IdTy); - ExceptionReThrowFn.init(&CGM, "objc_exception_throw", VoidTy, IdTy); + ExceptionReThrowFn.init(&CGM, + usesCxxExceptions ? "objc_exception_rethrow" + : "objc_exception_throw", + VoidTy, IdTy); // int objc_sync_enter(id); SyncEnterFn.init(&CGM, "objc_sync_enter", IntTy, IdTy); // int objc_sync_exit(id); @@ -2389,7 +2401,7 @@ llvm::Constant *CGObjCGNUstep::GetEHType(QualType T) { if (usesSEHExceptions) return CGM.getCXXABI().getAddrOfRTTIDescriptor(T); - if (!CGM.getLangOpts().CPlusPlus) + if (!CGM.getLangOpts().CPlusPlus && !usesCxxExceptions) return CGObjCGNU::GetEHType(T); // For Objective-C++, we want to provide the ability to catch both C++ and @@ -3995,7 +4007,7 @@ void CGObjCGNU::EmitThrowStmt(CodeGenFunction &CGF, ExceptionAsObject = CGF.ObjCEHValueStack.back(); isRethrow = true; } - if (isRethrow && usesSEHExceptions) { + if (isRethrow && (usesSEHExceptions || usesCxxExceptions)) { // For SEH, ExceptionAsObject may be undef, because the catch handler is // not passed it for catchalls and so it is not visible to the catch // funclet. The real thrown object will still be live on the stack at this @@ -4005,8 +4017,7 @@ void CGObjCGNU::EmitThrowStmt(CodeGenFunction &CGF, // argument. llvm::CallBase *Throw = CGF.EmitRuntimeCallOrInvoke(ExceptionReThrowFn); Throw->setDoesNotReturn(); - } - else { + } else { ExceptionAsObject = CGF.Builder.CreateBitCast(ExceptionAsObject, IdTy); llvm::CallBase *Throw = CGF.EmitRuntimeCallOrInvoke(ExceptionThrowFn, ExceptionAsObject); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 07c7678df87e..143ad64e8816 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3073,6 +3073,25 @@ public: /// this expression is used as an lvalue, for instance in "&Arr[Idx]". void EmitBoundsCheck(const Expr *E, const Expr *Base, llvm::Value *Index, QualType IndexType, bool Accessed); + void EmitBoundsCheckImpl(const Expr *E, llvm::Value *Bound, + llvm::Value *Index, QualType IndexType, + QualType IndexedType, bool Accessed); + + // Find a struct's flexible array member. It may be embedded inside multiple + // sub-structs, but must still be the last field. + const FieldDecl *FindFlexibleArrayMemberField(ASTContext &Ctx, + const RecordDecl *RD, + StringRef Name, + uint64_t &Offset); + + /// Find the FieldDecl specified in a FAM's "counted_by" attribute. Returns + /// \p nullptr if either the attribute or the field doesn't exist. + const FieldDecl *FindCountedByField(const FieldDecl *FD); + + /// Build an expression accessing the "counted_by" field. 
+ llvm::Value *EmitCountedByFieldExpr(const Expr *Base, + const FieldDecl *FAMDecl, + const FieldDecl *CountDecl); llvm::Value *EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, bool isInc, bool isPre); @@ -4873,6 +4892,9 @@ private: llvm::Value *EmittedE, bool IsDynamic); + llvm::Value *emitFlexibleArrayMemberSize(const Expr *E, unsigned Type, + llvm::IntegerType *ResType); + void emitZeroOrPatternForAutoVarInit(QualType type, const VarDecl &D, Address Loc); diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 4fd32337cccc..ad6fc71c1e50 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1109,6 +1109,8 @@ void CodeGenModule::Release() { if (LangOpts.BranchProtectionPAuthLR) getModule().addModuleFlag(llvm::Module::Min, "branch-protection-pauth-lr", 1); + if (LangOpts.GuardedControlStack) + getModule().addModuleFlag(llvm::Module::Min, "guarded-control-stack", 1); if (LangOpts.hasSignReturnAddress()) getModule().addModuleFlag(llvm::Module::Min, "sign-return-address", 1); if (LangOpts.isSignReturnAddressScopeAll()) diff --git a/clang/lib/CodeGen/CoverageMappingGen.cpp b/clang/lib/CodeGen/CoverageMappingGen.cpp index bf227386a71b..b245abd16c3f 100644 --- a/clang/lib/CodeGen/CoverageMappingGen.cpp +++ b/clang/lib/CodeGen/CoverageMappingGen.cpp @@ -1712,7 +1712,11 @@ struct CounterCoverageMappingBuilder extendRegion(S->getCond()); Counter ParentCount = getRegion().getCounter(); - Counter ThenCount = getRegionCounter(S); + + // If this is "if !consteval" the then-branch will never be taken, we don't + // need to change counter + Counter ThenCount = + S->isNegatedConsteval() ? ParentCount : getRegionCounter(S); if (!S->isConsteval()) { // Emitting a counter for the condition makes it easier to interpret the @@ -1729,7 +1733,12 @@ struct CounterCoverageMappingBuilder extendRegion(S->getThen()); Counter OutCount = propagateCounts(ThenCount, S->getThen()); - Counter ElseCount = subtractCounters(ParentCount, ThenCount); + // If this is "if consteval" the else-branch will never be taken, we don't + // need to change counter + Counter ElseCount = S->isNonNegatedConsteval() + ? ParentCount + : subtractCounters(ParentCount, ThenCount); + if (const Stmt *Else = S->getElse()) { bool ThenHasTerminateStmt = HasTerminateStmt; HasTerminateStmt = false; diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp index 7102d190fe00..ee7f95084d2e 100644 --- a/clang/lib/CodeGen/Targets/AArch64.cpp +++ b/clang/lib/CodeGen/Targets/AArch64.cpp @@ -138,6 +138,8 @@ public: BPI.BranchTargetEnforcement ? "true" : "false"); Fn->addFnAttr("branch-protection-pauth-lr", BPI.BranchProtectionPAuthLR ? "true" : "false"); + Fn->addFnAttr("guarded-control-stack", + BPI.GuardedControlStack ? 
"true" : "false"); } bool isScalarizableAsmOperand(CodeGen::CodeGenFunction &CGF, diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 2d8ef841d4f6..1ee7ae602f3c 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -1508,7 +1508,7 @@ static void CollectARMPACBTIOptions(const ToolChain &TC, const ArgList &Args, << Triple.getArchName(); StringRef Scope, Key; - bool IndirectBranches, BranchProtectionPAuthLR; + bool IndirectBranches, BranchProtectionPAuthLR, GuardedControlStack; if (A->getOption().matches(options::OPT_msign_return_address_EQ)) { Scope = A->getValue(); @@ -1518,6 +1518,7 @@ static void CollectARMPACBTIOptions(const ToolChain &TC, const ArgList &Args, Key = "a_key"; IndirectBranches = false; BranchProtectionPAuthLR = false; + GuardedControlStack = false; } else { StringRef DiagMsg; llvm::ARM::ParsedBranchProtection PBP; @@ -1531,6 +1532,7 @@ static void CollectARMPACBTIOptions(const ToolChain &TC, const ArgList &Args, Key = PBP.Key; BranchProtectionPAuthLR = PBP.BranchProtectionPAuthLR; IndirectBranches = PBP.BranchTargetEnforcement; + GuardedControlStack = PBP.GuardedControlStack; } CmdArgs.push_back( @@ -1543,6 +1545,8 @@ static void CollectARMPACBTIOptions(const ToolChain &TC, const ArgList &Args, Args.MakeArgString(Twine("-mbranch-protection-pauth-lr"))); if (IndirectBranches) CmdArgs.push_back("-mbranch-target-enforce"); + if (GuardedControlStack) + CmdArgs.push_back("-mguarded-control-stack"); } void Clang::AddARMTargetArgs(const llvm::Triple &Triple, const ArgList &Args, diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 41eaad3bbad0..03d68c3df7fb 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "Flang.h" +#include "Arch/RISCV.h" #include "CommonArgs.h" #include "clang/Basic/CodeGenOptions.h" @@ -14,6 +15,8 @@ #include "llvm/Frontend/Debug/Options.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" +#include "llvm/Support/RISCVISAInfo.h" +#include "llvm/TargetParser/RISCVTargetParser.h" #include <cassert> @@ -203,6 +206,51 @@ void Flang::AddAArch64TargetArgs(const ArgList &Args, } } +void Flang::AddRISCVTargetArgs(const ArgList &Args, + ArgStringList &CmdArgs) const { + const llvm::Triple &Triple = getToolChain().getTriple(); + // Handle -mrvv-vector-bits=<bits> + if (Arg *A = Args.getLastArg(options::OPT_mrvv_vector_bits_EQ)) { + StringRef Val = A->getValue(); + const Driver &D = getToolChain().getDriver(); + + // Get minimum VLen from march. + unsigned MinVLen = 0; + StringRef Arch = riscv::getRISCVArch(Args, Triple); + auto ISAInfo = llvm::RISCVISAInfo::parseArchString( + Arch, /*EnableExperimentalExtensions*/ true); + // Ignore parsing error. + if (!errorToBool(ISAInfo.takeError())) + MinVLen = (*ISAInfo)->getMinVLen(); + + // If the value is "zvl", use MinVLen from march. Otherwise, try to parse + // as integer as long as we have a MinVLen. + unsigned Bits = 0; + if (Val.equals("zvl") && MinVLen >= llvm::RISCV::RVVBitsPerBlock) { + Bits = MinVLen; + } else if (!Val.getAsInteger(10, Bits)) { + // Only accept power of 2 values beteen RVVBitsPerBlock and 65536 that + // at least MinVLen. + if (Bits < MinVLen || Bits < llvm::RISCV::RVVBitsPerBlock || + Bits > 65536 || !llvm::isPowerOf2_32(Bits)) + Bits = 0; + } + + // If we got a valid value try to use it. 
+ if (Bits != 0) { + unsigned VScaleMin = Bits / llvm::RISCV::RVVBitsPerBlock; + CmdArgs.push_back( + Args.MakeArgString("-mvscale-max=" + llvm::Twine(VScaleMin))); + CmdArgs.push_back( + Args.MakeArgString("-mvscale-min=" + llvm::Twine(VScaleMin))); + } else if (!Val.equals("scalable")) { + // Handle the unsupported values passed to mrvv-vector-bits. + D.Diag(diag::err_drv_unsupported_option_argument) + << A->getSpelling() << Val; + } + } +} + static void addVSDefines(const ToolChain &TC, const ArgList &Args, ArgStringList &CmdArgs) { @@ -321,6 +369,9 @@ void Flang::addTargetOptions(const ArgList &Args, AddAMDGPUTargetArgs(Args, CmdArgs); break; case llvm::Triple::riscv64: + getTargetFeatures(D, Triple, Args, CmdArgs, /*ForAs*/ false); + AddRISCVTargetArgs(Args, CmdArgs); + break; case llvm::Triple::x86_64: getTargetFeatures(D, Triple, Args, CmdArgs, /*ForAs*/ false); break; @@ -352,12 +403,10 @@ void Flang::addTargetOptions(const ArgList &Args, if (A->getValue() == StringRef{"Accelerate"}) { CmdArgs.push_back("-framework"); CmdArgs.push_back("Accelerate"); - A->render(Args, CmdArgs); } } - } else { - A->render(Args, CmdArgs); } + A->render(Args, CmdArgs); } if (Triple.isKnownWindowsMSVCEnvironment()) { @@ -428,6 +477,8 @@ void Flang::addOffloadOptions(Compilation &C, const InputInfoList &Inputs, CmdArgs.push_back("-fopenmp-assume-no-thread-state"); if (Args.hasArg(options::OPT_fopenmp_assume_no_nested_parallelism)) CmdArgs.push_back("-fopenmp-assume-no-nested-parallelism"); + if (Args.hasArg(options::OPT_nogpulib)) + CmdArgs.push_back("-nogpulib"); } } diff --git a/clang/lib/Driver/ToolChains/Flang.h b/clang/lib/Driver/ToolChains/Flang.h index 8d35080e1c0c..ec2e545a1d0b 100644 --- a/clang/lib/Driver/ToolChains/Flang.h +++ b/clang/lib/Driver/ToolChains/Flang.h @@ -70,6 +70,13 @@ private: void AddAMDGPUTargetArgs(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const; + /// Add specific options for RISC-V target. + /// + /// \param [in] Args The list of input driver arguments + /// \param [out] CmdArgs The list of output command arguments + void AddRISCVTargetArgs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; + /// Extract offload options from the driver arguments and add them to /// the command arguments. 
/// \param [in] C The current compilation for the driver invocation diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index 24681dfdc99c..771240dac7a8 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -2668,7 +2668,9 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( case llvm::Triple::arm: case llvm::Triple::thumb: LibDirs.append(begin(ARMLibDirs), end(ARMLibDirs)); - if (TargetTriple.getEnvironment() == llvm::Triple::GNUEABIHF) { + if (TargetTriple.getEnvironment() == llvm::Triple::GNUEABIHF || + TargetTriple.getEnvironment() == llvm::Triple::MuslEABIHF || + TargetTriple.getEnvironment() == llvm::Triple::EABIHF) { TripleAliases.append(begin(ARMHFTriples), end(ARMHFTriples)); } else { TripleAliases.append(begin(ARMTriples), end(ARMTriples)); @@ -2677,7 +2679,9 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( case llvm::Triple::armeb: case llvm::Triple::thumbeb: LibDirs.append(begin(ARMebLibDirs), end(ARMebLibDirs)); - if (TargetTriple.getEnvironment() == llvm::Triple::GNUEABIHF) { + if (TargetTriple.getEnvironment() == llvm::Triple::GNUEABIHF || + TargetTriple.getEnvironment() == llvm::Triple::MuslEABIHF || + TargetTriple.getEnvironment() == llvm::Triple::EABIHF) { TripleAliases.append(begin(ARMebHFTriples), end(ARMebHFTriples)); } else { TripleAliases.append(begin(ARMebTriples), end(ARMebTriples)); diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp index 735af54f114c..4300a2bdff17 100644 --- a/clang/lib/Driver/ToolChains/Linux.cpp +++ b/clang/lib/Driver/ToolChains/Linux.cpp @@ -61,12 +61,16 @@ std::string Linux::getMultiarchTriple(const Driver &D, case llvm::Triple::thumb: if (IsAndroid) return "arm-linux-androideabi"; - if (TargetEnvironment == llvm::Triple::GNUEABIHF) + if (TargetEnvironment == llvm::Triple::GNUEABIHF || + TargetEnvironment == llvm::Triple::MuslEABIHF || + TargetEnvironment == llvm::Triple::EABIHF) return "arm-linux-gnueabihf"; return "arm-linux-gnueabi"; case llvm::Triple::armeb: case llvm::Triple::thumbeb: - if (TargetEnvironment == llvm::Triple::GNUEABIHF) + if (TargetEnvironment == llvm::Triple::GNUEABIHF || + TargetEnvironment == llvm::Triple::MuslEABIHF || + TargetEnvironment == llvm::Triple::EABIHF) return "armeb-linux-gnueabihf"; return "armeb-linux-gnueabi"; case llvm::Triple::x86: diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index f798d555bf99..ff5ed6c306f3 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -76,41 +76,39 @@ template <> struct MappingTraits<FormatStyle::AlignConsecutiveStyle> { FormatStyle::AlignConsecutiveStyle( {/*Enabled=*/false, /*AcrossEmptyLines=*/false, /*AcrossComments=*/false, /*AlignCompound=*/false, - /*PadOperators=*/true})); + /*AlignFunctionPointers=*/false, /*PadOperators=*/true})); IO.enumCase(Value, "Consecutive", FormatStyle::AlignConsecutiveStyle( {/*Enabled=*/true, /*AcrossEmptyLines=*/false, /*AcrossComments=*/false, /*AlignCompound=*/false, - /*PadOperators=*/true})); + /*AlignFunctionPointers=*/false, /*PadOperators=*/true})); IO.enumCase(Value, "AcrossEmptyLines", FormatStyle::AlignConsecutiveStyle( {/*Enabled=*/true, /*AcrossEmptyLines=*/true, /*AcrossComments=*/false, /*AlignCompound=*/false, - /*PadOperators=*/true})); + /*AlignFunctionPointers=*/false, /*PadOperators=*/true})); IO.enumCase(Value, "AcrossComments", - FormatStyle::AlignConsecutiveStyle({/*Enabled=*/true, - /*AcrossEmptyLines=*/false, - 
/*AcrossComments=*/true, - /*AlignCompound=*/false, - /*PadOperators=*/true})); + FormatStyle::AlignConsecutiveStyle( + {/*Enabled=*/true, /*AcrossEmptyLines=*/false, + /*AcrossComments=*/true, /*AlignCompound=*/false, + /*AlignFunctionPointers=*/false, /*PadOperators=*/true})); IO.enumCase(Value, "AcrossEmptyLinesAndComments", - FormatStyle::AlignConsecutiveStyle({/*Enabled=*/true, - /*AcrossEmptyLines=*/true, - /*AcrossComments=*/true, - /*AlignCompound=*/false, - /*PadOperators=*/true})); + FormatStyle::AlignConsecutiveStyle( + {/*Enabled=*/true, /*AcrossEmptyLines=*/true, + /*AcrossComments=*/true, /*AlignCompound=*/false, + /*AlignFunctionPointers=*/false, /*PadOperators=*/true})); // For backward compatibility. IO.enumCase(Value, "true", FormatStyle::AlignConsecutiveStyle( {/*Enabled=*/true, /*AcrossEmptyLines=*/false, /*AcrossComments=*/false, /*AlignCompound=*/false, - /*PadOperators=*/true})); + /*AlignFunctionPointers=*/false, /*PadOperators=*/true})); IO.enumCase(Value, "false", FormatStyle::AlignConsecutiveStyle( {/*Enabled=*/false, /*AcrossEmptyLines=*/false, /*AcrossComments=*/false, /*AlignCompound=*/false, - /*PadOperators=*/true})); + /*AlignFunctionPointers=*/false, /*PadOperators=*/true})); } static void mapping(IO &IO, FormatStyle::AlignConsecutiveStyle &Value) { @@ -118,6 +116,7 @@ template <> struct MappingTraits<FormatStyle::AlignConsecutiveStyle> { IO.mapOptional("AcrossEmptyLines", Value.AcrossEmptyLines); IO.mapOptional("AcrossComments", Value.AcrossComments); IO.mapOptional("AlignCompound", Value.AlignCompound); + IO.mapOptional("AlignFunctionPointers", Value.AlignFunctionPointers); IO.mapOptional("PadOperators", Value.PadOperators); } }; @@ -1432,6 +1431,7 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) { LLVMStyle.AlignConsecutiveAssignments.AcrossEmptyLines = false; LLVMStyle.AlignConsecutiveAssignments.AcrossComments = false; LLVMStyle.AlignConsecutiveAssignments.AlignCompound = false; + LLVMStyle.AlignConsecutiveAssignments.AlignFunctionPointers = false; LLVMStyle.AlignConsecutiveAssignments.PadOperators = true; LLVMStyle.AlignConsecutiveBitFields = {}; LLVMStyle.AlignConsecutiveDeclarations = {}; diff --git a/clang/lib/Format/FormatToken.cpp b/clang/lib/Format/FormatToken.cpp index 7a2df8c53952..b791c5a26bbe 100644 --- a/clang/lib/Format/FormatToken.cpp +++ b/clang/lib/Format/FormatToken.cpp @@ -113,8 +113,8 @@ unsigned CommaSeparatedList::formatAfterToken(LineState &State, if (!State.NextToken || !State.NextToken->Previous) return 0; - if (Formats.size() == 1) - return 0; // Handled by formatFromToken + if (Formats.size() <= 1) + return 0; // Handled by formatFromToken (1) or avoid severe penalty (0). // Ensure that we start on the opening brace. 
const FormatToken *LBrace = diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index 3f9664f8f78a..981592aa094a 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -275,14 +275,15 @@ class AnnotatedLine; struct FormatToken { FormatToken() : HasUnescapedNewline(false), IsMultiline(false), IsFirst(false), - MustBreakBefore(false), IsUnterminatedLiteral(false), - CanBreakBefore(false), ClosesTemplateDeclaration(false), - StartsBinaryExpression(false), EndsBinaryExpression(false), - PartOfMultiVariableDeclStmt(false), ContinuesLineCommentSection(false), - Finalized(false), ClosesRequiresClause(false), - EndsCppAttributeGroup(false), BlockKind(BK_Unknown), - Decision(FD_Unformatted), PackingKind(PPK_Inconclusive), - TypeIsFinalized(false), Type(TT_Unknown) {} + MustBreakBefore(false), MustBreakBeforeFinalized(false), + IsUnterminatedLiteral(false), CanBreakBefore(false), + ClosesTemplateDeclaration(false), StartsBinaryExpression(false), + EndsBinaryExpression(false), PartOfMultiVariableDeclStmt(false), + ContinuesLineCommentSection(false), Finalized(false), + ClosesRequiresClause(false), EndsCppAttributeGroup(false), + BlockKind(BK_Unknown), Decision(FD_Unformatted), + PackingKind(PPK_Inconclusive), TypeIsFinalized(false), + Type(TT_Unknown) {} /// The \c Token. Token Tok; @@ -318,6 +319,10 @@ struct FormatToken { /// before the token. unsigned MustBreakBefore : 1; + /// Whether MustBreakBefore is finalized during parsing and must not + /// be reset between runs. + unsigned MustBreakBeforeFinalized : 1; + /// Set to \c true if this token is an unterminated literal. unsigned IsUnterminatedLiteral : 1; @@ -416,10 +421,14 @@ public: /// to another one please use overwriteFixedType, or even better remove the /// need to reassign the type. void setFinalizedType(TokenType T) { + if (MacroCtx && MacroCtx->Role == MR_UnexpandedArg) + return; Type = T; TypeIsFinalized = true; } void overwriteFixedType(TokenType T) { + if (MacroCtx && MacroCtx->Role == MR_UnexpandedArg) + return; TypeIsFinalized = false; setType(T); } diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 8b43438c72df..227aa0b97af6 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -2769,13 +2769,6 @@ public: // Consume operators with higher precedence. parse(Precedence + 1); - // Do not assign fake parenthesis to tokens that are part of an - // unexpanded macro call. The line within the macro call contains - // the parenthesis and commas, and we will not find operators within - // that structure. - if (Current && Current->MacroParent) - break; - int CurrentPrecedence = getCurrentPrecedence(); if (Precedence == CurrentPrecedence && Current && @@ -2919,6 +2912,13 @@ private: void addFakeParenthesis(FormatToken *Start, prec::Level Precedence, FormatToken *End = nullptr) { + // Do not assign fake parenthesis to tokens that are part of an + // unexpanded macro call. The line within the macro call contains + // the parenthesis and commas, and we will not find operators within + // that structure. 
+ if (Start->MacroParent) + return; + Start->FakeLParens.push_back(Precedence); if (Precedence > prec::Unknown) Start->StartsBinaryExpression = true; diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp index 56077499c39d..27983a330ac4 100644 --- a/clang/lib/Format/UnwrappedLineFormatter.cpp +++ b/clang/lib/Format/UnwrappedLineFormatter.cpp @@ -954,13 +954,15 @@ static void markFinalized(FormatToken *Tok) { // will be modified as unexpanded arguments (as part of the macro call // formatting) in the next pass. Tok->MacroCtx->Role = MR_UnexpandedArg; - // Reset whether spaces are required before this token, as that is context - // dependent, and that context may change when formatting the macro call. - // For example, given M(x) -> 2 * x, and the macro call M(var), - // the token 'var' will have SpacesRequiredBefore = 1 after being + // Reset whether spaces or a line break are required before this token, as + // that is context dependent, and that context may change when formatting + // the macro call. For example, given M(x) -> 2 * x, and the macro call + // M(var), the token 'var' will have SpacesRequiredBefore = 1 after being // formatted as part of the expanded macro, but SpacesRequiredBefore = 0 // for its position within the macro call. Tok->SpacesRequiredBefore = 0; + if (!Tok->MustBreakBeforeFinalized) + Tok->MustBreakBefore = 0; } else { Tok->Finalized = true; } diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index 684609747a55..50d41c9f57a6 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -2308,7 +2308,7 @@ bool UnwrappedLineParser::tryToParseLambdaIntroducer() { LeftSquare->isCppStructuredBinding(Style)) { return false; } - if (FormatTok->is(tok::l_square)) + if (FormatTok->is(tok::l_square) || tok::isLiteral(FormatTok->Tok.getKind())) return false; if (FormatTok->is(tok::r_square)) { const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true); @@ -4675,6 +4675,7 @@ void UnwrappedLineParser::readToken(int LevelDifference) { conditionalCompilationEnd(); FormatTok = Tokens->getNextToken(); FormatTok->MustBreakBefore = true; + FormatTok->MustBreakBeforeFinalized = true; } auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine, @@ -4891,6 +4892,7 @@ void UnwrappedLineParser::pushToken(FormatToken *Tok) { Line->Tokens.push_back(UnwrappedLineNode(Tok)); if (MustBreakBeforeNextToken) { Line->Tokens.back().Tok->MustBreakBefore = true; + Line->Tokens.back().Tok->MustBreakBeforeFinalized = true; MustBreakBeforeNextToken = false; } } diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp index 3bc6915b8df0..f1d176f182ff 100644 --- a/clang/lib/Format/WhitespaceManager.cpp +++ b/clang/lib/Format/WhitespaceManager.cpp @@ -978,7 +978,14 @@ void WhitespaceManager::alignConsecutiveDeclarations() { AlignTokens( Style, - [](Change const &C) { + [&](Change const &C) { + if (Style.AlignConsecutiveDeclarations.AlignFunctionPointers) { + for (const auto *Prev = C.Tok->Previous; Prev; Prev = Prev->Previous) + if (Prev->is(tok::equal)) + return false; + if (C.Tok->is(TT_FunctionTypeLParen)) + return true; + } if (C.Tok->is(TT_FunctionDeclarationName)) return true; if (C.Tok->isNot(TT_StartOfName)) diff --git a/clang/lib/Format/WhitespaceManager.h b/clang/lib/Format/WhitespaceManager.h index 24fe492dcb02..dc6f60e5deee 100644 --- a/clang/lib/Format/WhitespaceManager.h +++ 
b/clang/lib/Format/WhitespaceManager.h @@ -282,6 +282,7 @@ private: for (auto PrevIter = Start; PrevIter != End; ++PrevIter) { // If we broke the line the initial spaces are already // accounted for. + assert(PrevIter->Index < Changes.size()); if (Changes[PrevIter->Index].NewlinesBefore > 0) NetWidth = 0; NetWidth += diff --git a/clang/lib/Frontend/VerifyDiagnosticConsumer.cpp b/clang/lib/Frontend/VerifyDiagnosticConsumer.cpp index 8a3d2286cd16..f508408ba706 100644 --- a/clang/lib/Frontend/VerifyDiagnosticConsumer.cpp +++ b/clang/lib/Frontend/VerifyDiagnosticConsumer.cpp @@ -611,12 +611,19 @@ static bool ParseDirective(StringRef S, ExpectedData *ED, SourceManager &SM, diag::err_verify_missing_start) << KindStr; continue; } + llvm::SmallString<8> CloseBrace("}}"); + const char *const DelimBegin = PH.C; PH.Advance(); + // Count the number of opening braces for `string` kinds + for (; !D.RegexKind && PH.Next("{"); PH.Advance()) + CloseBrace += '}'; const char* const ContentBegin = PH.C; // mark content begin - // Search for token: }} - if (!PH.SearchClosingBrace("{{", "}}")) { - Diags.Report(Pos.getLocWithOffset(PH.C-PH.Begin), - diag::err_verify_missing_end) << KindStr; + // Search for closing brace + StringRef OpenBrace(DelimBegin, ContentBegin - DelimBegin); + if (!PH.SearchClosingBrace(OpenBrace, CloseBrace)) { + Diags.Report(Pos.getLocWithOffset(PH.C - PH.Begin), + diag::err_verify_missing_end) + << KindStr << CloseBrace; continue; } const char* const ContentEnd = PH.P; // mark content end diff --git a/clang/lib/Headers/ia32intrin.h b/clang/lib/Headers/ia32intrin.h index f1904efd71c4..a8b59dfaad89 100644 --- a/clang/lib/Headers/ia32intrin.h +++ b/clang/lib/Headers/ia32intrin.h @@ -26,51 +26,48 @@ #define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS #endif -/** Find the first set bit starting from the lsb. Result is undefined if - * input is 0. - * - * \headerfile <x86intrin.h> - * - * This intrinsic corresponds to the <c> BSF </c> instruction or the - * <c> TZCNT </c> instruction. - * - * \param __A - * A 32-bit integer operand. - * \returns A 32-bit integer containing the bit number. - */ +/// Find the first set bit starting from the lsb. Result is undefined if +/// input is 0. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the \c BSF instruction or the +/// \c TZCNT instruction. +/// +/// \param __A +/// A 32-bit integer operand. +/// \returns A 32-bit integer containing the bit number. static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR __bsfd(int __A) { return __builtin_ctz((unsigned int)__A); } -/** Find the first set bit starting from the msb. Result is undefined if - * input is 0. - * - * \headerfile <x86intrin.h> - * - * This intrinsic corresponds to the <c> BSR </c> instruction or the - * <c> LZCNT </c> instruction and an <c> XOR </c>. - * - * \param __A - * A 32-bit integer operand. - * \returns A 32-bit integer containing the bit number. - */ +/// Find the first set bit starting from the msb. Result is undefined if +/// input is 0. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the \c BSR instruction or the +/// \c LZCNT instruction and an \c XOR. +/// +/// \param __A +/// A 32-bit integer operand. +/// \returns A 32-bit integer containing the bit number. static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR __bsrd(int __A) { return 31 - __builtin_clz((unsigned int)__A); } -/** Swaps the bytes in the input. Converting little endian to big endian or - * vice versa. 
- * - * \headerfile <x86intrin.h> - * - * This intrinsic corresponds to the <c> BSWAP </c> instruction. - * - * \param __A - * A 32-bit integer operand. - * \returns A 32-bit integer containing the swapped bytes. - */ +/// Swaps the bytes in the input. Converting little endian to big endian or +/// vice versa. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the \c BSWAP instruction. +/// +/// \param __A +/// A 32-bit integer operand. +/// \returns A 32-bit integer containing the swapped bytes. static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR __bswapd(int __A) { return (int)__builtin_bswap32((unsigned int)__A); @@ -85,51 +82,48 @@ _bswap(int __A) { #define _bit_scan_reverse(A) __bsrd((A)) #ifdef __x86_64__ -/** Find the first set bit starting from the lsb. Result is undefined if - * input is 0. - * - * \headerfile <x86intrin.h> - * - * This intrinsic corresponds to the <c> BSF </c> instruction or the - * <c> TZCNT </c> instruction. - * - * \param __A - * A 64-bit integer operand. - * \returns A 32-bit integer containing the bit number. - */ +/// Find the first set bit starting from the lsb. Result is undefined if +/// input is 0. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the \c BSF instruction or the +/// \c TZCNT instruction. +/// +/// \param __A +/// A 64-bit integer operand. +/// \returns A 32-bit integer containing the bit number. static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR __bsfq(long long __A) { return (long long)__builtin_ctzll((unsigned long long)__A); } -/** Find the first set bit starting from the msb. Result is undefined if - * input is 0. - * - * \headerfile <x86intrin.h> - * - * This intrinsic corresponds to the <c> BSR </c> instruction or the - * <c> LZCNT </c> instruction and an <c> XOR </c>. - * - * \param __A - * A 64-bit integer operand. - * \returns A 32-bit integer containing the bit number. - */ +/// Find the first set bit starting from the msb. Result is undefined if +/// input is 0. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the \c BSR instruction or the +/// \c LZCNT instruction and an \c XOR. +/// +/// \param __A +/// A 64-bit integer operand. +/// \returns A 32-bit integer containing the bit number. static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR __bsrq(long long __A) { return 63 - __builtin_clzll((unsigned long long)__A); } -/** Swaps the bytes in the input. Converting little endian to big endian or - * vice versa. - * - * \headerfile <x86intrin.h> - * - * This intrinsic corresponds to the <c> BSWAP </c> instruction. - * - * \param __A - * A 64-bit integer operand. - * \returns A 64-bit integer containing the swapped bytes. - */ +/// Swaps the bytes in the input. Converting little endian to big endian or +/// vice versa. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the \c BSWAP instruction. +/// +/// \param __A +/// A 64-bit integer operand. +/// \returns A 64-bit integer containing the swapped bytes. static __inline__ long long __DEFAULT_FN_ATTRS_CONSTEXPR __bswapq(long long __A) { return (long long)__builtin_bswap64((unsigned long long)__A); @@ -138,18 +132,17 @@ __bswapq(long long __A) { #define _bswap64(A) __bswapq((A)) #endif -/** Counts the number of bits in the source operand having a value of 1. - * - * \headerfile <x86intrin.h> - * - * This intrinsic corresponds to the <c> POPCNT </c> instruction or a - * a sequence of arithmetic and logic ops to calculate it. - * - * \param __A - * An unsigned 32-bit integer operand. 
- * \returns A 32-bit integer containing the number of bits with value 1 in the - * source operand. - */ +/// Counts the number of bits in the source operand having a value of 1. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the \c POPCNT instruction or a +/// a sequence of arithmetic and logic ops to calculate it. +/// +/// \param __A +/// An unsigned 32-bit integer operand. +/// \returns A 32-bit integer containing the number of bits with value 1 in the +/// source operand. static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR __popcntd(unsigned int __A) { @@ -159,18 +152,17 @@ __popcntd(unsigned int __A) #define _popcnt32(A) __popcntd((A)) #ifdef __x86_64__ -/** Counts the number of bits in the source operand having a value of 1. - * - * \headerfile <x86intrin.h> - * - * This intrinsic corresponds to the <c> POPCNT </c> instruction or a - * a sequence of arithmetic and logic ops to calculate it. - * - * \param __A - * An unsigned 64-bit integer operand. - * \returns A 64-bit integer containing the number of bits with value 1 in the - * source operand. - */ +/// Counts the number of bits in the source operand having a value of 1. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the \c POPCNT instruction or a +/// a sequence of arithmetic and logic ops to calculate it. +/// +/// \param __A +/// An unsigned 64-bit integer operand. +/// \returns A 64-bit integer containing the number of bits with value 1 in the +/// source operand. static __inline__ long long __DEFAULT_FN_ATTRS_CONSTEXPR __popcntq(unsigned long long __A) { @@ -207,123 +199,120 @@ __writeeflags(unsigned int __f) } #endif /* !__x86_64__ */ -/** Cast a 32-bit float value to a 32-bit unsigned integer value - * - * \headerfile <x86intrin.h> - * This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction in x86_64, - * and corresponds to the <c> VMOVL / MOVL </c> instruction in ia32. - * - * \param __A - * A 32-bit float value. - * \returns a 32-bit unsigned integer containing the converted value. - */ +/// Cast a 32-bit float value to a 32-bit unsigned integer value. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the \c VMOVD / \c MOVD instruction in x86_64, +/// and corresponds to the \c VMOVL / \c MOVL instruction in ia32. +/// +/// \param __A +/// A 32-bit float value. +/// \returns a 32-bit unsigned integer containing the converted value. static __inline__ unsigned int __DEFAULT_FN_ATTRS_CAST _castf32_u32(float __A) { return __builtin_bit_cast(unsigned int, __A); } -/** Cast a 64-bit float value to a 64-bit unsigned integer value - * - * \headerfile <x86intrin.h> - * This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction in x86_64, - * and corresponds to the <c> VMOVL / MOVL </c> instruction in ia32. - * - * \param __A - * A 64-bit float value. - * \returns a 64-bit unsigned integer containing the converted value. - */ +/// Cast a 64-bit float value to a 64-bit unsigned integer value. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the \c VMOVQ / \c MOVQ instruction in x86_64, +/// and corresponds to the \c VMOVL / \c MOVL instruction in ia32. +/// +/// \param __A +/// A 64-bit float value. +/// \returns a 64-bit unsigned integer containing the converted value. 
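The blocks above only convert the Doxygen comment style (/** ... */ to ///) without changing the intrinsics themselves. For orientation, a minimal sketch of how a few of these ia32intrin.h helpers behave on x86; the expected values in the comments follow from the documented semantics:

#include <stdio.h>
#include <x86intrin.h>

int main(void) {
  unsigned v = 0x00F0u;
  printf("%d %d\n", __bsfd((int)v), __bsrd((int)v)); /* 4 7: lowest/highest set bit */
  printf("%d\n", __popcntd(v));                      /* 4: number of set bits */
  printf("0x%08X\n", (unsigned)__bswapd((int)v));    /* 0xF0000000: byte-swapped */
  printf("0x%08X\n", _castf32_u32(1.0f));            /* 0x3F800000: raw IEEE-754 bits */
  return 0;
}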
static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CAST _castf64_u64(double __A) { return __builtin_bit_cast(unsigned long long, __A); } -/** Cast a 32-bit unsigned integer value to a 32-bit float value - * - * \headerfile <x86intrin.h> - * This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction in x86_64, - * and corresponds to the <c> FLDS </c> instruction in ia32. - * - * \param __A - * A 32-bit unsigned integer value. - * \returns a 32-bit float value containing the converted value. - */ +/// Cast a 32-bit unsigned integer value to a 32-bit float value. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the \c VMOVQ / \c MOVQ instruction in x86_64, +/// and corresponds to the \c FLDS instruction in ia32. +/// +/// \param __A +/// A 32-bit unsigned integer value. +/// \returns a 32-bit float value containing the converted value. static __inline__ float __DEFAULT_FN_ATTRS_CAST _castu32_f32(unsigned int __A) { return __builtin_bit_cast(float, __A); } -/** Cast a 64-bit unsigned integer value to a 64-bit float value - * - * \headerfile <x86intrin.h> - * This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction in x86_64, - * and corresponds to the <c> FLDL </c> instruction in ia32. - * - * \param __A - * A 64-bit unsigned integer value. - * \returns a 64-bit float value containing the converted value. - */ +/// Cast a 64-bit unsigned integer value to a 64-bit float value. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the \c VMOVQ / \c MOVQ instruction in x86_64, +/// and corresponds to the \c FLDL instruction in ia32. +/// +/// \param __A +/// A 64-bit unsigned integer value. +/// \returns a 64-bit float value containing the converted value. static __inline__ double __DEFAULT_FN_ATTRS_CAST _castu64_f64(unsigned long long __A) { return __builtin_bit_cast(double, __A); } -/** Adds the unsigned integer operand to the CRC-32C checksum of the - * unsigned char operand. - * - * \headerfile <x86intrin.h> - * - * This intrinsic corresponds to the <c> CRC32B </c> instruction. - * - * \param __C - * An unsigned integer operand to add to the CRC-32C checksum of operand - * \a __D. - * \param __D - * An unsigned 8-bit integer operand used to compute the CRC-32C checksum. - * \returns The result of adding operand \a __C to the CRC-32C checksum of - * operand \a __D. - */ +/// Adds the unsigned integer operand to the CRC-32C checksum of the +/// unsigned char operand. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the \c CRC32B instruction. +/// +/// \param __C +/// An unsigned integer operand to add to the CRC-32C checksum of operand +/// \a __D. +/// \param __D +/// An unsigned 8-bit integer operand used to compute the CRC-32C checksum. +/// \returns The result of adding operand \a __C to the CRC-32C checksum of +/// operand \a __D. static __inline__ unsigned int __DEFAULT_FN_ATTRS_CRC32 __crc32b(unsigned int __C, unsigned char __D) { return __builtin_ia32_crc32qi(__C, __D); } -/** Adds the unsigned integer operand to the CRC-32C checksum of the - * unsigned short operand. - * - * \headerfile <x86intrin.h> - * - * This intrinsic corresponds to the <c> CRC32W </c> instruction. - * - * \param __C - * An unsigned integer operand to add to the CRC-32C checksum of operand - * \a __D. - * \param __D - * An unsigned 16-bit integer operand used to compute the CRC-32C checksum. - * \returns The result of adding operand \a __C to the CRC-32C checksum of - * operand \a __D. 
- */ +/// Adds the unsigned integer operand to the CRC-32C checksum of the +/// unsigned short operand. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the \c CRC32W instruction. +/// +/// \param __C +/// An unsigned integer operand to add to the CRC-32C checksum of operand +/// \a __D. +/// \param __D +/// An unsigned 16-bit integer operand used to compute the CRC-32C checksum. +/// \returns The result of adding operand \a __C to the CRC-32C checksum of +/// operand \a __D. static __inline__ unsigned int __DEFAULT_FN_ATTRS_CRC32 __crc32w(unsigned int __C, unsigned short __D) { return __builtin_ia32_crc32hi(__C, __D); } -/** Adds the unsigned integer operand to the CRC-32C checksum of the - * second unsigned integer operand. - * - * \headerfile <x86intrin.h> - * - * This intrinsic corresponds to the <c> CRC32D </c> instruction. - * - * \param __C - * An unsigned integer operand to add to the CRC-32C checksum of operand - * \a __D. - * \param __D - * An unsigned 32-bit integer operand used to compute the CRC-32C checksum. - * \returns The result of adding operand \a __C to the CRC-32C checksum of - * operand \a __D. - */ +/// Adds the unsigned integer operand to the CRC-32C checksum of the +/// second unsigned integer operand. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the \c CRC32D instruction. +/// +/// \param __C +/// An unsigned integer operand to add to the CRC-32C checksum of operand +/// \a __D. +/// \param __D +/// An unsigned 32-bit integer operand used to compute the CRC-32C checksum. +/// \returns The result of adding operand \a __C to the CRC-32C checksum of +/// operand \a __D. static __inline__ unsigned int __DEFAULT_FN_ATTRS_CRC32 __crc32d(unsigned int __C, unsigned int __D) { @@ -331,21 +320,20 @@ __crc32d(unsigned int __C, unsigned int __D) } #ifdef __x86_64__ -/** Adds the unsigned integer operand to the CRC-32C checksum of the - * unsigned 64-bit integer operand. - * - * \headerfile <x86intrin.h> - * - * This intrinsic corresponds to the <c> CRC32Q </c> instruction. - * - * \param __C - * An unsigned integer operand to add to the CRC-32C checksum of operand - * \a __D. - * \param __D - * An unsigned 64-bit integer operand used to compute the CRC-32C checksum. - * \returns The result of adding operand \a __C to the CRC-32C checksum of - * operand \a __D. - */ +/// Adds the unsigned integer operand to the CRC-32C checksum of the +/// unsigned 64-bit integer operand. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the \c CRC32Q instruction. +/// +/// \param __C +/// An unsigned integer operand to add to the CRC-32C checksum of operand +/// \a __D. +/// \param __D +/// An unsigned 64-bit integer operand used to compute the CRC-32C checksum. +/// \returns The result of adding operand \a __C to the CRC-32C checksum of +/// operand \a __D. 
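The __crc32b/__crc32w/__crc32d (and, on x86-64, __crc32q) intrinsics documented above accumulate a CRC-32C checksum one unit at a time. A small sketch of byte-wise use; it must be compiled for a target that has the CRC32 instructions (e.g. -msse4.2 or -mcrc32), and the 0xFFFFFFFF initial value/final XOR is merely the common CRC-32C convention, not something the header mandates:

#include <x86intrin.h>

/* CRC-32C over a byte buffer, one byte at a time via CRC32B. */
unsigned int crc32c(const unsigned char *p, unsigned long n) {
  unsigned int crc = 0xFFFFFFFFu;
  for (unsigned long i = 0; i < n; ++i)
    crc = __crc32b(crc, p[i]);
  return crc ^ 0xFFFFFFFFu;
}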
static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CRC32 __crc32q(unsigned long long __C, unsigned long long __D) { diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp index c9fcef5b5b5a..734fe90d0d89 100644 --- a/clang/lib/Interpreter/Interpreter.cpp +++ b/clang/lib/Interpreter/Interpreter.cpp @@ -148,6 +148,7 @@ IncrementalCompilerBuilder::create(std::vector<const char *> &ClangArgv) { // We do C++ by default; append right after argv[0] if no "-x" given ClangArgv.insert(ClangArgv.end(), "-Xclang"); ClangArgv.insert(ClangArgv.end(), "-fincremental-extensions"); + ClangArgv.insert(ClangArgv.end(), "-mcpu=native"); ClangArgv.insert(ClangArgv.end(), "-c"); // Put a dummy C++ file on to ensure there's at least one compile job for the diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index b60ae293ef8c..ed684c5d57b1 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -2661,7 +2661,12 @@ Decl *Parser::ParseDeclarationAfterDeclaratorAndAttributes( // ProduceConstructorSignatureHelp only on VarDecls. ExpressionStarts = SetPreferredType; } - if (ParseExpressionList(Exprs, ExpressionStarts)) { + + bool SawError = ParseExpressionList(Exprs, ExpressionStarts); + + InitScope.pop(); + + if (SawError) { if (ThisVarDecl && PP.isCodeCompletionReached() && !CalledSignatureHelp) { Actions.ProduceConstructorSignatureHelp( ThisVarDecl->getType()->getCanonicalTypeInternal(), @@ -2674,7 +2679,6 @@ Decl *Parser::ParseDeclarationAfterDeclaratorAndAttributes( } else { // Match the ')'. T.consumeClose(); - InitScope.pop(); ExprResult Initializer = Actions.ActOnParenListExpr(T.getOpenLocation(), T.getCloseLocation(), diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp index 897810557976..dcfd290d39cc 100644 --- a/clang/lib/Parse/ParseExpr.cpp +++ b/clang/lib/Parse/ParseExpr.cpp @@ -1974,10 +1974,11 @@ Parser::ParsePostfixExpressionSuffix(ExprResult LHS) { PreferredType.enterSubscript(Actions, Tok.getLocation(), LHS.get()); // We try to parse a list of indexes in all language mode first - // and, in we find 0 or one index, we try to parse an OpenMP array + // and, in we find 0 or one index, we try to parse an OpenMP/OpenACC array // section. This allow us to support C++23 multi dimensional subscript and - // OpenMp sections in the same language mode. - if (!getLangOpts().OpenMP || Tok.isNot(tok::colon)) { + // OpenMP/OpenACC sections in the same language mode. + if ((!getLangOpts().OpenMP && !AllowOpenACCArraySections) || + Tok.isNot(tok::colon)) { if (!getLangOpts().CPlusPlus23) { ExprResult Idx; if (getLangOpts().CPlusPlus11 && Tok.is(tok::l_brace)) { @@ -2001,7 +2002,18 @@ Parser::ParsePostfixExpressionSuffix(ExprResult LHS) { } } - if (ArgExprs.size() <= 1 && getLangOpts().OpenMP) { + // Handle OpenACC first, since 'AllowOpenACCArraySections' is only enabled + // when actively parsing a 'var' in a 'var-list' during clause/'cache' + // parsing, so it is the most specific, and best allows us to handle + // OpenACC and OpenMP at the same time. 
+ if (ArgExprs.size() <= 1 && AllowOpenACCArraySections) { + ColonProtectionRAIIObject RAII(*this); + if (Tok.is(tok::colon)) { + // Consume ':' + ColonLocFirst = ConsumeToken(); + Length = Actions.CorrectDelayedTyposInExpr(ParseExpression()); + } + } else if (ArgExprs.size() <= 1 && getLangOpts().OpenMP) { ColonProtectionRAIIObject RAII(*this); if (Tok.is(tok::colon)) { // Consume ':' @@ -2031,6 +2043,12 @@ Parser::ParsePostfixExpressionSuffix(ExprResult LHS) { if (!LHS.isInvalid() && !HasError && !Length.isInvalid() && !Stride.isInvalid() && Tok.is(tok::r_square)) { if (ColonLocFirst.isValid() || ColonLocSecond.isValid()) { + // FIXME: OpenACC hasn't implemented Sema/Array section handling at a + // semantic level yet. For now, just reuse the OpenMP implementation + // as it gets the parsing/type management mostly right, and we can + // replace this call to ActOnOpenACCArraySectionExpr in the future. + // Eventually we'll genericize the OPenMPArraySectionExpr type as + // well. LHS = Actions.ActOnOMPArraySectionExpr( LHS.get(), Loc, ArgExprs.empty() ? nullptr : ArgExprs[0], ColonLocFirst, ColonLocSecond, Length.get(), Stride.get(), RLoc); diff --git a/clang/lib/Parse/ParseOpenACC.cpp b/clang/lib/Parse/ParseOpenACC.cpp index c9224d3ae910..fc82324e235d 100644 --- a/clang/lib/Parse/ParseOpenACC.cpp +++ b/clang/lib/Parse/ParseOpenACC.cpp @@ -554,49 +554,17 @@ ExprResult Parser::ParseOpenACCIDExpression() { return getActions().CorrectDelayedTyposInExpr(Res); } -/// OpenACC 3.3, section 2.10: -/// A 'var' in a cache directive must be a single array element or a simple -/// subarray. In C and C++, a simple subarray is an array name followed by an -/// extended array range specification in brackets, with a start and length such -/// as: -/// -/// arr[lower:length] -/// -bool Parser::ParseOpenACCCacheVar() { - ExprResult ArrayName = ParseOpenACCIDExpression(); - if (ArrayName.isInvalid()) - return true; - - // If the expression is invalid, just continue parsing the brackets, there - // is likely other useful diagnostics we can emit inside of those. - - BalancedDelimiterTracker SquareBrackets(*this, tok::l_square, - tok::annot_pragma_openacc_end); - - // Square brackets are required, so error here, and try to recover by moving - // until the next comma, or the close paren/end of pragma. - if (SquareBrackets.expectAndConsume()) { - SkipUntil(tok::comma, tok::r_paren, tok::annot_pragma_openacc_end, - Parser::StopBeforeMatch); - return true; - } - - ExprResult Lower = getActions().CorrectDelayedTyposInExpr(ParseExpression()); - if (Lower.isInvalid()) - return true; - - // The 'length' expression is optional, as this could be a single array - // element. If there is no colon, we can treat it as that. - if (getCurToken().is(tok::colon)) { - ConsumeToken(); - ExprResult Length = - getActions().CorrectDelayedTyposInExpr(ParseExpression()); - if (Length.isInvalid()) - return true; - } - - // Diagnose the square bracket being in the wrong place and continue. 
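With this change a cache-directive 'var' goes through the generic OpenACC var parser, so it accepts the same [lower:length] subarray syntax as an OpenMP array section, and per the FIXME the OpenMP ActOnOMPArraySectionExpr is reused for now. A rough example of the source form involved; clang's OpenACC support is parse-only at this point, and the function below is made up:

void scale(float *a, int lower, int length) {
  #pragma acc parallel loop
  for (int i = lower; i < lower + length; ++i) {
    /* 'a[lower:length]' is a simple subarray -- a start and a length -- as
       required for a cache-directive var by OpenACC 3.3, section 2.10. */
    #pragma acc cache(a[lower:length])
    a[i] = a[i] * 2.0f;
  }
}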
- return SquareBrackets.consumeClose(); +/// OpenACC 3.3, section 1.6: +/// In this spec, a 'var' (in italics) is one of the following: +/// - a variable name (a scalar, array, or compisite variable name) +/// - a subarray specification with subscript ranges +/// - an array element +/// - a member of a composite variable +/// - a common block name between slashes (fortran only) +bool Parser::ParseOpenACCVar() { + OpenACCArraySectionRAII ArraySections(*this); + ExprResult Res = ParseAssignmentExpression(); + return Res.isInvalid(); } /// OpenACC 3.3, section 2.10: @@ -627,7 +595,16 @@ void Parser::ParseOpenACCCacheVarList() { if (!FirstArray) ExpectAndConsume(tok::comma); FirstArray = false; - if (ParseOpenACCCacheVar()) + + // OpenACC 3.3, section 2.10: + // A 'var' in a cache directive must be a single array element or a simple + // subarray. In C and C++, a simple subarray is an array name followed by + // an extended array range specification in brackets, with a start and + // length such as: + // + // arr[lower:length] + // + if (ParseOpenACCVar()) SkipUntil(tok::r_paren, tok::annot_pragma_openacc_end, tok::comma, StopBeforeMatch); } diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 8e46c4984d93..e92fd104d78e 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -2315,6 +2315,12 @@ void Sema::ActOnPopScope(SourceLocation Loc, Scope *S) { } ShadowingDecls.erase(ShadowI); } + + if (!getLangOpts().CPlusPlus && S->isClassScope()) { + if (auto *FD = dyn_cast<FieldDecl>(TmpD); + FD && FD->hasAttr<CountedByAttr>()) + CheckCountedByAttr(S, FD); + } } llvm::sort(DeclDiags, diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index d059b406ef86..1a58cfd8e417 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -8460,6 +8460,135 @@ static void handleZeroCallUsedRegsAttr(Sema &S, Decl *D, const ParsedAttr &AL) { D->addAttr(ZeroCallUsedRegsAttr::Create(S.Context, Kind, AL)); } +static void handleCountedByAttr(Sema &S, Decl *D, const ParsedAttr &AL) { + if (!AL.isArgIdent(0)) { + S.Diag(AL.getLoc(), diag::err_attribute_argument_type) + << AL << AANT_ArgumentIdentifier; + return; + } + + IdentifierLoc *IL = AL.getArgAsIdent(0); + CountedByAttr *CBA = + ::new (S.Context) CountedByAttr(S.Context, AL, IL->Ident); + CBA->setCountedByFieldLoc(IL->Loc); + D->addAttr(CBA); +} + +static const FieldDecl * +FindFieldInTopLevelOrAnonymousStruct(const RecordDecl *RD, + const IdentifierInfo *FieldName) { + for (const Decl *D : RD->decls()) { + if (const auto *FD = dyn_cast<FieldDecl>(D)) + if (FD->getName() == FieldName->getName()) + return FD; + + if (const auto *R = dyn_cast<RecordDecl>(D)) + if (const FieldDecl *FD = + FindFieldInTopLevelOrAnonymousStruct(R, FieldName)) + return FD; + } + + return nullptr; +} + +bool Sema::CheckCountedByAttr(Scope *S, const FieldDecl *FD) { + LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel = + LangOptions::StrictFlexArraysLevelKind::IncompleteOnly; + if (!Decl::isFlexibleArrayMemberLike(Context, FD, FD->getType(), + StrictFlexArraysLevel, true)) { + // The "counted_by" attribute must be on a flexible array member. 
+ SourceRange SR = FD->getLocation(); + Diag(SR.getBegin(), diag::err_counted_by_attr_not_on_flexible_array_member) + << SR; + return true; + } + + const auto *CBA = FD->getAttr<CountedByAttr>(); + const IdentifierInfo *FieldName = CBA->getCountedByField(); + + auto GetNonAnonStructOrUnion = [](const RecordDecl *RD) { + while (RD && !RD->getDeclName()) + if (const auto *R = dyn_cast<RecordDecl>(RD->getDeclContext())) + RD = R; + else + break; + + return RD; + }; + + const RecordDecl *EnclosingRD = GetNonAnonStructOrUnion(FD->getParent()); + const FieldDecl *CountFD = + FindFieldInTopLevelOrAnonymousStruct(EnclosingRD, FieldName); + + if (!CountFD) { + DeclarationNameInfo NameInfo(FieldName, + CBA->getCountedByFieldLoc().getBegin()); + LookupResult MemResult(*this, NameInfo, Sema::LookupMemberName); + LookupName(MemResult, S); + + if (!MemResult.empty()) { + SourceRange SR = CBA->getCountedByFieldLoc(); + Diag(SR.getBegin(), diag::err_flexible_array_count_not_in_same_struct) + << CBA->getCountedByField() << SR; + + if (auto *ND = MemResult.getAsSingle<NamedDecl>()) { + SR = ND->getLocation(); + Diag(SR.getBegin(), diag::note_flexible_array_counted_by_attr_field) + << ND << SR; + } + + return true; + } else { + // The "counted_by" field needs to exist in the struct. + LookupResult OrdResult(*this, NameInfo, Sema::LookupOrdinaryName); + LookupName(OrdResult, S); + + if (!OrdResult.empty()) { + SourceRange SR = FD->getLocation(); + Diag(SR.getBegin(), diag::err_counted_by_must_be_in_structure) + << FieldName << SR; + + if (auto *ND = OrdResult.getAsSingle<NamedDecl>()) { + SR = ND->getLocation(); + Diag(SR.getBegin(), diag::note_flexible_array_counted_by_attr_field) + << ND << SR; + } + + return true; + } + } + + CXXScopeSpec SS; + DeclFilterCCC<FieldDecl> Filter(FieldName); + return DiagnoseEmptyLookup(S, SS, MemResult, Filter, nullptr, std::nullopt, + const_cast<DeclContext *>(FD->getDeclContext())); + } + + if (CountFD->hasAttr<CountedByAttr>()) { + // The "counted_by" field can't point to the flexible array member. + SourceRange SR = CBA->getCountedByFieldLoc(); + Diag(SR.getBegin(), diag::err_counted_by_attr_refers_to_flexible_array) + << CBA->getCountedByField() << SR; + return true; + } + + if (!CountFD->getType()->isIntegerType() || + CountFD->getType()->isBooleanType()) { + // The "counted_by" field must have an integer type. 
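Taken together, CheckCountedByAttr requires that counted_by is attached to a flexible array member, that the named count field exists in the same non-anonymous structure, is not the flexible array itself, and has a non-boolean integer type; per the ActOnPopScope hunk the check only runs for C record scopes. A minimal sketch of the intended usage (struct and field names are made up):

#include <stddef.h>

struct packet {
  size_t count;                                     /* non-boolean integer field */
  int    data[] __attribute__((counted_by(count))); /* flexible array member */
};

/* Rejected variants include: counted_by on a non-flexible-array field, a count
   identifier that is not a field of the same structure, a count field that is
   itself the flexible array, or a count field of boolean/floating type. */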
+ SourceRange SR = CBA->getCountedByFieldLoc(); + Diag(SR.getBegin(), + diag::err_flexible_array_counted_by_attr_field_not_integer) + << CBA->getCountedByField() << SR; + + SR = CountFD->getLocation(); + Diag(SR.getBegin(), diag::note_flexible_array_counted_by_attr_field) + << CountFD << SR; + return true; + } + + return false; +} + static void handleFunctionReturnThunksAttr(Sema &S, Decl *D, const ParsedAttr &AL) { StringRef KindStr; @@ -9420,6 +9549,10 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, handleAvailableOnlyInDefaultEvalMethod(S, D, AL); break; + case ParsedAttr::AT_CountedBy: + handleCountedByAttr(S, D, AL); + break; + // Microsoft attributes: case ParsedAttr::AT_LayoutVersion: handleLayoutVersion(S, D, AL); diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 60ad035570c8..2f48ea237cdf 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -2469,7 +2469,8 @@ bool Sema::DiagnoseDependentMemberLookup(const LookupResult &R) { bool Sema::DiagnoseEmptyLookup(Scope *S, CXXScopeSpec &SS, LookupResult &R, CorrectionCandidateCallback &CCC, TemplateArgumentListInfo *ExplicitTemplateArgs, - ArrayRef<Expr *> Args, TypoExpr **Out) { + ArrayRef<Expr *> Args, DeclContext *LookupCtx, + TypoExpr **Out) { DeclarationName Name = R.getLookupName(); unsigned diagnostic = diag::err_undeclared_var_use; @@ -2485,7 +2486,8 @@ bool Sema::DiagnoseEmptyLookup(Scope *S, CXXScopeSpec &SS, LookupResult &R, // unqualified lookup. This is useful when (for example) the // original lookup would not have found something because it was a // dependent name. - DeclContext *DC = SS.isEmpty() ? CurContext : nullptr; + DeclContext *DC = + LookupCtx ? LookupCtx : (SS.isEmpty() ? CurContext : nullptr); while (DC) { if (isa<CXXRecordDecl>(DC)) { LookupQualifiedName(R, DC); @@ -2528,12 +2530,12 @@ bool Sema::DiagnoseEmptyLookup(Scope *S, CXXScopeSpec &SS, LookupResult &R, emitEmptyLookupTypoDiagnostic(TC, *this, SS, Name, TypoLoc, Args, diagnostic, diagnostic_suggest); }, - nullptr, CTK_ErrorRecovery); + nullptr, CTK_ErrorRecovery, LookupCtx); if (*Out) return true; - } else if (S && - (Corrected = CorrectTypo(R.getLookupNameInfo(), R.getLookupKind(), - S, &SS, CCC, CTK_ErrorRecovery))) { + } else if (S && (Corrected = + CorrectTypo(R.getLookupNameInfo(), R.getLookupKind(), S, + &SS, CCC, CTK_ErrorRecovery, LookupCtx))) { std::string CorrectedStr(Corrected.getAsString(getLangOpts())); bool DroppedSpecifier = Corrected.WillReplaceSpecifier() && Name.getAsString() == CorrectedStr; @@ -2823,7 +2825,7 @@ Sema::ActOnIdExpression(Scope *S, CXXScopeSpec &SS, // a template name, but we happen to have always already looked up the name // before we get here if it must be a template name. if (DiagnoseEmptyLookup(S, SS, R, CCC ? 
*CCC : DefaultValidator, nullptr, - std::nullopt, &TE)) { + std::nullopt, nullptr, &TE)) { if (TE && KeywordReplacement) { auto &State = getTypoExprState(TE); auto BestTC = State.Consumer->getNextCorrection(); diff --git a/clang/lib/Sema/SemaExprMember.cpp b/clang/lib/Sema/SemaExprMember.cpp index 2abec3d86a27..32998ae60eaf 100644 --- a/clang/lib/Sema/SemaExprMember.cpp +++ b/clang/lib/Sema/SemaExprMember.cpp @@ -782,7 +782,8 @@ Sema::BuildMemberReferenceExpr(Expr *Base, QualType BaseType, const Scope *S, ActOnMemberAccessExtraArgs *ExtraArgs) { if (BaseType->isDependentType() || - (SS.isSet() && isDependentScopeSpecifier(SS))) + (SS.isSet() && isDependentScopeSpecifier(SS)) || + NameInfo.getName().isDependentName()) return ActOnDependentMemberExpr(Base, BaseType, IsArrow, OpLoc, SS, TemplateKWLoc, FirstQualifierInScope, diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index e6c267bb79e6..64bc38519802 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -6056,6 +6056,16 @@ static ExprResult BuildConvertedConstantExpression(Sema &S, Expr *From, diag::err_typecheck_converted_constant_expression_indirect) << From->getType() << From->getSourceRange() << T; } + // 'TryCopyInitialization' returns incorrect info for attempts to bind + // a reference to a bit-field due to C++ [over.ics.ref]p4. Namely, + // 'SCS->DirectBinding' occurs to be set to 'true' despite it is not + // the direct binding according to C++ [dcl.init.ref]p5. Hence, check this + // case explicitly. + if (From->refersToBitField() && T.getTypePtr()->isReferenceType()) { + return S.Diag(From->getBeginLoc(), + diag::err_reference_bind_to_bitfield_in_cce) + << From->getSourceRange(); + } // Usually we can simply apply the ImplicitConversionSequence we formed // earlier, but that's not guaranteed to work when initializing an object of diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp index f0b03db69084..21efe25ed84a 100644 --- a/clang/lib/Sema/SemaStmt.cpp +++ b/clang/lib/Sema/SemaStmt.cpp @@ -3200,7 +3200,7 @@ static void DiagnoseForRangeConstVariableCopies(Sema &SemaRef, // (The function `getTypeSize` returns the size in bits.) ASTContext &Ctx = SemaRef.Context; if (Ctx.getTypeSize(VariableType) <= 64 * 8 && - (VariableType.isTriviallyCopyableType(Ctx) || + (VariableType.isTriviallyCopyConstructibleType(Ctx) || hasTrivialABIAttr(VariableType))) return; diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index c8c5a51bf9f9..1a1bc87d2b32 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -6192,6 +6192,13 @@ bool TreeTransform<Derived>::TransformExceptionSpec( // Instantiate a dynamic noexcept expression, if any. if (isComputedNoexcept(ESI.Type)) { + // Update this scrope because ContextDecl in Sema will be used in + // TransformExpr. + auto *Method = dyn_cast_if_present<CXXMethodDecl>(ESI.SourceTemplate); + Sema::CXXThisScopeRAII ThisScope( + SemaRef, Method ? Method->getParent() : nullptr, + Method ? 
Method->getMethodQualifiers() : Qualifiers{}, + Method != nullptr); EnterExpressionEvaluationContext Unevaluated( getSema(), Sema::ExpressionEvaluationContext::ConstantEvaluated); ExprResult NoexceptExpr = getDerived().TransformExpr(ESI.NoexceptExpr); @@ -7732,7 +7739,11 @@ TreeTransform<Derived>::TransformIfStmt(IfStmt *S) { if (Then.isInvalid()) return StmtError(); } else { - Then = new (getSema().Context) NullStmt(S->getThen()->getBeginLoc()); + // Discarded branch is replaced with empty CompoundStmt so we can keep + // proper source location for start and end of original branch, so + // subsequent transformations like CoverageMapping work properly + Then = new (getSema().Context) + CompoundStmt(S->getThen()->getBeginLoc(), S->getThen()->getEndLoc()); } // Transform the "else" branch. @@ -7741,6 +7752,13 @@ TreeTransform<Derived>::TransformIfStmt(IfStmt *S) { Else = getDerived().TransformStmt(S->getElse()); if (Else.isInvalid()) return StmtError(); + } else if (S->getElse() && ConstexprConditionValue && + *ConstexprConditionValue) { + // Same thing here as with <then> branch, we are discarding it, we can't + // replace it with NULL nor NullStmt as we need to keep for source location + // range, for CoverageMapping + Else = new (getSema().Context) + CompoundStmt(S->getElse()->getBeginLoc(), S->getElse()->getEndLoc()); } if (!getDerived().AlwaysRebuild() && diff --git a/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp index 034825d88a44..3b36565681a7 100644 --- a/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp @@ -2201,6 +2201,25 @@ void StdLibraryFunctionsChecker::initFunctionSummaries( ErrnoNEZeroIrrelevant, GenericFailureMsg) .ArgConstraint(NotNull(ArgNo(0)))); + // int ungetc(int c, FILE *stream); + addToFunctionSummaryMap( + "ungetc", Signature(ArgTypes{IntTy, FilePtrTy}, RetType{IntTy}), + Summary(NoEvalCall) + .Case({ReturnValueCondition(BO_EQ, ArgNo(0)), + ArgumentCondition(0, WithinRange, {{0, UCharRangeMax}})}, + ErrnoMustNotBeChecked, GenericSuccessMsg) + .Case({ReturnValueCondition(WithinRange, SingleValue(EOFv)), + ArgumentCondition(0, WithinRange, {{EOFv, EOFv}})}, + ErrnoNEZeroIrrelevant, + "Assuming that 'ungetc' fails because EOF was passed as " + "character") + .Case({ReturnValueCondition(WithinRange, SingleValue(EOFv)), + ArgumentCondition(0, WithinRange, {{0, UCharRangeMax}})}, + ErrnoNEZeroIrrelevant, GenericFailureMsg) + .ArgConstraint(ArgumentCondition( + 0, WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}})) + .ArgConstraint(NotNull(ArgNo(1)))); + // int fseek(FILE *stream, long offset, int whence); // FIXME: It can be possible to get the 'SEEK_' values (like EOFv) and use // these for condition of arg 2. 
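The new ungetc summary models the usual C semantics: pushing back EOF always fails, and otherwise the call returns the pushed-back character on success or EOF on failure, without raising feof/ferror. A small sketch of the call pattern being modelled (the file name is hypothetical):

#include <stdio.h>

int main(void) {
  FILE *f = fopen("input.txt", "r"); /* hypothetical input file */
  if (!f)
    return 1;
  int c = fgetc(f);
  if (c != EOF && ungetc(c, f) != EOF) {
    /* Success: the next read yields 'c' again; ungetc(EOF, f) would fail. */
    int again = fgetc(f);
    printf("%d %d\n", c, again);
  }
  fclose(f);
  return 0;
}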
@@ -2255,7 +2274,7 @@ void StdLibraryFunctionsChecker::initFunctionSummaries( addToFunctionSummaryMap( "ftell", Signature(ArgTypes{FilePtrTy}, RetType{LongTy}), Summary(NoEvalCall) - .Case({ReturnValueCondition(WithinRange, Range(1, LongMax))}, + .Case({ReturnValueCondition(WithinRange, Range(0, LongMax))}, ErrnoUnchanged, GenericSuccessMsg) .Case(ReturnsMinusOne, ErrnoNEZeroIrrelevant, GenericFailureMsg) .ArgConstraint(NotNull(ArgNo(0)))); diff --git a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp index 25da3c18e851..742426a628e0 100644 --- a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp @@ -263,10 +263,17 @@ private: {{{"fputs"}, 2}, {std::bind(&StreamChecker::preReadWrite, _1, _2, _3, _4, false), std::bind(&StreamChecker::evalFputx, _1, _2, _3, _4, false), 1}}, + {{{"ungetc"}, 2}, + {std::bind(&StreamChecker::preReadWrite, _1, _2, _3, _4, false), + std::bind(&StreamChecker::evalUngetc, _1, _2, _3, _4), 1}}, {{{"fseek"}, 3}, {&StreamChecker::preFseek, &StreamChecker::evalFseek, 0}}, + {{{"fseeko"}, 3}, + {&StreamChecker::preFseek, &StreamChecker::evalFseek, 0}}, {{{"ftell"}, 1}, {&StreamChecker::preDefault, &StreamChecker::evalFtell, 0}}, + {{{"ftello"}, 1}, + {&StreamChecker::preDefault, &StreamChecker::evalFtell, 0}}, {{{"fflush"}, 1}, {&StreamChecker::preFflush, &StreamChecker::evalFflush, 0}}, {{{"rewind"}, 1}, @@ -332,6 +339,9 @@ private: void evalFputx(const FnDescription *Desc, const CallEvent &Call, CheckerContext &C, bool IsSingleChar) const; + void evalUngetc(const FnDescription *Desc, const CallEvent &Call, + CheckerContext &C) const; + void preFseek(const FnDescription *Desc, const CallEvent &Call, CheckerContext &C) const; void evalFseek(const FnDescription *Desc, const CallEvent &Call, @@ -916,6 +926,45 @@ void StreamChecker::evalFputx(const FnDescription *Desc, const CallEvent &Call, C.addTransition(StateFailed); } +void StreamChecker::evalUngetc(const FnDescription *Desc, const CallEvent &Call, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + SymbolRef StreamSym = getStreamArg(Desc, Call).getAsSymbol(); + if (!StreamSym) + return; + + const CallExpr *CE = dyn_cast_or_null<CallExpr>(Call.getOriginExpr()); + if (!CE) + return; + + const StreamState *OldSS = State->get<StreamMap>(StreamSym); + if (!OldSS) + return; + + assertStreamStateOpened(OldSS); + + // Generate a transition for the success state. + std::optional<NonLoc> PutVal = Call.getArgSVal(0).getAs<NonLoc>(); + if (!PutVal) + return; + ProgramStateRef StateNotFailed = + State->BindExpr(CE, C.getLocationContext(), *PutVal); + StateNotFailed = + StateNotFailed->set<StreamMap>(StreamSym, StreamState::getOpened(Desc)); + C.addTransition(StateNotFailed); + + // Add transition for the failed state. + // Failure of 'ungetc' does not result in feof or ferror state. + // If the PutVal has value of EofVal the function should "fail", but this is + // the same transition as the success state. + // In this case only one state transition is added by the analyzer (the two + // new states may be similar). 
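The surrounding hunks register fseeko and ftello alongside fseek and ftell, let ftell succeed with 0 (not just 1..LONG_MAX), and take the return value's type from the call itself so ftello's off_t result is handled. A sketch of the idiom this affects (file name hypothetical):

#include <stdio.h>

int main(void) {
  FILE *f = fopen("data.bin", "rb"); /* hypothetical input file */
  if (!f)
    return 1;
  long size = -1;
  if (fseek(f, 0L, SEEK_END) == 0)
    size = ftell(f); /* 0 is a legitimate success value, e.g. an empty file */
  if (size >= 0)
    printf("%ld bytes\n", size);
  fclose(f);
  return 0;
}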
+ ProgramStateRef StateFailed = bindInt(*EofVal, State, C, CE); + StateFailed = + StateFailed->set<StreamMap>(StreamSym, StreamState::getOpened(Desc)); + C.addTransition(StateFailed); +} + void StreamChecker::preFseek(const FnDescription *Desc, const CallEvent &Call, CheckerContext &C) const { ProgramStateRef State = C.getState(); @@ -1068,10 +1117,10 @@ void StreamChecker::evalFtell(const FnDescription *Desc, const CallEvent &Call, NonLoc RetVal = makeRetVal(C, CE).castAs<NonLoc>(); ProgramStateRef StateNotFailed = State->BindExpr(CE, C.getLocationContext(), RetVal); - auto Cond = SVB.evalBinOp(State, BO_GE, RetVal, - SVB.makeZeroVal(C.getASTContext().LongTy), - SVB.getConditionType()) - .getAs<DefinedOrUnknownSVal>(); + auto Cond = + SVB.evalBinOp(State, BO_GE, RetVal, SVB.makeZeroVal(Call.getResultType()), + SVB.getConditionType()) + .getAs<DefinedOrUnknownSVal>(); if (!Cond) return; StateNotFailed = StateNotFailed->assume(*Cond, true); @@ -1079,7 +1128,7 @@ void StreamChecker::evalFtell(const FnDescription *Desc, const CallEvent &Call, return; ProgramStateRef StateFailed = State->BindExpr( - CE, C.getLocationContext(), SVB.makeIntVal(-1, C.getASTContext().LongTy)); + CE, C.getLocationContext(), SVB.makeIntVal(-1, Call.getResultType())); // This function does not affect the stream state. // Still we add success and failure state with the appropriate return value. diff --git a/clang/tools/clang-format/ClangFormat.cpp b/clang/tools/clang-format/ClangFormat.cpp index be34dbbe886a..49ab7677a3ee 100644 --- a/clang/tools/clang-format/ClangFormat.cpp +++ b/clang/tools/clang-format/ClangFormat.cpp @@ -571,6 +571,11 @@ static int dumpConfig(bool IsSTDIN) { return 0; } +using String = SmallString<128>; +static String IgnoreDir; // Directory of .clang-format-ignore file. +static String PrevDir; // Directory of previous `FilePath`. +static SmallVector<String> Patterns; // Patterns in .clang-format-ignore file. + // Check whether `FilePath` is ignored according to the nearest // .clang-format-ignore file based on the rules below: // - A blank line is skipped. @@ -586,33 +591,50 @@ static bool isIgnored(StringRef FilePath) { if (!is_regular_file(FilePath)) return false; - using namespace llvm::sys::path; - SmallString<128> Path, AbsPath{FilePath}; + String Path; + String AbsPath{FilePath}; + using namespace llvm::sys::path; make_absolute(AbsPath); remove_dots(AbsPath, /*remove_dot_dot=*/true); - StringRef IgnoreDir{AbsPath}; - do { - IgnoreDir = parent_path(IgnoreDir); - if (IgnoreDir.empty()) + if (StringRef Dir{parent_path(AbsPath)}; PrevDir != Dir) { + PrevDir = Dir; + + for (;;) { + Path = Dir; + append(Path, ".clang-format-ignore"); + if (is_regular_file(Path)) + break; + Dir = parent_path(Dir); + if (Dir.empty()) + return false; + } + + IgnoreDir = convert_to_slash(Dir); + + std::ifstream IgnoreFile{Path.c_str()}; + if (!IgnoreFile.good()) return false; - Path = IgnoreDir; - append(Path, ".clang-format-ignore"); - } while (!is_regular_file(Path)); + Patterns.clear(); - std::ifstream IgnoreFile{Path.c_str()}; - if (!IgnoreFile.good()) - return false; + for (std::string Line; std::getline(IgnoreFile, Line);) { + if (const auto Pattern{StringRef{Line}.trim()}; + // Skip empty and comment lines. 
+ !Pattern.empty() && Pattern[0] != '#') { + Patterns.push_back(Pattern); + } + } + } - const auto Pathname = convert_to_slash(AbsPath); - for (std::string Line; std::getline(IgnoreFile, Line);) { - auto Pattern = StringRef(Line).trim(); - if (Pattern.empty() || Pattern[0] == '#') - continue; + if (IgnoreDir.empty()) + return false; - const bool IsNegated = Pattern[0] == '!'; + const auto Pathname{convert_to_slash(AbsPath)}; + for (const auto &Pat : Patterns) { + const bool IsNegated = Pat[0] == '!'; + StringRef Pattern{Pat}; if (IsNegated) Pattern = Pattern.drop_front(); @@ -620,11 +642,14 @@ static bool isIgnored(StringRef FilePath) { continue; Pattern = Pattern.ltrim(); + + // `Pattern` is relative to `IgnoreDir` unless it starts with a slash. + // This doesn't support patterns containing drive names (e.g. `C:`). if (Pattern[0] != '/') { - Path = convert_to_slash(IgnoreDir); + Path = IgnoreDir; append(Path, Style::posix, Pattern); remove_dots(Path, /*remove_dot_dot=*/true, Style::posix); - Pattern = Path.str(); + Pattern = Path; } if (clang::format::matchFilePath(Pattern, Pathname) == !IsNegated) diff --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc index f5de23ff4b94..25df899b3f36 100644 --- a/compiler-rt/include/profile/InstrProfData.inc +++ b/compiler-rt/include/profile/InstrProfData.inc @@ -123,6 +123,8 @@ INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::PointerType::getUnqual(Ctx), Next, \ /* INSTR_PROF_RAW_HEADER start */ /* Definition of member fields of the raw profile header data structure. */ +/* Please update llvm/docs/InstrProfileFormat.rst as appropriate when updating + raw profile format. */ #ifndef INSTR_PROF_RAW_HEADER #define INSTR_PROF_RAW_HEADER(Type, Name, Initializer) #else diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c index d0c42462e5e3..19266ab6c6fb 100644 --- a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c +++ b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c @@ -20,15 +20,6 @@ #include "InstrProfiling.h" #include "InstrProfilingInternal.h" -#if defined(__FreeBSD__) && !defined(ElfW) -/* - * FreeBSD's elf.h and link.h headers do not define the ElfW(type) macro yet. - * If this is added to all supported FreeBSD versions in the future, this - * compatibility macro can be removed. 
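The refactored isIgnored caches the nearest .clang-format-ignore file and its patterns per directory instead of re-reading the file for every input; the file format itself is unchanged: blank lines are skipped, '#' starts a comment, '!' negates a pattern, and a pattern is relative to the ignore file's directory unless it starts with '/'. A minimal example of such a file's contents (the path is made up):

# Do not format files directly under generated/ (relative to this file's directory).
generated/*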
- */ -#define ElfW(type) __ElfN(type) -#endif - #define PROF_DATA_START INSTR_PROF_SECT_START(INSTR_PROF_DATA_COMMON) #define PROF_DATA_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_DATA_COMMON) #define PROF_NAME_START INSTR_PROF_SECT_START(INSTR_PROF_NAME_COMMON) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc b/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc index 7836347d233a..c1e3530618c2 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc @@ -279,3 +279,6 @@ COMMON_FLAG(bool, test_only_replace_dlopen_main_program, false, COMMON_FLAG(bool, enable_symbolizer_markup, SANITIZER_FUCHSIA, "Use sanitizer symbolizer markup, available on Linux " "and always set true for Fuchsia.") + +COMMON_FLAG(bool, detect_invalid_join, true, + "If set, check invalid joins of threads.") diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp index 0cf250f72129..253dc10607a6 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp @@ -95,17 +95,33 @@ void ReportErrorSummary(const char *error_type, const StackTrace *stack, #if !SANITIZER_GO if (!common_flags()->print_summary) return; - if (stack->size == 0) { - ReportErrorSummary(error_type); - return; + + // Find first non-internal stack frame. + for (uptr i = 0; i < stack->size; ++i) { + uptr pc = StackTrace::GetPreviousInstructionPc(stack->trace[i]); + SymbolizedStackHolder symbolized_stack( + Symbolizer::GetOrInit()->SymbolizePC(pc)); + if (const SymbolizedStack *frame = symbolized_stack.get()) { + if (const SymbolizedStack *summary_frame = SkipInternalFrames(frame)) { + ReportErrorSummary(error_type, summary_frame->info, alt_tool_name); + return; + } + } + } + + // Fallback to the top one. + if (stack->size) { + uptr pc = StackTrace::GetPreviousInstructionPc(stack->trace[0]); + SymbolizedStackHolder symbolized_stack( + Symbolizer::GetOrInit()->SymbolizePC(pc)); + if (const SymbolizedStack *frame = symbolized_stack.get()) { + ReportErrorSummary(error_type, frame->info, alt_tool_name); + return; + } } - // Currently, we include the first stack frame into the report summary. - // Maybe sometimes we need to choose another frame (e.g. skip memcpy/etc). - uptr pc = StackTrace::GetPreviousInstructionPc(stack->trace[0]); - SymbolizedStackHolder symbolized_stack( - Symbolizer::GetOrInit()->SymbolizePC(pc)); - const SymbolizedStack *frame = symbolized_stack.get(); - ReportErrorSummary(error_type, frame->info, alt_tool_name); + + // Fallback to a summary without location. 
+ ReportErrorSummary(error_type); #endif } diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_thread_arg_retval.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_thread_arg_retval.cpp index bddb28521408..754fd7b65a1d 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_thread_arg_retval.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_thread_arg_retval.cpp @@ -23,6 +23,9 @@ void ThreadArgRetval::CreateLocked(uptr thread, bool detached, Data& t = data_[thread]; t = {}; t.gen = gen_++; + static_assert(sizeof(gen_) == sizeof(u32) && kInvalidGen == UINT32_MAX); + if (gen_ == kInvalidGen) + gen_ = 0; t.detached = detached; t.args = args; } @@ -53,16 +56,28 @@ void ThreadArgRetval::Finish(uptr thread, void* retval) { u32 ThreadArgRetval::BeforeJoin(uptr thread) const { __sanitizer::Lock lock(&mtx_); auto t = data_.find(thread); - CHECK(t); - CHECK(!t->second.detached); - return t->second.gen; + if (t && !t->second.detached) { + return t->second.gen; + } + if (!common_flags()->detect_invalid_join) + return kInvalidGen; + const char* reason = "unknown"; + if (!t) { + reason = "already joined"; + } else if (t->second.detached) { + reason = "detached"; + } + Report("ERROR: %s: Joining %s thread, aborting.\n", SanitizerToolName, + reason); + Die(); } void ThreadArgRetval::AfterJoin(uptr thread, u32 gen) { __sanitizer::Lock lock(&mtx_); auto t = data_.find(thread); if (!t || gen != t->second.gen) { - // Thread was reused and erased by any other event. + // Thread was reused and erased by any other event, or we had an invalid + // join. return; } CHECK(!t->second.detached); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_thread_arg_retval.h b/compiler-rt/lib/sanitizer_common/sanitizer_thread_arg_retval.h index c77021beb67d..0e6d35131c23 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_thread_arg_retval.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_thread_arg_retval.h @@ -93,6 +93,7 @@ class SANITIZER_MUTEX ThreadArgRetval { // will keep pointers alive forever, missing leaks caused by cancelation. private: + static const u32 kInvalidGen = UINT32_MAX; struct Data { Args args; u32 gen; // Avoid collision if thread id re-used. diff --git a/compiler-rt/lib/scudo/standalone/condition_variable.h b/compiler-rt/lib/scudo/standalone/condition_variable.h index 549f6e9f787b..4afebdc9d04c 100644 --- a/compiler-rt/lib/scudo/standalone/condition_variable.h +++ b/compiler-rt/lib/scudo/standalone/condition_variable.h @@ -51,7 +51,7 @@ struct ConditionVariableState { template <typename Config> struct ConditionVariableState<Config, decltype(Config::UseConditionVariable)> { - static constexpr bool enabled() { return true; } + static constexpr bool enabled() { return Config::UseConditionVariable; } using ConditionVariableT = typename Config::ConditionVariableT; }; diff --git a/libcxx/include/__config b/libcxx/include/__config index 082c73e672c7..1958d5c50ca9 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -1133,11 +1133,6 @@ __sanitizer_verify_double_ended_contiguous_container(const void*, const void*, c # define _LIBCPP_HAS_TRIVIAL_CONDVAR_DESTRUCTION # endif -// Some systems do not provide gets() in their C library, for security reasons. 
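The new detect_invalid_join common flag (on by default) turns joining an already-joined or detached thread from a bare CHECK failure into a readable report ("Joining already joined thread" / "Joining detached thread") followed by Die(); setting detect_invalid_join=0 in the tool's runtime options skips the check. A sketch of the kind of bug this flags, assuming a sanitizer that routes pthread_join through ThreadArgRetval (e.g. ASan); build with -pthread:

#include <pthread.h>
#include <stddef.h>

static void *work(void *arg) { return arg; }

int main(void) {
  pthread_t t;
  if (pthread_create(&t, NULL, work, NULL) != 0)
    return 1;
  pthread_join(t, NULL);
  /* Joining the same thread again is undefined behaviour; with
     detect_invalid_join=1 (the default) the runtime reports
     "Joining already joined thread" and aborts instead of tripping a CHECK. */
  pthread_join(t, NULL);
  return 0;
}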
-# if defined(_LIBCPP_MSVCRT) || (defined(__FreeBSD_version) && __FreeBSD_version >= 1300043) || defined(__OpenBSD__) -# define _LIBCPP_C_HAS_NO_GETS -# endif - # if defined(__BIONIC__) || defined(__NuttX__) || defined(__Fuchsia__) || defined(__wasi__) || \ defined(_LIBCPP_HAS_MUSL_LIBC) || defined(__OpenBSD__) # define _LIBCPP_PROVIDES_DEFAULT_RUNE_TABLE diff --git a/libcxx/include/__memory/shared_ptr.h b/libcxx/include/__memory/shared_ptr.h index 9aa938b22031..9a73d439306d 100644 --- a/libcxx/include/__memory/shared_ptr.h +++ b/libcxx/include/__memory/shared_ptr.h @@ -1166,12 +1166,12 @@ inline _LIBCPP_HIDE_FROM_ABI bool operator!=(nullptr_t, const shared_ptr<_Tp>& _ template <class _Tp> inline _LIBCPP_HIDE_FROM_ABI bool operator<(const shared_ptr<_Tp>& __x, nullptr_t) _NOEXCEPT { - return less<_Tp*>()(__x.get(), nullptr); + return less<typename shared_ptr<_Tp>::element_type*>()(__x.get(), nullptr); } template <class _Tp> inline _LIBCPP_HIDE_FROM_ABI bool operator<(nullptr_t, const shared_ptr<_Tp>& __x) _NOEXCEPT { - return less<_Tp*>()(nullptr, __x.get()); + return less<typename shared_ptr<_Tp>::element_type*>()(nullptr, __x.get()); } template <class _Tp> diff --git a/libcxx/include/cstdio b/libcxx/include/cstdio index b1b0ff8d3503..0a867cec1a38 100644 --- a/libcxx/include/cstdio +++ b/libcxx/include/cstdio @@ -159,7 +159,7 @@ using ::tmpfile _LIBCPP_USING_IF_EXISTS; using ::tmpnam _LIBCPP_USING_IF_EXISTS; using ::getchar _LIBCPP_USING_IF_EXISTS; -#if _LIBCPP_STD_VER <= 11 && !defined(_LIBCPP_C_HAS_NO_GETS) +#if _LIBCPP_STD_VER <= 11 using ::gets _LIBCPP_USING_IF_EXISTS; #endif using ::scanf _LIBCPP_USING_IF_EXISTS; diff --git a/libcxx/include/regex b/libcxx/include/regex index b575a267583b..0761d9de54a9 100644 --- a/libcxx/include/regex +++ b/libcxx/include/regex @@ -1889,6 +1889,9 @@ void __r_anchor_multiline<_CharT>::__exec(__state& __s) const { if (__s.__current_ == __s.__last_ && !(__s.__flags_ & regex_constants::match_not_eol)) { __s.__do_ = __state::__accept_but_not_consume; __s.__node_ = this->first(); + } else if (__s.__current_ == __s.__first_ && !(__s.__flags_ & regex_constants::match_not_eol)) { + __s.__do_ = __state::__accept_but_not_consume; + __s.__node_ = this->first(); } else if (__multiline_ && std::__is_eol(*__s.__current_)) { __s.__do_ = __state::__accept_but_not_consume; __s.__node_ = this->first(); diff --git a/libcxx/include/tuple b/libcxx/include/tuple index aa22c320b1ec..0e5f0b4831b4 100644 --- a/libcxx/include/tuple +++ b/libcxx/include/tuple @@ -141,7 +141,7 @@ template <class... Tuples> tuple<CTypes...> tuple_cat(Tuples&&... 
tpls); // cons // [tuple.apply], calling a function with a tuple of arguments: template <class F, class Tuple> - constexpr decltype(auto) apply(F&& f, Tuple&& t); // C++17 + constexpr decltype(auto) apply(F&& f, Tuple&& t) noexcept(see below); // C++17 noexcept since C++23 template <class T, class Tuple> constexpr T make_from_tuple(Tuple&& t); // C++17 diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp index 54b0a84e5213..71a1b1111e42 100644 --- a/lld/ELF/Arch/AArch64.cpp +++ b/lld/ELF/Arch/AArch64.cpp @@ -165,6 +165,8 @@ RelExpr AArch64::getRelExpr(RelType type, const Symbol &s, case R_AARCH64_ADR_GOT_PAGE: case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: return R_AARCH64_GOT_PAGE_PC; + case R_AARCH64_GOTPCREL32: + return R_GOT_PC; case R_AARCH64_NONE: return R_NONE; default: @@ -374,6 +376,7 @@ void AArch64::relocate(uint8_t *loc, const Relocation &rel, write32(loc, val); break; case R_AARCH64_PLT32: + case R_AARCH64_GOTPCREL32: checkInt(loc, val, 32, rel); write32(loc, val); break; diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index 996f9957a63c..ab2ec5b447d0 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -82,89 +82,33 @@ static uint64_t getLoongArchPage(uint64_t p) { static uint32_t lo12(uint32_t val) { return val & 0xfff; } // Calculate the adjusted page delta between dest and PC. -uint64_t elf::getLoongArchPageDelta(uint64_t dest, uint64_t pc) { - // Consider the large code model access pattern, of which the smaller code - // models' access patterns are a subset: - // - // pcalau12i U, %foo_hi20(sym) ; b in [-0x80000, 0x7ffff] - // addi.d T, zero, %foo_lo12(sym) ; a in [-0x800, 0x7ff] - // lu32i.d T, %foo64_lo20(sym) ; c in [-0x80000, 0x7ffff] - // lu52i.d T, T, %foo64_hi12(sym) ; d in [-0x800, 0x7ff] - // {ldx,stx,add}.* dest, U, T - // - // Let page(pc) = 0xRRR'QQQQQ'PPPPP'000 and dest = 0xZZZ'YYYYY'XXXXX'AAA, - // with RQ, P, ZY, X and A representing the respective bitfields as unsigned - // integers. We have: - // - // page(dest) = 0xZZZ'YYYYY'XXXXX'000 - // - page(pc) = 0xRRR'QQQQQ'PPPPP'000 - // ---------------------------------- - // 0xddd'ccccc'bbbbb'000 - // - // Now consider the above pattern's actual effects: - // - // page(pc) 0xRRR'QQQQQ'PPPPP'000 - // pcalau12i + 0xiii'iiiii'bbbbb'000 - // addi + 0xjjj'jjjjj'kkkkk'AAA - // lu32i.d & lu52i.d + 0xddd'ccccc'00000'000 - // -------------------------------------------------- - // dest = U + T - // = ((RQ<<32) + (P<<12) + i + (b<<12)) + (j + k + A + (cd<<32)) - // = (((RQ+cd)<<32) + i + j) + (((P+b)<<12) + k) + A - // = (ZY<<32) + (X<<12) + A - // - // ZY<<32 = (RQ<<32)+(cd<<32)+i+j, X<<12 = (P<<12)+(b<<12)+k - // cd<<32 = (ZY<<32)-(RQ<<32)-i-j, b<<12 = (X<<12)-(P<<12)-k - // - // where i and k are terms representing the effect of b's and A's sign - // extension respectively. - // - // i = signed b < 0 ? -0x10000'0000 : 0 - // k = signed A < 0 ? -0x1000 : 0 - // - // The j term is a bit complex: it represents the higher half of - // sign-extended bits from A that are effectively lost if i == 0 but k != 0, - // due to overwriting by lu32i.d & lu52i.d. - // - // j = signed A < 0 && signed b >= 0 ? 0x10000'0000 : 0 - // - // The actual effect of the instruction sequence before the final addition, - // i.e. 
our desired result value, is thus: - // - // result = (cd<<32) + (b<<12) - // = (ZY<<32)-(RQ<<32)-i-j + (X<<12)-(P<<12)-k - // = ((ZY<<32)+(X<<12)) - ((RQ<<32)+(P<<12)) - i - j - k - // = page(dest) - page(pc) - i - j - k - // - // when signed A >= 0 && signed b >= 0: - // - // i = j = k = 0 - // result = page(dest) - page(pc) - // - // when signed A >= 0 && signed b < 0: - // - // i = -0x10000'0000, j = k = 0 - // result = page(dest) - page(pc) + 0x10000'0000 - // - // when signed A < 0 && signed b >= 0: - // - // i = 0, j = 0x10000'0000, k = -0x1000 - // result = page(dest) - page(pc) - 0x10000'0000 + 0x1000 - // - // when signed A < 0 && signed b < 0: - // - // i = -0x10000'0000, j = 0, k = -0x1000 - // result = page(dest) - page(pc) + 0x1000 - uint64_t result = getLoongArchPage(dest) - getLoongArchPage(pc); - bool negativeA = lo12(dest) > 0x7ff; - bool negativeB = (result & 0x8000'0000) != 0; - - if (negativeA) - result += 0x1000; - if (negativeA && !negativeB) - result -= 0x10000'0000; - else if (!negativeA && negativeB) - result += 0x10000'0000; +uint64_t elf::getLoongArchPageDelta(uint64_t dest, uint64_t pc, RelType type) { + // Note that if the sequence being relocated is `pcalau12i + addi.d + lu32i.d + // + lu52i.d`, they must be adjancent so that we can infer the PC of + // `pcalau12i` when calculating the page delta for the other two instructions + // (lu32i.d and lu52i.d). Compensate all the sign-extensions is a bit + // complicated. Just use psABI recommended algorithm. + uint64_t pcalau12i_pc; + switch (type) { + case R_LARCH_PCALA64_LO20: + case R_LARCH_GOT64_PC_LO20: + case R_LARCH_TLS_IE64_PC_LO20: + pcalau12i_pc = pc - 8; + break; + case R_LARCH_PCALA64_HI12: + case R_LARCH_GOT64_PC_HI12: + case R_LARCH_TLS_IE64_PC_HI12: + pcalau12i_pc = pc - 12; + break; + default: + pcalau12i_pc = pc; + break; + } + uint64_t result = getLoongArchPage(dest) - getLoongArchPage(pcalau12i_pc); + if (dest & 0x800) + result += 0x1000 - 0x1'0000'0000; + if (result & 0x8000'0000) + result += 0x1'0000'0000; return result; } diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp index 1d3d179e5d6f..8906de073735 100644 --- a/lld/ELF/Arch/RISCV.cpp +++ b/lld/ELF/Arch/RISCV.cpp @@ -290,6 +290,7 @@ RelExpr RISCV::getRelExpr(const RelType type, const Symbol &s, case R_RISCV_PLT32: return R_PLT_PC; case R_RISCV_GOT_HI20: + case R_RISCV_GOT32_PCREL: return R_GOT_PC; case R_RISCV_PCREL_LO12_I: case R_RISCV_PCREL_LO12_S: @@ -499,6 +500,8 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { case R_RISCV_SET32: case R_RISCV_32_PCREL: case R_RISCV_PLT32: + case R_RISCV_GOT32_PCREL: + checkInt(loc, val, 32, rel); write32le(loc, val); return; @@ -954,8 +957,8 @@ static void mergeArch(RISCVISAInfo::OrderedExtensionMap &mergedExts, } else { for (const auto &ext : info.getExtensions()) { if (auto it = mergedExts.find(ext.first); it != mergedExts.end()) { - if (std::tie(it->second.MajorVersion, it->second.MinorVersion) >= - std::tie(ext.second.MajorVersion, ext.second.MinorVersion)) + if (std::tie(it->second.Major, it->second.Minor) >= + std::tie(ext.second.Major, ext.second.Minor)) continue; } mergedExts[ext.first] = ext.second; diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 53b496bd0842..586404643cc1 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -716,8 +716,8 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, return sym.getGotVA() + a - p; case R_LOONGARCH_GOT_PAGE_PC: if 
(sym.hasFlag(NEEDS_TLSGD)) - return getLoongArchPageDelta(in.got->getGlobalDynAddr(sym) + a, p); - return getLoongArchPageDelta(sym.getGotVA() + a, p); + return getLoongArchPageDelta(in.got->getGlobalDynAddr(sym) + a, p, type); + return getLoongArchPageDelta(sym.getGotVA() + a, p, type); case R_MIPS_GOTREL: return sym.getVA(a) - in.mipsGot->getGp(file); case R_MIPS_GOT_GP: @@ -767,7 +767,7 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, return 0; } case R_LOONGARCH_PAGE_PC: - return getLoongArchPageDelta(sym.getVA(a), p); + return getLoongArchPageDelta(sym.getVA(a), p, type); case R_PC: case R_ARM_PCA: { uint64_t dest; @@ -802,7 +802,7 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, case R_PPC64_CALL_PLT: return sym.getPltVA() + a - p; case R_LOONGARCH_PLT_PAGE_PC: - return getLoongArchPageDelta(sym.getPltVA() + a, p); + return getLoongArchPageDelta(sym.getPltVA() + a, p, type); case R_PLT_GOTPLT: return sym.getPltVA() + a - in.gotPlt->getVA(); case R_PPC32_PLTREL: @@ -864,7 +864,7 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, case R_TLSGD_PC: return in.got->getGlobalDynAddr(sym) + a - p; case R_LOONGARCH_TLSGD_PAGE_PC: - return getLoongArchPageDelta(in.got->getGlobalDynAddr(sym) + a, p); + return getLoongArchPageDelta(in.got->getGlobalDynAddr(sym) + a, p, type); case R_TLSLD_GOTPLT: return in.got->getVA() + in.got->getTlsIndexOff() + a - in.gotPlt->getVA(); case R_TLSLD_GOT: diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h index af7aaff8a4c0..ab6b6b9c013b 100644 --- a/lld/ELF/Target.h +++ b/lld/ELF/Target.h @@ -228,7 +228,7 @@ void addPPC64SaveRestore(); uint64_t getPPC64TocBase(); uint64_t getAArch64Page(uint64_t expr); template <typename ELFT> void writeARMCmseImportLib(); -uint64_t getLoongArchPageDelta(uint64_t dest, uint64_t pc); +uint64_t getLoongArchPageDelta(uint64_t dest, uint64_t pc, RelType type); void riscvFinalizeRelax(int passes); void mergeRISCVAttributesSections(); void addArmInputSectionMappingSymbols(); diff --git a/lldb/include/lldb/Core/Module.h b/lldb/include/lldb/Core/Module.h index f4973cdda1ef..0188057247a6 100644 --- a/lldb/include/lldb/Core/Module.h +++ b/lldb/include/lldb/Core/Module.h @@ -337,6 +337,12 @@ public: const ModuleFunctionSearchOptions &options, SymbolContextList &sc_list); + /// Find functions by compiler context. + void FindFunctions(llvm::ArrayRef<CompilerContext> compiler_ctx, + lldb::FunctionNameType name_type_mask, + const ModuleFunctionSearchOptions &options, + SymbolContextList &sc_list); + /// Find functions by name. /// /// If the function is an inlined function, it will have a block, diff --git a/lldb/include/lldb/Symbol/Type.h b/lldb/include/lldb/Symbol/Type.h index 307be6c55e01..acd1a769f13c 100644 --- a/lldb/include/lldb/Symbol/Type.h +++ b/lldb/include/lldb/Symbol/Type.h @@ -247,6 +247,10 @@ public: /// match. void AddLanguage(lldb::LanguageType language); + /// Set the list of languages that should produce a match to only the ones + /// specified in \ref languages. + void SetLanguages(LanguageSet languages); + /// Check if the language matches any languages that have been added to this /// match object. 
/// diff --git a/lldb/include/lldb/Utility/StreamString.h b/lldb/include/lldb/Utility/StreamString.h index 4c568acdcc6f..3d675caf8f3f 100644 --- a/lldb/include/lldb/Utility/StreamString.h +++ b/lldb/include/lldb/Utility/StreamString.h @@ -22,7 +22,7 @@ namespace lldb_private { class StreamString : public Stream { public: - StreamString(); + StreamString(bool colors = false); StreamString(uint32_t flags, uint32_t addr_size, lldb::ByteOrder byte_order); diff --git a/lldb/source/Core/Module.cpp b/lldb/source/Core/Module.cpp index c0574b724ace..331cf3246641 100644 --- a/lldb/source/Core/Module.cpp +++ b/lldb/source/Core/Module.cpp @@ -855,6 +855,23 @@ void Module::FindFunctions(ConstString name, } } +void Module::FindFunctions(llvm::ArrayRef<CompilerContext> compiler_ctx, + FunctionNameType name_type_mask, + const ModuleFunctionSearchOptions &options, + SymbolContextList &sc_list) { + if (compiler_ctx.empty() || + compiler_ctx.back().kind != CompilerContextKind::Function) + return; + ConstString name = compiler_ctx.back().name; + SymbolContextList unfiltered; + FindFunctions(name, CompilerDeclContext(), name_type_mask, options, + unfiltered); + // Filter by context. + for (auto &sc : unfiltered) + if (sc.function && compiler_ctx.equals(sc.function->GetCompilerContext())) + sc_list.Append(sc); +} + void Module::FindFunctions(const RegularExpression &regex, const ModuleFunctionSearchOptions &options, SymbolContextList &sc_list) { diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp index 5d109feb3d39..62a30c14912b 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp @@ -943,44 +943,41 @@ void ClangASTImporter::ASTImporterDelegate::ImportDefinitionTo( // the class was originally sourced from symbols.
if (ObjCInterfaceDecl *to_objc_interface = dyn_cast<ObjCInterfaceDecl>(to)) { - do { - ObjCInterfaceDecl *to_superclass = to_objc_interface->getSuperClass(); + ObjCInterfaceDecl *to_superclass = to_objc_interface->getSuperClass(); - if (to_superclass) - break; // we're not going to override it if it's set + if (to_superclass) + return; // we're not going to override it if it's set - ObjCInterfaceDecl *from_objc_interface = - dyn_cast<ObjCInterfaceDecl>(from); + ObjCInterfaceDecl *from_objc_interface = dyn_cast<ObjCInterfaceDecl>(from); - if (!from_objc_interface) - break; + if (!from_objc_interface) + return; - ObjCInterfaceDecl *from_superclass = from_objc_interface->getSuperClass(); + ObjCInterfaceDecl *from_superclass = from_objc_interface->getSuperClass(); - if (!from_superclass) - break; + if (!from_superclass) + return; - llvm::Expected<Decl *> imported_from_superclass_decl = - Import(from_superclass); + llvm::Expected<Decl *> imported_from_superclass_decl = + Import(from_superclass); - if (!imported_from_superclass_decl) { - LLDB_LOG_ERROR(log, imported_from_superclass_decl.takeError(), - "Couldn't import decl: {0}"); - break; - } + if (!imported_from_superclass_decl) { + LLDB_LOG_ERROR(log, imported_from_superclass_decl.takeError(), + "Couldn't import decl: {0}"); + return; + } - ObjCInterfaceDecl *imported_from_superclass = - dyn_cast<ObjCInterfaceDecl>(*imported_from_superclass_decl); + ObjCInterfaceDecl *imported_from_superclass = + dyn_cast<ObjCInterfaceDecl>(*imported_from_superclass_decl); - if (!imported_from_superclass) - break; + if (!imported_from_superclass) + return; - if (!to_objc_interface->hasDefinition()) - to_objc_interface->startDefinition(); + if (!to_objc_interface->hasDefinition()) + to_objc_interface->startDefinition(); - to_objc_interface->setSuperClass(m_source_ctx->getTrivialTypeSourceInfo( - m_source_ctx->getObjCInterfaceType(imported_from_superclass))); - } while (false); + to_objc_interface->setSuperClass(m_source_ctx->getTrivialTypeSourceInfo( + m_source_ctx->getObjCInterfaceType(imported_from_superclass))); } } diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp index 586cc08a6f12..c6937ebca319 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp @@ -1031,6 +1031,41 @@ static void LoadLibCxxFormatters(lldb::TypeCategoryImplSP cpp_category_sp) { "^std::__[[:alnum:]]+::chrono::seconds", eFormatterMatchRegex, TypeSummaryImplSP(new StringSummaryFormat( eTypeOptionHideChildren | eTypeOptionHideValue, "${var.__rep_} s"))); + + // Chrono calendar types + + cpp_category_sp->AddTypeSummary( + "^std::__[[:alnum:]]+::chrono::day$", eFormatterMatchRegex, + TypeSummaryImplSP(new StringSummaryFormat(eTypeOptionHideChildren | + eTypeOptionHideValue, + "day=${var.__d_%u}"))); + AddCXXSummary(cpp_category_sp, + lldb_private::formatters::LibcxxChronoMonthSummaryProvider, + "libc++ std::chrono::month summary provider", + "^std::__[[:alnum:]]+::chrono::month$", + eTypeOptionHideChildren | eTypeOptionHideValue, true); + + cpp_category_sp->AddTypeSummary( + "^std::__[[:alnum:]]+::chrono::year$", eFormatterMatchRegex, + TypeSummaryImplSP(new StringSummaryFormat( + eTypeOptionHideChildren | eTypeOptionHideValue, "year=${var.__y_}"))); + + cpp_category_sp->AddTypeSummary( + "^std::__[[:alnum:]]+::chrono::month_day$", eFormatterMatchRegex, + TypeSummaryImplSP(new StringSummaryFormat(eTypeOptionHideChildren | 
+ eTypeOptionHideValue, + "${var.__m_} ${var.__d_}"))); + cpp_category_sp->AddTypeSummary( + "^std::__[[:alnum:]]+::chrono::month_day_last$", eFormatterMatchRegex, + TypeSummaryImplSP(new StringSummaryFormat(eTypeOptionHideChildren | + eTypeOptionHideValue, + "${var.__m_} day=last"))); + AddCXXSummary( + cpp_category_sp, + lldb_private::formatters::LibcxxChronoYearMonthDaySummaryProvider, + "libc++ std::chrono::year_month_day summary provider", + "^std::__[[:alnum:]]+::chrono::year_month_day$", + eTypeOptionHideChildren | eTypeOptionHideValue, true); } static void LoadLibStdcppFormatters(lldb::TypeCategoryImplSP cpp_category_sp) { diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp index cae17ef992b2..f8be4f785dc4 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp @@ -1084,3 +1084,60 @@ bool lldb_private::formatters::LibcxxWStringViewSummaryProvider( return ::LibcxxWStringSummaryProvider(valobj, stream, summary_options, dataobj, size); } + +bool lldb_private::formatters::LibcxxChronoMonthSummaryProvider( + ValueObject &valobj, Stream &stream, const TypeSummaryOptions &options) { + // FIXME: These are the names used in the C++20 ostream operator. Since LLVM + // uses C++17 it's not possible to use the ostream operator directly. + static const std::array<std::string_view, 12> months = { + "January", "February", "March", "April", "May", "June", + "July", "August", "September", "October", "November", "December"}; + + ValueObjectSP ptr_sp = valobj.GetChildMemberWithName("__m_"); + if (!ptr_sp) + return false; + + const unsigned month = ptr_sp->GetValueAsUnsigned(0); + if (month >= 1 && month <= 12) + stream << "month=" << months[month - 1]; + else + stream.Printf("month=%u", month); + + return true; +} + +bool lldb_private::formatters::LibcxxChronoYearMonthDaySummaryProvider( + ValueObject &valobj, Stream &stream, const TypeSummaryOptions &options) { + ValueObjectSP ptr_sp = valobj.GetChildMemberWithName("__y_"); + if (!ptr_sp) + return false; + ptr_sp = ptr_sp->GetChildMemberWithName("__y_"); + if (!ptr_sp) + return false; + int year = ptr_sp->GetValueAsSigned(0); + + ptr_sp = valobj.GetChildMemberWithName("__m_"); + if (!ptr_sp) + return false; + ptr_sp = ptr_sp->GetChildMemberWithName("__m_"); + if (!ptr_sp) + return false; + const unsigned month = ptr_sp->GetValueAsUnsigned(0); + + ptr_sp = valobj.GetChildMemberWithName("__d_"); + if (!ptr_sp) + return false; + ptr_sp = ptr_sp->GetChildMemberWithName("__d_"); + if (!ptr_sp) + return false; + const unsigned day = ptr_sp->GetValueAsUnsigned(0); + + stream << "date="; + if (year < 0) { + stream << '-'; + year = -year; + } + stream.Printf("%04d-%02u-%02u", year, month, day); + + return true; +} diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.h b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.h index f65801e2cb1b..c252ae382dd9 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.h +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.h @@ -261,6 +261,14 @@ SyntheticChildrenFrontEnd * LibcxxStdRangesRefViewSyntheticFrontEndCreator(CXXSyntheticChildren *, lldb::ValueObjectSP); +bool LibcxxChronoMonthSummaryProvider( + ValueObject &valobj, Stream &stream, + const TypeSummaryOptions &options); // libc++ std::chrono::month + +bool LibcxxChronoYearMonthDaySummaryProvider( + ValueObject &valobj, Stream &stream, + const TypeSummaryOptions &options); // libc++ std::chrono::year_month_day + } // namespace 
formatters } // namespace lldb_private diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp index bed68f45426f..d4446befd83b 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp @@ -373,44 +373,51 @@ std::vector<DWARFDIE> DWARFDIE::GetDeclContextDIEs() const { return result; } -std::vector<lldb_private::CompilerContext> DWARFDIE::GetDeclContext() const { +static std::vector<lldb_private::CompilerContext> +GetDeclContextImpl(llvm::SmallSet<lldb::user_id_t, 4> &seen, DWARFDIE die) { std::vector<lldb_private::CompilerContext> context; - const dw_tag_t tag = Tag(); - if (tag == DW_TAG_compile_unit || tag == DW_TAG_partial_unit) + // Stop if we hit a cycle. + if (!die || !seen.insert(die.GetID()).second) return context; - DWARFDIE parent = GetParent(); - if (parent) - context = parent.GetDeclContext(); + + // Handle outline member function DIEs by following the specification. + if (DWARFDIE spec = die.GetReferencedDIE(DW_AT_specification)) + return GetDeclContextImpl(seen, spec); + + // Get the parent context chain. + context = GetDeclContextImpl(seen, die.GetParent()); + + // Add this DIE's contribution at the end of the chain. auto push_ctx = [&](CompilerContextKind kind, llvm::StringRef name) { context.push_back({kind, ConstString(name)}); }; - switch (tag) { + switch (die.Tag()) { case DW_TAG_module: - push_ctx(CompilerContextKind::Module, GetName()); + push_ctx(CompilerContextKind::Module, die.GetName()); break; case DW_TAG_namespace: - push_ctx(CompilerContextKind::Namespace, GetName()); + push_ctx(CompilerContextKind::Namespace, die.GetName()); break; case DW_TAG_structure_type: - push_ctx(CompilerContextKind::Struct, GetName()); + push_ctx(CompilerContextKind::Struct, die.GetName()); break; case DW_TAG_union_type: - push_ctx(CompilerContextKind::Union, GetName()); + push_ctx(CompilerContextKind::Union, die.GetName()); break; case DW_TAG_class_type: - push_ctx(CompilerContextKind::Class, GetName()); + push_ctx(CompilerContextKind::Class, die.GetName()); break; case DW_TAG_enumeration_type: - push_ctx(CompilerContextKind::Enum, GetName()); + push_ctx(CompilerContextKind::Enum, die.GetName()); break; case DW_TAG_subprogram: - push_ctx(CompilerContextKind::Function, GetPubname()); + push_ctx(CompilerContextKind::Function, die.GetName()); break; case DW_TAG_variable: - push_ctx(CompilerContextKind::Variable, GetPubname()); + push_ctx(CompilerContextKind::Variable, die.GetPubname()); break; case DW_TAG_typedef: - push_ctx(CompilerContextKind::Typedef, GetName()); + push_ctx(CompilerContextKind::Typedef, die.GetName()); break; default: break; @@ -418,6 +425,11 @@ std::vector<lldb_private::CompilerContext> DWARFDIE::GetDeclContext() const { return context; } +std::vector<lldb_private::CompilerContext> DWARFDIE::GetDeclContext() const { + llvm::SmallSet<lldb::user_id_t, 4> seen; + return GetDeclContextImpl(seen, *this); +} + std::vector<lldb_private::CompilerContext> DWARFDIE::GetTypeLookupContext() const { std::vector<lldb_private::CompilerContext> context; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index 737da7798b82..1a16b70f42fe 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -2574,11 +2574,12 @@ void SymbolFileDWARF::FindFunctions(const Module::LookupInfo &lookup_info, Module::LookupInfo 
no_tp_lookup_info(lookup_info); no_tp_lookup_info.SetLookupName(ConstString(name_no_template_params)); - m_index->GetFunctions(no_tp_lookup_info, *this, parent_decl_ctx, [&](DWARFDIE die) { - if (resolved_dies.insert(die.GetDIE()).second) - ResolveFunction(die, include_inlines, sc_list); - return true; - }); + m_index->GetFunctions(no_tp_lookup_info, *this, parent_decl_ctx, + [&](DWARFDIE die) { + if (resolved_dies.insert(die.GetDIE()).second) + ResolveFunction(die, include_inlines, sc_list); + return true; + }); } } diff --git a/lldb/source/Plugins/Trace/intel-pt/CommandObjectTraceStartIntelPT.cpp b/lldb/source/Plugins/Trace/intel-pt/CommandObjectTraceStartIntelPT.cpp index d4f7dc354e9f..44224229e625 100644 --- a/lldb/source/Plugins/Trace/intel-pt/CommandObjectTraceStartIntelPT.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/CommandObjectTraceStartIntelPT.cpp @@ -158,7 +158,7 @@ CommandObjectProcessTraceStartIntelPT::CommandOptions::GetDefinitions() { return llvm::ArrayRef(g_process_trace_start_intel_pt_options); } -bool CommandObjectProcessTraceStartIntelPT::DoExecute( +void CommandObjectProcessTraceStartIntelPT::DoExecute( Args &command, CommandReturnObject &result) { if (Error err = m_trace.Start( m_options.m_ipt_trace_size, m_options.m_process_buffer_size_limit, @@ -167,8 +167,6 @@ bool CommandObjectProcessTraceStartIntelPT::DoExecute( result.SetError(Status(std::move(err))); else result.SetStatus(eReturnStatusSuccessFinishResult); - - return result.Succeeded(); } std::optional<uint64_t> diff --git a/lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp b/lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp index 17f8f51bdf0e..9c075398d547 100644 --- a/lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp @@ -85,11 +85,11 @@ double DecodedThread::NanosecondsRange::GetInterpolatedTime( return interpolate(next_range->nanos); } -uint64_t DecodedThread::GetItemsCount() const { return m_item_kinds.size(); } +uint64_t DecodedThread::GetItemsCount() const { return m_item_data.size(); } lldb::addr_t DecodedThread::GetInstructionLoadAddress(uint64_t item_index) const { - return m_item_data[item_index].load_address; + return std::get<lldb::addr_t>(m_item_data[item_index]); } lldb::addr_t @@ -99,14 +99,16 @@ DecodedThread::GetSyncPointOffsetByIndex(uint64_t item_index) const { ThreadSP DecodedThread::GetThread() { return m_thread_sp; } +template <typename Data> DecodedThread::TraceItemStorage & -DecodedThread::CreateNewTraceItem(lldb::TraceItemKind kind) { - m_item_kinds.push_back(kind); - m_item_data.emplace_back(); +DecodedThread::CreateNewTraceItem(lldb::TraceItemKind kind, Data &&data) { + m_item_data.emplace_back(data); + if (m_last_tsc) (*m_last_tsc)->second.items_count++; if (m_last_nanoseconds) (*m_last_nanoseconds)->second.items_count++; + return m_item_data.back(); } @@ -176,27 +178,27 @@ uint64_t DecodedThread::GetTotalInstructionCount() const { } void DecodedThread::AppendEvent(lldb::TraceEvent event) { - CreateNewTraceItem(lldb::eTraceItemKindEvent).event = event; + CreateNewTraceItem(lldb::eTraceItemKindEvent, event); m_events_stats.RecordEvent(event); } void DecodedThread::AppendInstruction(const pt_insn &insn) { - CreateNewTraceItem(lldb::eTraceItemKindInstruction).load_address = insn.ip; + CreateNewTraceItem(lldb::eTraceItemKindInstruction, insn.ip); m_insn_count++; } void DecodedThread::AppendError(const IntelPTError &error) { - CreateNewTraceItem(lldb::eTraceItemKindError).error = error.message(); + 
CreateNewTraceItem(lldb::eTraceItemKindError, error.message()); m_error_stats.RecordError(/*fatal=*/false); } void DecodedThread::AppendCustomError(StringRef err, bool fatal) { - CreateNewTraceItem(lldb::eTraceItemKindError).error = err.str(); + CreateNewTraceItem(lldb::eTraceItemKindError, err.str()); m_error_stats.RecordError(fatal); } lldb::TraceEvent DecodedThread::GetEventByIndex(int item_index) const { - return m_item_data[item_index].event; + return std::get<lldb::TraceEvent>(m_item_data[item_index]); } const DecodedThread::EventsStats &DecodedThread::GetEventsStats() const { @@ -233,13 +235,18 @@ const DecodedThread::ErrorStats &DecodedThread::GetErrorStats() const { lldb::TraceItemKind DecodedThread::GetItemKindByIndex(uint64_t item_index) const { - return static_cast<lldb::TraceItemKind>(m_item_kinds[item_index]); + return std::visit( + llvm::makeVisitor( + [](const std::string &) { return lldb::eTraceItemKindError; }, + [](lldb::TraceEvent) { return lldb::eTraceItemKindEvent; }, + [](lldb::addr_t) { return lldb::eTraceItemKindInstruction; }), + m_item_data[item_index]); } llvm::StringRef DecodedThread::GetErrorByIndex(uint64_t item_index) const { if (item_index >= m_item_data.size()) return llvm::StringRef(); - return m_item_data[item_index].error; + return std::get<std::string>(m_item_data[item_index]); } DecodedThread::DecodedThread( @@ -249,7 +256,6 @@ DecodedThread::DecodedThread( size_t DecodedThread::CalculateApproximateMemoryUsage() const { return sizeof(TraceItemStorage) * m_item_data.size() + - sizeof(uint8_t) * m_item_kinds.size() + (sizeof(uint64_t) + sizeof(TSC)) * m_tscs.size() + (sizeof(uint64_t) + sizeof(uint64_t)) * m_nanoseconds.size() + (sizeof(uint64_t) + sizeof(lldb::cpu_id_t)) * m_cpus.size(); diff --git a/lldb/source/Plugins/Trace/intel-pt/DecodedThread.h b/lldb/source/Plugins/Trace/intel-pt/DecodedThread.h index 5745cdb67ab6..a48c55cc76df 100644 --- a/lldb/source/Plugins/Trace/intel-pt/DecodedThread.h +++ b/lldb/source/Plugins/Trace/intel-pt/DecodedThread.h @@ -14,9 +14,10 @@ #include "lldb/Utility/TraceIntelPTGDBRemotePackets.h" #include "llvm/Support/Errc.h" #include "llvm/Support/Error.h" +#include <deque> #include <optional> #include <utility> -#include <vector> +#include <variant> namespace lldb_private { namespace trace_intel_pt { @@ -265,30 +266,19 @@ private: /// to update \a CalculateApproximateMemoryUsage() accordingly. lldb::ThreadSP m_thread_sp; - /// We use a union to optimize the memory usage for the different kinds of - /// trace items. - union TraceItemStorage { - /// The load addresses of this item if it's an instruction. - uint64_t load_address; - - /// The event kind of this item if it's an event - lldb::TraceEvent event; - - /// The string message of this item if it's an error - std::string error; - }; + using TraceItemStorage = + std::variant<std::string, lldb::TraceEvent, lldb::addr_t>; /// Create a new trace item. /// /// \return /// The index of the new item. - DecodedThread::TraceItemStorage &CreateNewTraceItem(lldb::TraceItemKind kind); + template <typename Data> + DecodedThread::TraceItemStorage &CreateNewTraceItem(lldb::TraceItemKind kind, + Data &&data); /// Most of the trace data is stored here. - std::vector<TraceItemStorage> m_item_data; - /// The TraceItemKind for each trace item encoded as uint8_t. We don't include - /// it in TraceItemStorage to avoid padding. - std::vector<uint8_t> m_item_kinds; + std::deque<TraceItemStorage> m_item_data; /// This map contains the TSCs of the decoded trace items. 
It maps /// `item index -> TSC`, where `item index` is the first index diff --git a/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.cpp b/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.cpp index cdf81954eee9..f8241ef6a793 100644 --- a/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.cpp @@ -572,7 +572,7 @@ Error lldb_private::trace_intel_pt::DecodeSingleTraceForThread( Expected<PSBBlockDecoder> decoder = PSBBlockDecoder::Create( trace_intel_pt, block, buffer.slice(block.psb_offset, block.size), *decoded_thread.GetThread()->GetProcess(), - i + 1 < blocks->size() ? blocks->at(i + 1).starting_ip : None, + i + 1 < blocks->size() ? blocks->at(i + 1).starting_ip : std::nullopt, decoded_thread, std::nullopt); if (!decoder) return decoder.takeError(); @@ -640,7 +640,7 @@ Error lldb_private::trace_intel_pt::DecodeSystemWideTraceForThread( *decoded_thread.GetThread()->GetProcess(), j + 1 < execution.psb_blocks.size() ? execution.psb_blocks[j + 1].starting_ip - : None, + : std::nullopt, decoded_thread, execution.thread_execution.GetEndTSC()); if (!decoder) return decoder.takeError(); diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.cpp b/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.cpp index 66d342196cf1..dda6cd74343f 100644 --- a/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.cpp @@ -35,7 +35,7 @@ void TraceCursorIntelPT::Next() { void TraceCursorIntelPT::ClearTimingRangesIfInvalid() { if (m_tsc_range_calculated) { if (!m_tsc_range || m_pos < 0 || !m_tsc_range->InRange(m_pos)) { - m_tsc_range = None; + m_tsc_range = std::nullopt; m_tsc_range_calculated = false; } } @@ -43,7 +43,7 @@ void TraceCursorIntelPT::ClearTimingRangesIfInvalid() { if (m_nanoseconds_range_calculated) { if (!m_nanoseconds_range || m_pos < 0 || !m_nanoseconds_range->InRange(m_pos)) { - m_nanoseconds_range = None; + m_nanoseconds_range = std::nullopt; m_nanoseconds_range_calculated = false; } } diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTBundleLoader.cpp b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTBundleLoader.cpp index bd9cca675f2d..1a9f6fe30509 100644 --- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTBundleLoader.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTBundleLoader.cpp @@ -15,6 +15,7 @@ #include "lldb/Core/Debugger.h" #include "lldb/Core/Module.h" #include "lldb/Target/Process.h" +#include "lldb/Target/ProcessTrace.h" #include "lldb/Target/Target.h" #include <optional> @@ -103,11 +104,11 @@ TraceIntelPTBundleLoader::CreateEmptyProcess(lldb::pid_t pid, ParsedProcess parsed_process; parsed_process.target_sp = target_sp; - // This should instead try to directly create an instance of ProcessTrace. 
- // ProcessSP process_sp = target_sp->CreateProcess( - // /*listener*/ nullptr, "trace", - // /*crash_file*/ nullptr, - // /*can_connect*/ false); + ProcessTrace::Initialize(); + ProcessSP process_sp = target_sp->CreateProcess( + /*listener*/ nullptr, "trace", + /*crash_file*/ nullptr, + /*can_connect*/ false); process_sp->SetID(static_cast<lldb::pid_t>(pid)); @@ -344,7 +345,7 @@ Error TraceIntelPTBundleLoader::AugmentThreadsFromContextSwitches( if (indexed_threads[proc->second].count(tid)) return; indexed_threads[proc->second].insert(tid); - proc->second->threads.push_back({tid, /*ipt_trace=*/None}); + proc->second->threads.push_back({tid, /*ipt_trace=*/std::nullopt}); }; for (const JSONCpu &cpu : *bundle_description.cpus) { diff --git a/lldb/source/Symbol/Type.cpp b/lldb/source/Symbol/Type.cpp index 293fe1b78f4a..6069d066eaf6 100644 --- a/lldb/source/Symbol/Type.cpp +++ b/lldb/source/Symbol/Type.cpp @@ -145,6 +145,10 @@ void TypeQuery::AddLanguage(LanguageType language) { m_languages->Insert(language); } +void TypeQuery::SetLanguages(LanguageSet languages) { + m_languages = std::move(languages); +} + bool TypeQuery::ContextMatches( llvm::ArrayRef<CompilerContext> context_chain) const { if (GetExactMatch() || context_chain.size() == m_context.size()) diff --git a/lldb/source/Target/ProcessTrace.cpp b/lldb/source/Target/ProcessTrace.cpp index 6e5ef6a379f9..3a41f257627c 100644 --- a/lldb/source/Target/ProcessTrace.cpp +++ b/lldb/source/Target/ProcessTrace.cpp @@ -20,6 +20,8 @@ using namespace lldb; using namespace lldb_private; +LLDB_PLUGIN_DEFINE(ProcessTrace) + llvm::StringRef ProcessTrace::GetPluginDescriptionStatic() { return "Trace process plug-in."; } diff --git a/lldb/source/Utility/StreamString.cpp b/lldb/source/Utility/StreamString.cpp index 745a85b75765..0d35ccbdbbd0 100644 --- a/lldb/source/Utility/StreamString.cpp +++ b/lldb/source/Utility/StreamString.cpp @@ -11,7 +11,7 @@ using namespace lldb; using namespace lldb_private; -StreamString::StreamString() : Stream(0, 4, eByteOrderBig) {} +StreamString::StreamString(bool colors) : Stream(0, 4, eByteOrderBig, colors) {} StreamString::StreamString(uint32_t flags, uint32_t addr_size, ByteOrder byte_order) diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h index 18bc4d108b15..a136eeb0ff1b 100644 --- a/llvm/include/llvm/ADT/STLExtras.h +++ b/llvm/include/llvm/ADT/STLExtras.h @@ -1290,18 +1290,6 @@ public: return (*this)[size() - 1]; } - /// Compare this range with another. - template <typename OtherT> - friend bool operator==(const indexed_accessor_range_base &lhs, - const OtherT &rhs) { - return std::equal(lhs.begin(), lhs.end(), rhs.begin(), rhs.end()); - } - template <typename OtherT> - friend bool operator!=(const indexed_accessor_range_base &lhs, - const OtherT &rhs) { - return !(lhs == rhs); - } - /// Return the size of this range. size_t size() const { return count; } @@ -1364,6 +1352,23 @@ protected: /// The size from the owning range. ptrdiff_t count; }; +/// Compare this range with another. +/// FIXME: Make me a member function instead of friend when it works in C++20. 
+template <typename OtherT, typename DerivedT, typename BaseT, typename T, + typename PointerT, typename ReferenceT> +bool operator==(const indexed_accessor_range_base<DerivedT, BaseT, T, PointerT, + ReferenceT> &lhs, + const OtherT &rhs) { + return std::equal(lhs.begin(), lhs.end(), rhs.begin(), rhs.end()); +} + +template <typename OtherT, typename DerivedT, typename BaseT, typename T, + typename PointerT, typename ReferenceT> +bool operator!=(const indexed_accessor_range_base<DerivedT, BaseT, T, PointerT, + ReferenceT> &lhs, + const OtherT &rhs) { + return !(lhs == rhs); +} } // end namespace detail /// This class provides an implementation of a range of diff --git a/llvm/include/llvm/ADT/StringRef.h b/llvm/include/llvm/ADT/StringRef.h index d892333de391..1c6c96678b5d 100644 --- a/llvm/include/llvm/ADT/StringRef.h +++ b/llvm/include/llvm/ADT/StringRef.h @@ -128,7 +128,7 @@ namespace llvm { /// data - Get a pointer to the start of the string (which may not be null /// terminated). - [[nodiscard]] const char *data() const { return Data; } + [[nodiscard]] constexpr const char *data() const { return Data; } /// empty - Check if the string is empty. [[nodiscard]] constexpr bool empty() const { return Length == 0; } @@ -245,7 +245,7 @@ namespace llvm { /// @name Type Conversions /// @{ - operator std::string_view() const { + constexpr operator std::string_view() const { return std::string_view(data(), size()); } diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def index ee9207bb4f7d..b22bdd555cd4 100644 --- a/llvm/include/llvm/Analysis/VecFuncs.def +++ b/llvm/include/llvm/Analysis/VecFuncs.def @@ -470,123 +470,125 @@ TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f16", FIXED(16), "_ZGV_LLVM_N16 #elif defined(TLI_DEFINE_SLEEFGNUABI_VF2_VECFUNCS) -TLI_DEFINE_VECFUNC( "acos", "_ZGVnN2v_acos", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("acos", "_ZGVnN2v_acos", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "asin", "_ZGVnN2v_asin", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("asin", "_ZGVnN2v_asin", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "atan", "_ZGVnN2v_atan", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("atan", "_ZGVnN2v_atan", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "atan2", "_ZGVnN2vv_atan2", FIXED(2), "_ZGV_LLVM_N2vv") +TLI_DEFINE_VECFUNC("atan2", "_ZGVnN2vv_atan2", FIXED(2), "_ZGV_LLVM_N2vv") -TLI_DEFINE_VECFUNC( "atanh", "_ZGVnN2v_atanh", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("atanh", "_ZGVnN2v_atanh", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "cos", "_ZGVnN2v_cos", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "llvm.cos.f64", "_ZGVnN2v_cos", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("cos", "_ZGVnN2v_cos", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVnN2v_cos", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "cosh", "_ZGVnN2v_cosh", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("cosh", "_ZGVnN2v_cosh", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "exp", "_ZGVnN2v_exp", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "llvm.exp.f64", "_ZGVnN2v_exp", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("exp", "_ZGVnN2v_exp", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVnN2v_exp", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "exp2", "_ZGVnN2v_exp2", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "llvm.exp2.f64", "_ZGVnN2v_exp2", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("exp10", "_ZGVnN2v_exp10", FIXED(2), "_ZGV_LLVM_N2v") 
+TLI_DEFINE_VECFUNC("llvm.exp10.f64", "_ZGVnN2v_exp10", FIXED(2), "_ZGV_LLVM_N2v") + +TLI_DEFINE_VECFUNC("exp2", "_ZGVnN2v_exp2", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.exp2.f64", "_ZGVnN2v_exp2", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "exp10", "_ZGVnN2v_exp10", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "llvm.exp10.f64", "_ZGVnN2v_exp10", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("fmod", "_ZGVnN2vv_fmod", FIXED(2), "_ZGV_LLVM_N2vv") -TLI_DEFINE_VECFUNC( "lgamma", "_ZGVnN2v_lgamma", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("lgamma", "_ZGVnN2v_lgamma", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "log", "_ZGVnN2v_log", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "llvm.log.f64", "_ZGVnN2v_log", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("log", "_ZGVnN2v_log", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVnN2v_log", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "log2", "_ZGVnN2v_log2", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "llvm.log2.f64", "_ZGVnN2v_log2", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("log10", "_ZGVnN2v_log10", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.log10.f64", "_ZGVnN2v_log10", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "log10", "_ZGVnN2v_log10", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "llvm.log10.f64", "_ZGVnN2v_log10", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("log2", "_ZGVnN2v_log2", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.log2.f64", "_ZGVnN2v_log2", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "modf", "_ZGVnN2vl8_modf", FIXED(2), "_ZGV_LLVM_N2vl8") +TLI_DEFINE_VECFUNC("modf", "_ZGVnN2vl8_modf", FIXED(2), "_ZGV_LLVM_N2vl8") -TLI_DEFINE_VECFUNC( "pow", "_ZGVnN2vv_pow", FIXED(2), "_ZGV_LLVM_N2vv") -TLI_DEFINE_VECFUNC( "llvm.pow.f64", "_ZGVnN2vv_pow", FIXED(2), "_ZGV_LLVM_N2vv") +TLI_DEFINE_VECFUNC("pow", "_ZGVnN2vv_pow", FIXED(2), "_ZGV_LLVM_N2vv") +TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVnN2vv_pow", FIXED(2), "_ZGV_LLVM_N2vv") -TLI_DEFINE_VECFUNC( "sin", "_ZGVnN2v_sin", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "llvm.sin.f64", "_ZGVnN2v_sin", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("sin", "_ZGVnN2v_sin", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVnN2v_sin", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "sincos", "_ZGVnN2vl8l8_sincos", FIXED(2), "_ZGV_LLVM_N2vl8l8") +TLI_DEFINE_VECFUNC("sincos", "_ZGVnN2vl8l8_sincos", FIXED(2), "_ZGV_LLVM_N2vl8l8") -TLI_DEFINE_VECFUNC( "sincospi", "_ZGVnN2vl8l8_sincospi", FIXED(2), "_ZGV_LLVM_N2vl8l8") +TLI_DEFINE_VECFUNC("sincospi", "_ZGVnN2vl8l8_sincospi", FIXED(2), "_ZGV_LLVM_N2vl8l8") -TLI_DEFINE_VECFUNC( "sinh", "_ZGVnN2v_sinh", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("sinh", "_ZGVnN2v_sinh", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "sqrt", "_ZGVnN2v_sqrt", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("sqrt", "_ZGVnN2v_sqrt", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "tan", "_ZGVnN2v_tan", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("tan", "_ZGVnN2v_tan", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "tanh", "_ZGVnN2v_tanh", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("tanh", "_ZGVnN2v_tanh", FIXED(2), "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC( "tgamma", "_ZGVnN2v_tgamma", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("tgamma", "_ZGVnN2v_tgamma", FIXED(2), "_ZGV_LLVM_N2v") #elif defined(TLI_DEFINE_SLEEFGNUABI_VF4_VECFUNCS) -TLI_DEFINE_VECFUNC( "acosf", "_ZGVnN4v_acosf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("acosf", 
"_ZGVnN4v_acosf", FIXED(4), "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("asinf", "_ZGVnN4v_asinf", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "asinf", "_ZGVnN4v_asinf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("atanf", "_ZGVnN4v_atanf", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "atanf", "_ZGVnN4v_atanf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("atan2f", "_ZGVnN4vv_atan2f", FIXED(4), "_ZGV_LLVM_N4vv") -TLI_DEFINE_VECFUNC( "atan2f", "_ZGVnN4vv_atan2f", FIXED(4), "_ZGV_LLVM_N4vv") +TLI_DEFINE_VECFUNC("atanhf", "_ZGVnN4v_atanhf", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "atanhf", "_ZGVnN4v_atanhf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("cosf", "_ZGVnN4v_cosf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVnN4v_cosf", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "cosf", "_ZGVnN4v_cosf", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "llvm.cos.f32", "_ZGVnN4v_cosf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("coshf", "_ZGVnN4v_coshf", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "coshf", "_ZGVnN4v_coshf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("expf", "_ZGVnN4v_expf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVnN4v_expf", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "expf", "_ZGVnN4v_expf", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "llvm.exp.f32", "_ZGVnN4v_expf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("exp10f", "_ZGVnN4v_exp10f", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.exp10.f32", "_ZGVnN4v_exp10f", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "exp2f", "_ZGVnN4v_exp2f", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "llvm.exp2.f32", "_ZGVnN4v_exp2f", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("exp2f", "_ZGVnN4v_exp2f", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.exp2.f32", "_ZGVnN4v_exp2f", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "exp10f", "_ZGVnN4v_exp10f", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "llvm.exp10.f32", "_ZGVnN4v_exp10f", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("fmodf", "_ZGVnN4vv_fmodf", FIXED(4), "_ZGV_LLVM_N4vv") -TLI_DEFINE_VECFUNC( "lgammaf", "_ZGVnN4v_lgammaf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("lgammaf", "_ZGVnN4v_lgammaf", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "logf", "_ZGVnN4v_logf", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "llvm.log.f32", "_ZGVnN4v_logf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("logf", "_ZGVnN4v_logf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVnN4v_logf", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "log2f", "_ZGVnN4v_log2f", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "llvm.log2.f32", "_ZGVnN4v_log2f", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("log10f", "_ZGVnN4v_log10f", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.log10.f32", "_ZGVnN4v_log10f", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "log10f", "_ZGVnN4v_log10f", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "llvm.log10.f32", "_ZGVnN4v_log10f", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("log2f", "_ZGVnN4v_log2f", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.log2.f32", "_ZGVnN4v_log2f", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "modff", "_ZGVnN4vl4_modff", FIXED(4), "_ZGV_LLVM_N4vl4") +TLI_DEFINE_VECFUNC("modff", "_ZGVnN4vl4_modff", FIXED(4), "_ZGV_LLVM_N4vl4") -TLI_DEFINE_VECFUNC( "powf", "_ZGVnN4vv_powf", FIXED(4), "_ZGV_LLVM_N4vv") -TLI_DEFINE_VECFUNC( "llvm.pow.f32", "_ZGVnN4vv_powf", FIXED(4), "_ZGV_LLVM_N4vv") 
+TLI_DEFINE_VECFUNC("powf", "_ZGVnN4vv_powf", FIXED(4), "_ZGV_LLVM_N4vv") +TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVnN4vv_powf", FIXED(4), "_ZGV_LLVM_N4vv") -TLI_DEFINE_VECFUNC( "sinf", "_ZGVnN4v_sinf", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "llvm.sin.f32", "_ZGVnN4v_sinf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("sinf", "_ZGVnN4v_sinf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVnN4v_sinf", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("sincosf", "_ZGVnN4vl4l4_sincosf", FIXED(4), "_ZGV_LLVM_N4vl4l4") TLI_DEFINE_VECFUNC("sincospif", "_ZGVnN4vl4l4_sincospif", FIXED(4), "_ZGV_LLVM_N4vl4l4") -TLI_DEFINE_VECFUNC( "sinhf", "_ZGVnN4v_sinhf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("sinhf", "_ZGVnN4v_sinhf", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "sqrtf", "_ZGVnN4v_sqrtf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("sqrtf", "_ZGVnN4v_sqrtf", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "tanf", "_ZGVnN4v_tanf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("tanf", "_ZGVnN4v_tanf", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "tanhf", "_ZGVnN4v_tanhf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("tanhf", "_ZGVnN4v_tanhf", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC( "tgammaf", "_ZGVnN4v_tgammaf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("tgammaf", "_ZGVnN4v_tgammaf", FIXED(4), "_ZGV_LLVM_N4v") #elif defined(TLI_DEFINE_SLEEFGNUABI_SCALABLE_VECFUNCS) @@ -618,16 +620,16 @@ TLI_DEFINE_VECFUNC("expf", "_ZGVsMxv_expf", SCALABLE(4), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVsMxv_exp", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVsMxv_expf", SCALABLE(4), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("exp2", "_ZGVsMxv_exp2", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("exp2f", "_ZGVsMxv_exp2f", SCALABLE(4), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("llvm.exp2.f64", "_ZGVsMxv_exp2", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("llvm.exp2.f32", "_ZGVsMxv_exp2f", SCALABLE(4), MASKED, "_ZGVsMxv") - TLI_DEFINE_VECFUNC("exp10", "_ZGVsMxv_exp10", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("exp10f", "_ZGVsMxv_exp10f", SCALABLE(4), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.exp10.f64", "_ZGVsMxv_exp10", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.exp10.f32", "_ZGVsMxv_exp10f", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("exp2", "_ZGVsMxv_exp2", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("exp2f", "_ZGVsMxv_exp2f", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.exp2.f64", "_ZGVsMxv_exp2", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.exp2.f32", "_ZGVsMxv_exp2f", SCALABLE(4), MASKED, "_ZGVsMxv") + TLI_DEFINE_VECFUNC("fmod", "_ZGVsMxvv_fmod", SCALABLE(2), MASKED, "_ZGVsMxvv") TLI_DEFINE_VECFUNC("fmodf", "_ZGVsMxvv_fmodf", SCALABLE(4), MASKED, "_ZGVsMxvv") @@ -639,16 +641,16 @@ TLI_DEFINE_VECFUNC("logf", "_ZGVsMxv_logf", SCALABLE(4), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVsMxv_log", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVsMxv_logf", SCALABLE(4), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC( "log2", "_ZGVsMxv_log2", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC( "log2f", "_ZGVsMxv_log2f", SCALABLE(4), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC( "llvm.log2.f64", "_ZGVsMxv_log2", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC( "llvm.log2.f32", "_ZGVsMxv_log2f", SCALABLE(4), MASKED, "_ZGVsMxv") - TLI_DEFINE_VECFUNC("log10", "_ZGVsMxv_log10", SCALABLE(2), MASKED, 
"_ZGVsMxv") TLI_DEFINE_VECFUNC("log10f", "_ZGVsMxv_log10f", SCALABLE(4), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.log10.f64", "_ZGVsMxv_log10", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.log10.f32", "_ZGVsMxv_log10f", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("log2", "_ZGVsMxv_log2", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("log2f", "_ZGVsMxv_log2f", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.log2.f64", "_ZGVsMxv_log2", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.log2.f32", "_ZGVsMxv_log2f", SCALABLE(4), MASKED, "_ZGVsMxv") + TLI_DEFINE_VECFUNC("modf", "_ZGVsMxvl8_modf", SCALABLE(2), MASKED, "_ZGVsMxvl8") TLI_DEFINE_VECFUNC("modff", "_ZGVsMxvl4_modff", SCALABLE(4), MASKED, "_ZGVsMxvl4") @@ -765,16 +767,6 @@ TLI_DEFINE_VECFUNC("llvm.exp.f32", "armpl_vexpq_f32", FIXED(4), NOMASK, "_ZGV_LL TLI_DEFINE_VECFUNC("llvm.exp.f64", "armpl_svexp_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.exp.f32", "armpl_svexp_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("exp2", "armpl_vexp2q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("exp2f", "armpl_vexp2q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("exp2", "armpl_svexp2_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("exp2f", "armpl_svexp2_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv") - -TLI_DEFINE_VECFUNC("llvm.exp2.f64", "armpl_vexp2q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("llvm.exp2.f32", "armpl_vexp2q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("llvm.exp2.f64", "armpl_svexp2_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("llvm.exp2.f32", "armpl_svexp2_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv") - TLI_DEFINE_VECFUNC("exp10", "armpl_vexp10q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("exp10f", "armpl_vexp10q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("exp10", "armpl_svexp10_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") @@ -785,6 +777,16 @@ TLI_DEFINE_VECFUNC("llvm.exp10.f32", "armpl_vexp10q_f32", FIXED(4), NOMASK, "_ZG TLI_DEFINE_VECFUNC("llvm.exp10.f64", "armpl_svexp10_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.exp10.f32", "armpl_svexp10_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("exp2", "armpl_vexp2q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("exp2f", "armpl_vexp2q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("exp2", "armpl_svexp2_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("exp2f", "armpl_svexp2_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv") + +TLI_DEFINE_VECFUNC("llvm.exp2.f64", "armpl_vexp2q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.exp2.f32", "armpl_vexp2q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.exp2.f64", "armpl_svexp2_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.exp2.f32", "armpl_svexp2_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv") + TLI_DEFINE_VECFUNC("expm1", "armpl_vexpm1q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("expm1f", "armpl_vexpm1q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("expm1", "armpl_svexpm1_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") @@ -830,6 +832,16 @@ TLI_DEFINE_VECFUNC("llvm.log.f32", "armpl_vlogq_f32", FIXED(4), NOMASK, "_ZGV_LL TLI_DEFINE_VECFUNC("llvm.log.f64", "armpl_svlog_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.log.f32", "armpl_svlog_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv") 
+TLI_DEFINE_VECFUNC("log10", "armpl_vlog10q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("log10f", "armpl_vlog10q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("log10", "armpl_svlog10_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("log10f", "armpl_svlog10_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv") + +TLI_DEFINE_VECFUNC("llvm.log10.f64", "armpl_vlog10q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.log10.f32", "armpl_vlog10q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.log10.f64", "armpl_svlog10_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.log10.f32", "armpl_svlog10_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv") + TLI_DEFINE_VECFUNC("log1p", "armpl_vlog1pq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("log1pf", "armpl_vlog1pq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("log1p", "armpl_svlog1p_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") @@ -845,16 +857,6 @@ TLI_DEFINE_VECFUNC("llvm.log2.f32", "armpl_vlog2q_f32", FIXED(4), NOMASK, "_ZGV_ TLI_DEFINE_VECFUNC("llvm.log2.f64", "armpl_svlog2_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.log2.f32", "armpl_svlog2_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("log10", "armpl_vlog10q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("log10f", "armpl_vlog10q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("log10", "armpl_svlog10_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("log10f", "armpl_svlog10_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv") - -TLI_DEFINE_VECFUNC("llvm.log10.f64", "armpl_vlog10q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("llvm.log10.f32", "armpl_vlog10q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("llvm.log10.f64", "armpl_svlog10_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("llvm.log10.f32", "armpl_svlog10_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv") - TLI_DEFINE_VECFUNC("modf", "armpl_vmodfq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vl8") TLI_DEFINE_VECFUNC("modff", "armpl_vmodfq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl4") TLI_DEFINE_VECFUNC("modf", "armpl_svmodf_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvl8") diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/AArch64.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/AArch64.def index 30375de420e3..5fb3fa4aeb7b 100644 --- a/llvm/include/llvm/BinaryFormat/ELFRelocs/AArch64.def +++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/AArch64.def @@ -59,6 +59,7 @@ ELF_RELOC(R_AARCH64_ADR_GOT_PAGE, 0x137) ELF_RELOC(R_AARCH64_LD64_GOT_LO12_NC, 0x138) ELF_RELOC(R_AARCH64_LD64_GOTPAGE_LO15, 0x139) ELF_RELOC(R_AARCH64_PLT32, 0x13a) +ELF_RELOC(R_AARCH64_GOTPCREL32, 0x13b) ELF_RELOC(R_AARCH64_TLSGD_ADR_PREL21, 0x200) ELF_RELOC(R_AARCH64_TLSGD_ADR_PAGE21, 0x201) ELF_RELOC(R_AARCH64_TLSGD_ADD_LO12_NC, 0x202) diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def index c7fd6490041c..b478799c91fb 100644 --- a/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def +++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def @@ -40,8 +40,7 @@ ELF_RELOC(R_RISCV_SUB8, 37) ELF_RELOC(R_RISCV_SUB16, 38) ELF_RELOC(R_RISCV_SUB32, 39) ELF_RELOC(R_RISCV_SUB64, 40) -ELF_RELOC(R_RISCV_GNU_VTINHERIT, 41) -ELF_RELOC(R_RISCV_GNU_VTENTRY, 42) +ELF_RELOC(R_RISCV_GOT32_PCREL, 41) ELF_RELOC(R_RISCV_ALIGN, 43) ELF_RELOC(R_RISCV_RVC_BRANCH, 44) ELF_RELOC(R_RISCV_RVC_JUMP, 45) diff --git a/llvm/include/llvm/CodeGen/AssignmentTrackingAnalysis.h 
b/llvm/include/llvm/CodeGen/AssignmentTrackingAnalysis.h index b740ab567b12..fb0ecd828b68 100644 --- a/llvm/include/llvm/CodeGen/AssignmentTrackingAnalysis.h +++ b/llvm/include/llvm/CodeGen/AssignmentTrackingAnalysis.h @@ -1,13 +1,21 @@ +//===-- llvm/CodeGen/AssignmentTrackingAnalysis.h --------------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + #ifndef LLVM_CODEGEN_ASSIGNMENTTRACKINGANALYSIS_H #define LLVM_CODEGEN_ASSIGNMENTTRACKINGANALYSIS_H #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/PassManager.h" #include "llvm/Pass.h" namespace llvm { -class Function; class Instruction; class raw_ostream; } // namespace llvm @@ -94,6 +102,25 @@ public: ///@} }; +class DebugAssignmentTrackingAnalysis + : public AnalysisInfoMixin<DebugAssignmentTrackingAnalysis> { + friend AnalysisInfoMixin<DebugAssignmentTrackingAnalysis>; + static AnalysisKey Key; + +public: + using Result = FunctionVarLocs; + Result run(Function &F, FunctionAnalysisManager &FAM); +}; + +class DebugAssignmentTrackingPrinterPass + : public PassInfoMixin<DebugAssignmentTrackingPrinterPass> { + raw_ostream &OS; + +public: + DebugAssignmentTrackingPrinterPass(raw_ostream &OS) : OS(OS) {} + PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); +}; + class AssignmentTrackingAnalysis : public FunctionPass { std::unique_ptr<FunctionVarLocs> Results; diff --git a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h index fa81ff504ac6..f540f3774c41 100644 --- a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h +++ b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h @@ -23,6 +23,7 @@ #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" +#include "llvm/CodeGen/AssignmentTrackingAnalysis.h" #include "llvm/CodeGen/CallBrPrepare.h" #include "llvm/CodeGen/CodeGenPrepare.h" #include "llvm/CodeGen/DwarfEHPrepare.h" diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index dcc1a4580b14..a6e9406bed06 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -910,6 +910,9 @@ private: bool tryFoldSelectOfConstants(GSelect *Select, BuildFnTy &MatchInfo); + /// Try to fold (icmp X, Y) ? X : Y -> integer minmax. + bool tryFoldSelectToIntMinMax(GSelect *Select, BuildFnTy &MatchInfo); + bool isOneOrOneSplat(Register Src, bool AllowUndefs); bool isZeroOrZeroSplat(Register Src, bool AllowUndefs); bool isConstantSplatVector(Register Src, int64_t SplatValue, diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h index 6ab1d4550c51..14885d5f9d08 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h @@ -558,6 +558,24 @@ public: } }; +/// Represents a G_PHI. +class GPhi : public GenericMachineInstr { +public: + /// Returns the number of incoming values. + unsigned getNumIncomingValues() const { return (getNumOperands() - 1) / 2; } + /// Returns the I'th incoming vreg. 
+ Register getIncomingValue(unsigned I) { + return getOperand(I * 2 + 1).getReg(); + } + /// Returns the I'th incoming basic block. + MachineBasicBlock *getIncomingBlock(unsigned I) { + return getOperand(I * 2 + 2).getMBB(); + } + + static bool classof(const MachineInstr *MI) { + return MI->getOpcode() == TargetOpcode::G_PHI; + } +}; } // namespace llvm diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h b/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h index b1fcdd207a60..4fbff4d10f8a 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h @@ -67,10 +67,9 @@ private: typedef SmallSetVector<MachineInstr *, 32> LocalizedSetVecT; - /// If \p Op is a phi operand and not unique in that phi, that is, - /// there are other operands in the phi with the same register, - /// return true. - bool isNonUniquePhiValue(MachineOperand &Op) const; + /// If \p Op is a reg operand of a PHI, return the number of total + /// operands in the PHI that are the same as \p Op, including itself. + unsigned getNumPhiUses(MachineOperand &Op) const; /// Do inter-block localization from the entry block. bool localizeInterBlock(MachineFunction &MF, diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/llvm/include/llvm/CodeGen/SelectionDAGISel.h index 40046e0a8dec..e4d90f6e898f 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h @@ -159,7 +159,15 @@ public: OPC_CheckChild2Same, OPC_CheckChild3Same, OPC_CheckPatternPredicate, + OPC_CheckPatternPredicate0, + OPC_CheckPatternPredicate1, OPC_CheckPatternPredicate2, + OPC_CheckPatternPredicate3, + OPC_CheckPatternPredicate4, + OPC_CheckPatternPredicate5, + OPC_CheckPatternPredicate6, + OPC_CheckPatternPredicate7, + OPC_CheckPatternPredicateTwoByte, OPC_CheckPredicate, OPC_CheckPredicateWithOperands, OPC_CheckOpcode, @@ -207,6 +215,14 @@ public: OPC_CheckChild2CondCode, OPC_CheckValueType, OPC_CheckComplexPat, + OPC_CheckComplexPat0, + OPC_CheckComplexPat1, + OPC_CheckComplexPat2, + OPC_CheckComplexPat3, + OPC_CheckComplexPat4, + OPC_CheckComplexPat5, + OPC_CheckComplexPat6, + OPC_CheckComplexPat7, OPC_CheckAndImm, OPC_CheckOrImm, OPC_CheckImmAllOnesV, diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index ebf410cc94de..65b06d0f4579 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -935,6 +935,9 @@ public: /// Helper method returns the APInt of a ConstantSDNode operand. inline const APInt &getConstantOperandAPInt(unsigned Num) const; + /// Helper method returns the APInt value of a ConstantSDNode. 
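// Illustrative sketch, not part of the imported diff (assumes the usual
// `using namespace llvm;`): walking a G_PHI with the new GPhi accessors
// introduced above. "MI" is an assumed MachineInstr seen by a GlobalISel pass.
if (auto *Phi = dyn_cast<GPhi>(&MI)) {
  for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
    Register VReg = Phi->getIncomingValue(I);            // I'th incoming vreg
    MachineBasicBlock *Pred = Phi->getIncomingBlock(I);  // its predecessor block
    (void)VReg;
    (void)Pred; // e.g. count how often VReg flows in from each predecessor
  }
}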
+ inline const APInt &getAsAPIntVal() const; + const SDValue &getOperand(unsigned Num) const { assert(Num < NumOperands && "Invalid child # of SDNode!"); return OperandList[Num]; @@ -1656,6 +1659,10 @@ const APInt &SDNode::getConstantOperandAPInt(unsigned Num) const { return cast<ConstantSDNode>(getOperand(Num))->getAPIntValue(); } +const APInt &SDNode::getAsAPIntVal() const { + return cast<ConstantSDNode>(this)->getAPIntValue(); +} + class ConstantFPSDNode : public SDNode { friend class SelectionDAG; diff --git a/llvm/lib/DWARFLinker/Parallel/Utils.h b/llvm/include/llvm/DWARFLinker/Utils.h index 3c05b2ea173d..23e59c967011 100644 --- a/llvm/lib/DWARFLinker/Parallel/Utils.h +++ b/llvm/include/llvm/DWARFLinker/Utils.h @@ -6,14 +6,17 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_DWARFLINKER_PARALLEL_UTILS_H -#define LLVM_LIB_DWARFLINKER_PARALLEL_UTILS_H +#ifndef LLVM_DWARFLINKER_UTILS_H +#define LLVM_DWARFLINKER_UTILS_H +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Twine.h" #include "llvm/Support/Error.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" namespace llvm { namespace dwarf_linker { -namespace parallel { /// This function calls \p Iteration() until it returns false. /// If number of iterations exceeds \p MaxCounter then an Error is returned. @@ -27,16 +30,35 @@ inline Error finiteLoop(function_ref<Expected<bool>()> Iteration, Expected<bool> IterationResultOrError = Iteration(); if (!IterationResultOrError) return IterationResultOrError.takeError(); - if (!IterationResultOrError.get()) return Error::success(); } - return createStringError(std::errc::invalid_argument, "Infinite recursion"); } -} // end of namespace parallel +/// Make a best effort to guess the +/// Xcode.app/Contents/Developer/Toolchains/ path from an SDK path. +inline SmallString<128> guessToolchainBaseDir(StringRef SysRoot) { + SmallString<128> Result; + // Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk + StringRef Base = sys::path::parent_path(SysRoot); + if (sys::path::filename(Base) != "SDKs") + return Result; + Base = sys::path::parent_path(Base); + Result = Base; + Result += "/Toolchains"; + return Result; +} + +inline bool isPathAbsoluteOnWindowsOrPosix(const Twine &Path) { + // Debug info can contain paths from any OS, not necessarily + // an OS we're currently running on. Moreover different compilation units can + // be compiled on different operating systems and linked together later. 
+ return sys::path::is_absolute(Path, sys::path::Style::posix) || + sys::path::is_absolute(Path, sys::path::Style::windows); +} + } // end of namespace dwarf_linker } // end of namespace llvm -#endif // LLVM_LIB_DWARFLINKER_PARALLEL_UTILS_H +#endif // LLVM_DWARFLINKER_UTILS_H diff --git a/llvm/include/llvm/Frontend/OpenACC/ACC.td b/llvm/include/llvm/Frontend/OpenACC/ACC.td index 013d18e160de..0dbd934d83f0 100644 --- a/llvm/include/llvm/Frontend/OpenACC/ACC.td +++ b/llvm/include/llvm/Frontend/OpenACC/ACC.td @@ -391,9 +391,7 @@ def ACC_Loop : Directive<"loop"> { let allowedClauses = [ VersionedClause<ACCC_DeviceType>, VersionedClause<ACCC_Private>, - VersionedClause<ACCC_Reduction> - ]; - let allowedOnceClauses = [ + VersionedClause<ACCC_Reduction>, VersionedClause<ACCC_Collapse>, VersionedClause<ACCC_Gang>, VersionedClause<ACCC_Tile>, @@ -421,15 +419,17 @@ def ACC_Init : Directive<"init"> { // 2.15.1 def ACC_Routine : Directive<"routine"> { - let allowedOnceClauses = [ + let allowedClauses = [ VersionedClause<ACCC_Bind>, VersionedClause<ACCC_DeviceType>, - VersionedClause<ACCC_NoHost>, VersionedClause<ACCC_Gang>, VersionedClause<ACCC_Seq>, VersionedClause<ACCC_Vector>, VersionedClause<ACCC_Worker> ]; + let allowedOnceClauses = [ + VersionedClause<ACCC_NoHost> + ]; } // 2.14.3 @@ -532,32 +532,32 @@ def ACC_HostData : Directive<"host_data"> { // 2.11 def ACC_KernelsLoop : Directive<"kernels loop"> { let allowedClauses = [ + VersionedClause<ACCC_Attach>, + VersionedClause<ACCC_Collapse>, VersionedClause<ACCC_Copy>, VersionedClause<ACCC_Copyin>, VersionedClause<ACCC_Copyout>, VersionedClause<ACCC_Create>, + VersionedClause<ACCC_DevicePtr>, VersionedClause<ACCC_DeviceType>, + VersionedClause<ACCC_Gang>, VersionedClause<ACCC_NoCreate>, + VersionedClause<ACCC_NumGangs>, + VersionedClause<ACCC_NumWorkers>, VersionedClause<ACCC_Present>, VersionedClause<ACCC_Private>, VersionedClause<ACCC_Reduction>, - VersionedClause<ACCC_DevicePtr>, - VersionedClause<ACCC_Attach>, - VersionedClause<ACCC_Wait> + VersionedClause<ACCC_Tile>, + VersionedClause<ACCC_Vector>, + VersionedClause<ACCC_VectorLength>, + VersionedClause<ACCC_Wait>, + VersionedClause<ACCC_Worker> ]; let allowedOnceClauses = [ VersionedClause<ACCC_Async>, - VersionedClause<ACCC_Collapse>, VersionedClause<ACCC_Default>, - VersionedClause<ACCC_Gang>, VersionedClause<ACCC_If>, - VersionedClause<ACCC_NumGangs>, - VersionedClause<ACCC_NumWorkers>, - VersionedClause<ACCC_Self>, - VersionedClause<ACCC_Tile>, - VersionedClause<ACCC_Vector>, - VersionedClause<ACCC_VectorLength>, - VersionedClause<ACCC_Worker> + VersionedClause<ACCC_Self> ]; let allowedExclusiveClauses = [ VersionedClause<ACCC_Auto>, @@ -570,6 +570,7 @@ def ACC_KernelsLoop : Directive<"kernels loop"> { def ACC_ParallelLoop : Directive<"parallel loop"> { let allowedClauses = [ VersionedClause<ACCC_Attach>, + VersionedClause<ACCC_Collapse>, VersionedClause<ACCC_Copy>, VersionedClause<ACCC_Copyin>, VersionedClause<ACCC_Copyout>, @@ -577,25 +578,24 @@ def ACC_ParallelLoop : Directive<"parallel loop"> { VersionedClause<ACCC_DevicePtr>, VersionedClause<ACCC_DeviceType>, VersionedClause<ACCC_FirstPrivate>, + VersionedClause<ACCC_Gang>, VersionedClause<ACCC_NoCreate>, + VersionedClause<ACCC_NumGangs>, + VersionedClause<ACCC_NumWorkers>, VersionedClause<ACCC_Present>, VersionedClause<ACCC_Private>, VersionedClause<ACCC_Reduction>, VersionedClause<ACCC_Tile>, - VersionedClause<ACCC_Wait> + VersionedClause<ACCC_Vector>, + VersionedClause<ACCC_VectorLength>, + VersionedClause<ACCC_Wait>, + 
VersionedClause<ACCC_Worker> ]; let allowedOnceClauses = [ VersionedClause<ACCC_Async>, - VersionedClause<ACCC_Collapse>, VersionedClause<ACCC_Default>, - VersionedClause<ACCC_Gang>, VersionedClause<ACCC_If>, - VersionedClause<ACCC_NumGangs>, - VersionedClause<ACCC_NumWorkers>, - VersionedClause<ACCC_Self>, - VersionedClause<ACCC_Vector>, - VersionedClause<ACCC_VectorLength>, - VersionedClause<ACCC_Worker> + VersionedClause<ACCC_Self> ]; let allowedExclusiveClauses = [ VersionedClause<ACCC_Auto>, @@ -608,6 +608,7 @@ def ACC_ParallelLoop : Directive<"parallel loop"> { def ACC_SerialLoop : Directive<"serial loop"> { let allowedClauses = [ VersionedClause<ACCC_Attach>, + VersionedClause<ACCC_Collapse>, VersionedClause<ACCC_Copy>, VersionedClause<ACCC_Copyin>, VersionedClause<ACCC_Copyout>, @@ -615,22 +616,21 @@ def ACC_SerialLoop : Directive<"serial loop"> { VersionedClause<ACCC_DevicePtr>, VersionedClause<ACCC_DeviceType>, VersionedClause<ACCC_FirstPrivate>, + VersionedClause<ACCC_Gang>, VersionedClause<ACCC_NoCreate>, VersionedClause<ACCC_Present>, VersionedClause<ACCC_Private>, VersionedClause<ACCC_Reduction>, - VersionedClause<ACCC_Wait> + VersionedClause<ACCC_Tile>, + VersionedClause<ACCC_Vector>, + VersionedClause<ACCC_Wait>, + VersionedClause<ACCC_Worker> ]; let allowedOnceClauses = [ VersionedClause<ACCC_Async>, - VersionedClause<ACCC_Collapse>, VersionedClause<ACCC_Default>, - VersionedClause<ACCC_Gang>, VersionedClause<ACCC_If>, - VersionedClause<ACCC_Self>, - VersionedClause<ACCC_Tile>, - VersionedClause<ACCC_Vector>, - VersionedClause<ACCC_Worker> + VersionedClause<ACCC_Self> ]; let allowedExclusiveClauses = [ VersionedClause<ACCC_Auto>, diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 9088168b4c67..acff5c20b1b9 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -2708,8 +2708,8 @@ class SVE2p1_Single_Store_Quadword : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, llvm_nxv1i1_ty, llvm_ptr_ty], [IntrWriteMem, IntrArgMemOnly]>; -def int_aarch64_sve_st1uwq : SVE2p1_Single_Store_Quadword; -def int_aarch64_sve_st1udq : SVE2p1_Single_Store_Quadword; +def int_aarch64_sve_st1wq : SVE2p1_Single_Store_Quadword; +def int_aarch64_sve_st1dq : SVE2p1_Single_Store_Quadword; def int_aarch64_sve_ld2q_sret : AdvSIMD_2Vec_PredLoad_Intrinsic; @@ -3617,7 +3617,7 @@ def int_aarch64_sve_tbxq : AdvSIMD_SVE2_TBX_Intrinsic; // SVE2.1 - Extract vector segment from each pair of quadword segments. 
// -def int_aarch64_sve_extq_lane : AdvSIMD_2VectorArgIndexed_Intrinsic; +def int_aarch64_sve_extq : AdvSIMD_2VectorArgIndexed_Intrinsic; // // SVE2.1 - Move predicate to/from vector diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index 6fd8e80013ce..cf50f2a59f60 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -4710,4 +4710,14 @@ def int_nvvm_is_explicit_cluster [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>], "llvm.nvvm.is_explicit_cluster">; +// Setmaxnreg inc/dec intrinsics +def int_nvvm_setmaxnreg_inc_sync_aligned_u32 + : DefaultAttrsIntrinsic<[], [llvm_i32_ty], + [IntrConvergent, IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>], + "llvm.nvvm.setmaxnreg.inc.sync.aligned.u32">; +def int_nvvm_setmaxnreg_dec_sync_aligned_u32 + : DefaultAttrsIntrinsic<[], [llvm_i32_ty], + [IntrConvergent, IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>], + "llvm.nvvm.setmaxnreg.dec.sync.aligned.u32">; + } // let TargetPrefix = "nvvm" diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h index e72f74ad4adb..66c7d10d823d 100644 --- a/llvm/include/llvm/IR/ModuleSummaryIndex.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h @@ -1011,6 +1011,12 @@ public: return *Callsites; } + void addCallsite(CallsiteInfo &Callsite) { + if (!Callsites) + Callsites = std::make_unique<CallsitesTy>(); + Callsites->push_back(Callsite); + } + ArrayRef<AllocInfo> allocs() const { if (Allocs) return *Allocs; diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h index 447ac0f2aa61..90d99a6031c8 100644 --- a/llvm/include/llvm/IR/PatternMatch.h +++ b/llvm/include/llvm/IR/PatternMatch.h @@ -1495,6 +1495,36 @@ struct ThreeOps_match { } }; +/// Matches instructions with Opcode and any number of operands +template <unsigned Opcode, typename... OperandTypes> struct AnyOps_match { + std::tuple<OperandTypes...> Operands; + + AnyOps_match(const OperandTypes &...Ops) : Operands(Ops...) {} + + // Operand matching works by recursively calling match_operands, matching the + // operands left to right. The first version is called for each operand but + // the last, for which the second version is called. The second version of + // match_operands is also used to match each individual operand. + template <int Idx, int Last> + std::enable_if_t<Idx != Last, bool> match_operands(const Instruction *I) { + return match_operands<Idx, Idx>(I) && match_operands<Idx + 1, Last>(I); + } + + template <int Idx, int Last> + std::enable_if_t<Idx == Last, bool> match_operands(const Instruction *I) { + return std::get<Idx>(Operands).match(I->getOperand(Idx)); + } + + template <typename OpTy> bool match(OpTy *V) { + if (V->getValueID() == Value::InstructionVal + Opcode) { + auto *I = cast<Instruction>(V); + return I->getNumOperands() == sizeof...(OperandTypes) && + match_operands<0, sizeof...(OperandTypes) - 1>(I); + } + return false; + } +}; + /// Matches SelectInst. template <typename Cond, typename LHS, typename RHS> inline ThreeOps_match<Cond, LHS, RHS, Instruction::Select> @@ -1611,6 +1641,12 @@ m_Store(const ValueOpTy &ValueOp, const PointerOpTy &PointerOp) { PointerOp); } +/// Matches GetElementPtrInst. +template <typename... 
OperandTypes> +inline auto m_GEP(const OperandTypes &...Ops) { + return AnyOps_match<Instruction::GetElementPtr, OperandTypes...>(Ops...); +} + //===----------------------------------------------------------------------===// // Matchers for CastInst classes // diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index 36be2e7d869e..87e7bbbd727e 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -1035,7 +1035,8 @@ const HashT HashType = HashT::MD5; inline uint64_t ComputeHash(StringRef K) { return ComputeHash(HashType, K); } // This structure defines the file header of the LLVM profile -// data file in indexed-format. +// data file in indexed-format. Please update llvm/docs/InstrProfileFormat.rst +// as appropriate when updating the indexed profile format. struct Header { uint64_t Magic; uint64_t Version; diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc index f5de23ff4b94..25df899b3f36 100644 --- a/llvm/include/llvm/ProfileData/InstrProfData.inc +++ b/llvm/include/llvm/ProfileData/InstrProfData.inc @@ -123,6 +123,8 @@ INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::PointerType::getUnqual(Ctx), Next, \ /* INSTR_PROF_RAW_HEADER start */ /* Definition of member fields of the raw profile header data structure. */ +/* Please update llvm/docs/InstrProfileFormat.rst as appropriate when updating + raw profile format. */ #ifndef INSTR_PROF_RAW_HEADER #define INSTR_PROF_RAW_HEADER(Type, Name, Initializer) #else diff --git a/llvm/include/llvm/Support/RISCVISAInfo.h b/llvm/include/llvm/Support/RISCVISAInfo.h index c539448683d3..46df93d75226 100644 --- a/llvm/include/llvm/Support/RISCVISAInfo.h +++ b/llvm/include/llvm/Support/RISCVISAInfo.h @@ -18,11 +18,6 @@ #include <vector> namespace llvm { -struct RISCVExtensionInfo { - unsigned MajorVersion; - unsigned MinorVersion; -}; - void riscvExtensionsHelp(StringMap<StringRef> DescMap); class RISCVISAInfo { @@ -30,6 +25,12 @@ public: RISCVISAInfo(const RISCVISAInfo &) = delete; RISCVISAInfo &operator=(const RISCVISAInfo &) = delete; + /// Represents the major and version number components of a RISC-V extension. + struct ExtensionVersion { + unsigned Major; + unsigned Minor; + }; + static bool compareExtension(const std::string &LHS, const std::string &RHS); /// Helper class for OrderedExtensionMap. @@ -41,7 +42,7 @@ public: /// OrderedExtensionMap is std::map, it's specialized to keep entries /// in canonical order of extension. 
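// Illustrative sketch, not part of the imported diff (assumes the usual
// `using namespace llvm;`): using the new m_GEP matcher from PatternMatch.h.
// "I" is an assumed Instruction reference; because AnyOps_match also checks
// the operand count, this only matches GEPs with exactly two operands.
using namespace llvm::PatternMatch;
Value *Base = nullptr, *Idx = nullptr;
if (match(&I, m_GEP(m_Value(Base), m_Value(Idx)))) {
  // I is a getelementptr with a base pointer and a single index operand.
}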
- typedef std::map<std::string, RISCVExtensionInfo, ExtensionComparator> + typedef std::map<std::string, ExtensionVersion, ExtensionComparator> OrderedExtensionMap; RISCVISAInfo(unsigned XLen, OrderedExtensionMap &Exts) @@ -71,10 +72,10 @@ public: std::vector<std::string> toFeatures(bool AddAllExtensions = false, bool IgnoreUnknown = true) const; - const OrderedExtensionMap &getExtensions() const { return Exts; }; + const OrderedExtensionMap &getExtensions() const { return Exts; } - unsigned getXLen() const { return XLen; }; - unsigned getFLen() const { return FLen; }; + unsigned getXLen() const { return XLen; } + unsigned getFLen() const { return FLen; } unsigned getMinVLen() const { return MinVLen; } unsigned getMaxVLen() const { return 65536; } unsigned getMaxELen() const { return MaxELen; } @@ -104,8 +105,7 @@ private: OrderedExtensionMap Exts; - void addExtension(StringRef ExtName, unsigned MajorVersion, - unsigned MinorVersion); + void addExtension(StringRef ExtName, ExtensionVersion Version); Error checkDependency(); diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def index 3824b1c66951..c005218c80f4 100644 --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -687,6 +687,9 @@ HANDLE_TARGET_OPCODE(G_FMINIMUM) HANDLE_TARGET_OPCODE(G_FMAXIMUM) /// Access to FP environment. +HANDLE_TARGET_OPCODE(G_GET_FPENV) +HANDLE_TARGET_OPCODE(G_SET_FPENV) +HANDLE_TARGET_OPCODE(G_RESET_FPENV) HANDLE_TARGET_OPCODE(G_GET_FPMODE) HANDLE_TARGET_OPCODE(G_SET_FPMODE) HANDLE_TARGET_OPCODE(G_RESET_FPMODE) diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td index 73e38b15bf67..2c73b67f9e1a 100644 --- a/llvm/include/llvm/Target/GenericOpcodes.td +++ b/llvm/include/llvm/Target/GenericOpcodes.td @@ -1020,6 +1020,27 @@ def G_FNEARBYINT : GenericInstruction { // it is modeled as a side effect, because constrained intrinsics use the same // method. +// Reading floating-point environment. +def G_GET_FPENV : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins); + let hasSideEffects = true; +} + +// Setting floating-point environment. +def G_SET_FPENV : GenericInstruction { + let OutOperandList = (outs); + let InOperandList = (ins type0:$src); + let hasSideEffects = true; +} + +// Setting default floating-point environment. +def G_RESET_FPENV : GenericInstruction { + let OutOperandList = (outs); + let InOperandList = (ins); + let hasSideEffects = true; +} + // Reading floating-point control modes. 
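// Illustrative sketch, not part of the imported diff (assumes the usual
// `using namespace llvm;`): iterating the renamed ExtensionVersion entries.
// "ISAInfo" is an assumed, already-parsed RISCVISAInfo instance.
for (const auto &Ext : ISAInfo.getExtensions()) {
  const RISCVISAInfo::ExtensionVersion &Ver = Ext.second;
  errs() << Ext.first << ": " << Ver.Major << "." << Ver.Minor << "\n";
}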
def G_GET_FPMODE : GenericInstruction { let OutOperandList = (outs type0:$dst); diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td index 5e704f0b9a75..f792237203b4 100644 --- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -116,6 +116,9 @@ def : GINodeEquiv<G_INTRINSIC, intrinsic_wo_chain> { let IfConvergent = G_INTRINSIC_CONVERGENT; } +def : GINodeEquiv<G_GET_FPENV, get_fpenv>; +def : GINodeEquiv<G_SET_FPENV, set_fpenv>; +def : GINodeEquiv<G_RESET_FPENV, reset_fpenv>; def : GINodeEquiv<G_GET_FPMODE, get_fpmode>; def : GINodeEquiv<G_SET_FPMODE, set_fpmode>; def : GINodeEquiv<G_RESET_FPMODE, reset_fpmode>; diff --git a/llvm/include/llvm/Target/TargetPfmCounters.td b/llvm/include/llvm/Target/TargetPfmCounters.td index b00f3e19c35f..33dff741fa2a 100644 --- a/llvm/include/llvm/Target/TargetPfmCounters.td +++ b/llvm/include/llvm/Target/TargetPfmCounters.td @@ -28,6 +28,24 @@ class PfmIssueCounter<string resource_name, string counter> string ResourceName = resource_name; } +// Definition of a validation event. A validation event represents a specific +// event that can be measured using performance counters that is interesting +// in regard to the snippet state. +class ValidationEvent <int event_number> { + int EventNumber = event_number; +} + +def InstructionRetired : ValidationEvent<0>; + +// PfmValidationCounter provides a mapping between the events that are +// are interesting in regards to the snippet execution environment and +// a concrete performance counter name that can be looked up in libpfm. +class PfmValidationCounter<ValidationEvent event_type, string counter> + : PfmCounter<counter> { + // The name of the event that the validation counter detects. + ValidationEvent EventType = event_type; +} + def NoPfmCounter : PfmCounter <""> {} // Set of PfmCounters for measuring sched model characteristics. @@ -38,6 +56,9 @@ class ProcPfmCounters { PfmCounter UopsCounter = NoPfmCounter; // Processors can define how to measure issued uops by defining IssueCounters. list<PfmIssueCounter> IssueCounters = []; + // Processor can list mappings between validation events and real counters + // to measure the specified events. + list<PfmValidationCounter> ValidationCounters = []; } // A binding of a set of counters to a CPU. diff --git a/llvm/include/llvm/TargetParser/ARMTargetParserCommon.h b/llvm/include/llvm/TargetParser/ARMTargetParserCommon.h index 1e4187c6fb11..8ae553ca80dd 100644 --- a/llvm/include/llvm/TargetParser/ARMTargetParserCommon.h +++ b/llvm/include/llvm/TargetParser/ARMTargetParserCommon.h @@ -42,6 +42,7 @@ struct ParsedBranchProtection { StringRef Key; bool BranchTargetEnforcement; bool BranchProtectionPAuthLR; + bool GuardedControlStack; }; bool parseBranchProtection(StringRef Spec, ParsedBranchProtection &PBP, diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp index 360fc594ef7c..b948eb6ebd12 100644 --- a/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/llvm/lib/Analysis/LazyValueInfo.cpp @@ -539,10 +539,13 @@ void LazyValueInfoImpl::solve() { } std::pair<BasicBlock *, Value *> e = BlockValueStack.back(); assert(BlockValueSet.count(e) && "Stack value should be in BlockValueSet!"); + unsigned StackSize = BlockValueStack.size(); + (void) StackSize; if (solveBlockValue(e.second, e.first)) { // The work item was completely processed. 
- assert(BlockValueStack.back() == e && "Nothing should have been pushed!"); + assert(BlockValueStack.size() == StackSize && + BlockValueStack.back() == e && "Nothing should have been pushed!"); #ifndef NDEBUG std::optional<ValueLatticeElement> BBLV = TheCache.getCachedValueInfo(e.second, e.first); @@ -556,7 +559,8 @@ void LazyValueInfoImpl::solve() { BlockValueSet.erase(e); } else { // More work needs to be done before revisiting. - assert(BlockValueStack.back() != e && "Stack should have been pushed!"); + assert(BlockValueStack.size() == StackSize + 1 && + "Exactly one element should have been pushed!"); } } } diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 8fca569a391b..a5fc267b1883 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -459,9 +459,24 @@ public: // Record all stack id indices actually used in the summary entries being // written, so that we can compact them in the case of distributed ThinLTO // indexes. - for (auto &CI : FS->callsites()) + for (auto &CI : FS->callsites()) { + // If the stack id list is empty, this callsite info was synthesized for + // a missing tail call frame. Ensure that the callee's GUID gets a value + // id. Normally we only generate these for defined summaries, which in + // the case of distributed ThinLTO is only the functions already defined + // in the module or that we want to import. We don't bother to include + // all the callee symbols as they aren't normally needed in the backend. + // However, for the synthesized callsite infos we do need the callee + // GUID in the backend so that we can correlate the identified callee + // with this callsite info (which for non-tail calls is done by the + // ordering of the callsite infos and verified via stack ids). + if (CI.StackIdIndices.empty()) { + GUIDToValueIdMap[CI.Callee.getGUID()] = ++GlobalValueId; + continue; + } for (auto Idx : CI.StackIdIndices) StackIdIndices.push_back(Idx); + } for (auto &AI : FS->allocs()) for (auto &MIB : AI.MIBs) for (auto Idx : MIB.StackIdIndices) diff --git a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp index ad3ad9928987..eb372655e5f1 100644 --- a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp +++ b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp @@ -1,3 +1,11 @@ +//===-- AssignmentTrackingAnalysis.cpp ------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + #include "llvm/CodeGen/AssignmentTrackingAnalysis.h" #include "LiveDebugValues/LiveDebugValues.h" #include "llvm/ADT/BitVector.h" @@ -2553,6 +2561,32 @@ static void analyzeFunction(Function &Fn, const DataLayout &Layout, } } +FunctionVarLocs +DebugAssignmentTrackingAnalysis::run(Function &F, + FunctionAnalysisManager &FAM) { + if (!isAssignmentTrackingEnabled(*F.getParent())) + return FunctionVarLocs(); + + auto &DL = F.getParent()->getDataLayout(); + + FunctionVarLocsBuilder Builder; + analyzeFunction(F, DL, &Builder); + + // Save these results. 
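// Illustrative sketch, not part of the imported diff (assumes the usual
// `using namespace llvm;`): any new-pass-manager function pass can request the
// variable-location results through the new analysis. "MyPass" is a made-up
// pass name; it reuses the same print entry point as the new printer pass.
PreservedAnalyses MyPass::run(Function &F, FunctionAnalysisManager &FAM) {
  FunctionVarLocs &VarLocs = FAM.getResult<DebugAssignmentTrackingAnalysis>(F);
  VarLocs.print(errs(), F);
  return PreservedAnalyses::all();
}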
+ FunctionVarLocs Results; + Results.init(Builder); + return Results; +} + +AnalysisKey DebugAssignmentTrackingAnalysis::Key; + +PreservedAnalyses +DebugAssignmentTrackingPrinterPass::run(Function &F, + FunctionAnalysisManager &FAM) { + FAM.getResult<DebugAssignmentTrackingAnalysis>(F).print(OS, F); + return PreservedAnalyses::all(); +} + bool AssignmentTrackingAnalysis::runOnFunction(Function &F) { if (!isAssignmentTrackingEnabled(*F.getParent())) return false; diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp index 0801296cab49..599b7c72b2f5 100644 --- a/llvm/lib/CodeGen/BranchFolding.cpp +++ b/llvm/lib/CodeGen/BranchFolding.cpp @@ -1363,6 +1363,14 @@ ReoptimizeBlock: MachineBasicBlock *Pred = *(MBB->pred_end()-1); Pred->ReplaceUsesOfBlockWith(MBB, &*FallThrough); } + // Add rest successors of MBB to successors of FallThrough. Those + // successors are not directly reachable via MBB, so it should be + // landing-pad. + for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) + if (*SI != &*FallThrough && !FallThrough->isSuccessor(*SI)) { + assert((*SI)->isEHPad() && "Bad CFG"); + FallThrough->copySuccessor(MBB, SI); + } // If MBB was the target of a jump table, update jump tables to go to the // fallthrough instead. if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo()) @@ -1624,6 +1632,15 @@ ReoptimizeBlock: } else { DidChange = true; PMBB->ReplaceUsesOfBlockWith(MBB, CurTBB); + // Add rest successors of MBB to successors of CurTBB. Those + // successors are not directly reachable via MBB, so it should be + // landing-pad. + for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; + ++SI) + if (*SI != CurTBB && !CurTBB->isSuccessor(*SI)) { + assert((*SI)->isEHPad() && "Bad CFG"); + CurTBB->copySuccessor(MBB, SI); + } // If this change resulted in PMBB ending in a conditional // branch where both conditions go to the same destination, // change this to an unconditional branch. diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 8b15bdb0aca3..fc2793bd7a13 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -6548,6 +6548,87 @@ bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select, return false; } +bool CombinerHelper::tryFoldSelectToIntMinMax(GSelect *Select, + BuildFnTy &MatchInfo) { + Register DstReg = Select->getReg(0); + Register Cond = Select->getCondReg(); + Register True = Select->getTrueReg(); + Register False = Select->getFalseReg(); + LLT DstTy = MRI.getType(DstReg); + + // We need an G_ICMP on the condition register. + GICmp *Cmp = getOpcodeDef<GICmp>(Cond, MRI); + if (!Cmp) + return false; + + // We want to fold the icmp and replace the select. + if (!MRI.hasOneNonDBGUse(Cmp->getReg(0))) + return false; + + CmpInst::Predicate Pred = Cmp->getCond(); + // We need a larger or smaller predicate for + // canonicalization. + if (CmpInst::isEquality(Pred)) + return false; + + Register CmpLHS = Cmp->getLHSReg(); + Register CmpRHS = Cmp->getRHSReg(); + + // We can swap CmpLHS and CmpRHS for higher hitrate. + if (True == CmpRHS && False == CmpLHS) { + std::swap(CmpLHS, CmpRHS); + Pred = CmpInst::getSwappedPredicate(Pred); + } + + // (icmp X, Y) ? X : Y -> integer minmax. + // see matchSelectPattern in ValueTracking. + // Legality between G_SELECT and integer minmax can differ. 
+ if (True == CmpLHS && False == CmpRHS) { + switch (Pred) { + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: { + if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMAX, DstTy})) + return false; + MatchInfo = [=](MachineIRBuilder &B) { + B.buildUMax(DstReg, True, False); + }; + return true; + } + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: { + if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMAX, DstTy})) + return false; + MatchInfo = [=](MachineIRBuilder &B) { + B.buildSMax(DstReg, True, False); + }; + return true; + } + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: { + if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMIN, DstTy})) + return false; + MatchInfo = [=](MachineIRBuilder &B) { + B.buildUMin(DstReg, True, False); + }; + return true; + } + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: { + if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMIN, DstTy})) + return false; + MatchInfo = [=](MachineIRBuilder &B) { + B.buildSMin(DstReg, True, False); + }; + return true; + } + default: + return false; + } + } + + return false; +} + bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) { GSelect *Select = cast<GSelect>(&MI); @@ -6557,5 +6638,8 @@ bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) { if (tryFoldBoolSelectToLogic(Select, MatchInfo)) return true; + if (tryFoldSelectToIntMinMax(Select, MatchInfo)) + return true; + return false; } diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 6708f2baa5ed..8a6bfdc5ee66 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1919,6 +1919,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) { return TargetOpcode::G_LROUND; case Intrinsic::llround: return TargetOpcode::G_LLROUND; + case Intrinsic::get_fpenv: + return TargetOpcode::G_GET_FPENV; case Intrinsic::get_fpmode: return TargetOpcode::G_GET_FPMODE; } @@ -2502,6 +2504,16 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, return true; } + case Intrinsic::set_fpenv: { + Value *FPEnv = CI.getOperand(0); + MIRBuilder.buildInstr(TargetOpcode::G_SET_FPENV, {}, + {getOrCreateVReg(*FPEnv)}); + return true; + } + case Intrinsic::reset_fpenv: { + MIRBuilder.buildInstr(TargetOpcode::G_RESET_FPENV, {}, {}); + return true; + } case Intrinsic::set_fpmode: { Value *FPState = CI.getOperand(0); MIRBuilder.buildInstr(TargetOpcode::G_SET_FPMODE, {}, diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index def7f6ebeb01..21947a55874a 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -958,6 +958,13 @@ static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) { RTLIB::Libcall RTLibcall; switch (MI.getOpcode()) { + case TargetOpcode::G_GET_FPENV: + RTLibcall = RTLIB::FEGETENV; + break; + case TargetOpcode::G_SET_FPENV: + case TargetOpcode::G_RESET_FPENV: + RTLibcall = RTLIB::FESETENV; + break; case TargetOpcode::G_GET_FPMODE: RTLibcall = RTLIB::FEGETMODE; break; @@ -1232,18 +1239,21 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { MI.eraseFromParent(); return Result; } + case TargetOpcode::G_GET_FPENV: case TargetOpcode::G_GET_FPMODE: { LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI, LocObserver); if (Result != Legalized) return Result; break; } + case TargetOpcode::G_SET_FPENV: 
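// Illustrative sketch, not part of the imported diff (assumes the usual
// `using namespace llvm;`): emitting the new FP-environment opcodes from an
// IRTranslator-style context. The s64 environment width is an assumption;
// real targets pick the type, and unhandled cases fall back to the
// fegetenv/fesetenv libcalls wired up in this legalizer change.
Register Env = MRI.createGenericVirtualRegister(LLT::scalar(64));
MIRBuilder.buildInstr(TargetOpcode::G_GET_FPENV, {Env}, {});
MIRBuilder.buildInstr(TargetOpcode::G_SET_FPENV, {}, {Env});
MIRBuilder.buildInstr(TargetOpcode::G_RESET_FPENV, {}, {});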
case TargetOpcode::G_SET_FPMODE: { LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI, LocObserver); if (Result != Legalized) return Result; break; } + case TargetOpcode::G_RESET_FPENV: case TargetOpcode::G_RESET_FPMODE: { LegalizeResult Result = createResetStateLibcall(MIRBuilder, MI, LocObserver); diff --git a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp index 55984423e5bc..ae58e135931f 100644 --- a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp @@ -13,6 +13,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLowering.h" @@ -58,18 +59,18 @@ bool Localizer::isLocalUse(MachineOperand &MOUse, const MachineInstr &Def, return InsertMBB == Def.getParent(); } -bool Localizer::isNonUniquePhiValue(MachineOperand &Op) const { - MachineInstr *MI = Op.getParent(); - if (!MI->isPHI()) - return false; +unsigned Localizer::getNumPhiUses(MachineOperand &Op) const { + auto *MI = dyn_cast<GPhi>(&*Op.getParent()); + if (!MI) + return 0; Register SrcReg = Op.getReg(); - for (unsigned Idx = 1; Idx < MI->getNumOperands(); Idx += 2) { - auto &MO = MI->getOperand(Idx); - if (&MO != &Op && MO.isReg() && MO.getReg() == SrcReg) - return true; + unsigned NumUses = 0; + for (unsigned I = 0, NumVals = MI->getNumIncomingValues(); I < NumVals; ++I) { + if (MI->getIncomingValue(I) == SrcReg) + ++NumUses; } - return false; + return NumUses; } bool Localizer::localizeInterBlock(MachineFunction &MF, @@ -108,11 +109,12 @@ bool Localizer::localizeInterBlock(MachineFunction &MF, continue; } - // If the use is a phi operand that's not unique, don't try to localize. - // If we do, we can cause unnecessary instruction bloat by duplicating - // into each predecessor block, when the existing one is sufficient and - // allows for easier optimization later. - if (isNonUniquePhiValue(MOUse)) + // PHIs look like a single user but can use the same register in multiple + // edges, causing remat into each predecessor. Allow this to a certain + // extent. + unsigned NumPhiUses = getNumPhiUses(MOUse); + const unsigned PhiThreshold = 2; // FIXME: Tune this more. + if (NumPhiUses > PhiThreshold) continue; LLVM_DEBUG(dbgs() << "Fixing non-local use\n"); @@ -164,19 +166,22 @@ bool Localizer::localizeIntraBlock(LocalizedSetVecT &LocalizedInstrs) { if (!UseMI.isPHI()) Users.insert(&UseMI); } - // If all the users were PHIs then they're not going to be in our block, - // don't try to move this instruction. - if (Users.empty()) - continue; - MachineBasicBlock::iterator II(MI); - ++II; - while (II != MBB.end() && !Users.count(&*II)) + // If all the users were PHIs then they're not going to be in our block, we + // may still benefit from sinking, especially since the value might be live + // across a call. + if (Users.empty()) { + // Make sure we don't sink in between two terminator sequences by scanning + // forward, not backward. 
+ II = MBB.getFirstTerminatorForward(); + LLVM_DEBUG(dbgs() << "Only phi users: moving inst to end: " << *MI); + } else { ++II; - - assert(II != MBB.end() && "Didn't find the user in the MBB"); - LLVM_DEBUG(dbgs() << "Intra-block: moving " << *MI << " before " << *II - << '\n'); + while (II != MBB.end() && !Users.count(&*II)) + ++II; + assert(II != MBB.end() && "Didn't find the user in the MBB"); + LLVM_DEBUG(dbgs() << "Intra-block: moving " << *MI << " before " << *II); + } MI->removeFromParent(); MBB.insert(II, MI); diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp index 9037f752dc4f..cfc8c28b99e5 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp @@ -2403,8 +2403,15 @@ bool InstrRefBasedLDV::mlocJoin( llvm::sort(BlockOrders, Cmp); // Skip entry block. - if (BlockOrders.size() == 0) + if (BlockOrders.size() == 0) { + // FIXME: We don't use assert here to prevent instr-ref-unreachable.mir + // failing. + LLVM_DEBUG(if (!MBB.isEntryBlock()) dbgs() + << "Found not reachable block " << MBB.getFullName() + << " from entry which may lead out of " + "bound access to VarLocs\n"); return false; + } // Step through all machine locations, look at each predecessor and test // whether we can eliminate redundant PHIs. diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 2327664516cc..ecdf9ab9e989 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4380,7 +4380,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { } else { N1IsConst = isa<ConstantSDNode>(N1); if (N1IsConst) { - ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue(); + ConstValue1 = N1->getAsAPIntVal(); N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque(); } } @@ -10999,8 +10999,8 @@ SDValue DAGCombiner::visitBSWAP(SDNode *N) { SDLoc DL(N); // fold (bswap c1) -> c2 - if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) - return DAG.getNode(ISD::BSWAP, DL, VT, N0); + if (SDValue C = DAG.FoldConstantArithmetic(ISD::BSWAP, DL, VT, {N0})) + return C; // fold (bswap (bswap x)) -> x if (N0.getOpcode() == ISD::BSWAP) return N0.getOperand(0); @@ -11059,10 +11059,11 @@ SDValue DAGCombiner::visitBSWAP(SDNode *N) { SDValue DAGCombiner::visitBITREVERSE(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + SDLoc DL(N); // fold (bitreverse c1) -> c2 - if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) - return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0); + if (SDValue C = DAG.FoldConstantArithmetic(ISD::BITREVERSE, DL, VT, {N0})) + return C; // fold (bitreverse (bitreverse x)) -> x if (N0.getOpcode() == ISD::BITREVERSE) return N0.getOperand(0); @@ -11072,16 +11073,16 @@ SDValue DAGCombiner::visitBITREVERSE(SDNode *N) { SDValue DAGCombiner::visitCTLZ(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + SDLoc DL(N); // fold (ctlz c1) -> c2 - if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) - return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0); + if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTLZ, DL, VT, {N0})) + return C; // If the value is known never to be zero, switch to the undef version. 
- if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) { + if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) if (DAG.isKnownNeverZero(N0)) - return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0); - } + return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, N0); return SDValue(); } @@ -11089,26 +11090,28 @@ SDValue DAGCombiner::visitCTLZ(SDNode *N) { SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + SDLoc DL(N); // fold (ctlz_zero_undef c1) -> c2 - if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) - return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0); + if (SDValue C = + DAG.FoldConstantArithmetic(ISD::CTLZ_ZERO_UNDEF, DL, VT, {N0})) + return C; return SDValue(); } SDValue DAGCombiner::visitCTTZ(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + SDLoc DL(N); // fold (cttz c1) -> c2 - if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) - return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0); + if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTTZ, DL, VT, {N0})) + return C; // If the value is known never to be zero, switch to the undef version. - if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) { + if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) if (DAG.isKnownNeverZero(N0)) - return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0); - } + return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, DL, VT, N0); return SDValue(); } @@ -11116,20 +11119,23 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) { SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + SDLoc DL(N); // fold (cttz_zero_undef c1) -> c2 - if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) - return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0); + if (SDValue C = + DAG.FoldConstantArithmetic(ISD::CTTZ_ZERO_UNDEF, DL, VT, {N0})) + return C; return SDValue(); } SDValue DAGCombiner::visitCTPOP(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + SDLoc DL(N); // fold (ctpop c1) -> c2 - if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) - return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0); + if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTPOP, DL, VT, {N0})) + return C; return SDValue(); } @@ -12087,8 +12093,8 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) { if (N1Elt.getValueType() != N2Elt.getValueType()) continue; - const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue(); - const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue(); + const APInt &C1 = N1Elt->getAsAPIntVal(); + const APInt &C2 = N2Elt->getAsAPIntVal(); if (C1 != C2 + 1) AllAddOne = false; if (C1 != C2 - 1) @@ -12764,7 +12770,7 @@ static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, SDLoc DL(Op); // Get the constant value and if needed trunc it to the size of the type. // Nodes like build_vector might have constants wider than the scalar type. 
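// Illustrative aside, not part of the imported diff: the recurring mechanical
// change in this file replaces the explicit ConstantSDNode cast with the new
// SDNode helper. For an SDValue `Op` known to wrap a ConstantSDNode, the two
// forms are equivalent:
const APInt &OldForm = cast<ConstantSDNode>(Op)->getAPIntValue();
const APInt &NewForm = Op->getAsAPIntVal();
assert(OldForm == NewForm && "both spellings read the same constant");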
- APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits); + APInt C = Op->getAsAPIntVal().zextOrTrunc(EVTBits); if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG) Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT)); else @@ -13375,9 +13381,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) { SDValue N00 = N0.getOperand(0); EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT(); - if (N00.getOpcode() == ISD::TRUNCATE && + if ((N00.getOpcode() == ISD::TRUNCATE || TLI.isTruncateFree(N00, ExtVT)) && (!LegalTypes || TLI.isTypeLegal(ExtVT))) { - SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00.getOperand(0)); + SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00); return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T); } } @@ -17942,10 +17948,10 @@ SDValue DAGCombiner::rebuildSetCC(SDValue N) { SDValue AndOp1 = Op0.getOperand(1); if (AndOp1.getOpcode() == ISD::Constant) { - const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue(); + const APInt &AndConst = AndOp1->getAsAPIntVal(); if (AndConst.isPowerOf2() && - cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) { + Op1->getAsAPIntVal() == AndConst.logBase2()) { SDLoc DL(N); return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()), Op0, DAG.getConstant(0, DL, Op0.getValueType()), @@ -18266,7 +18272,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx)); const APInt &Offset0 = CN->getAPIntValue(); - const APInt &Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue(); + const APInt &Offset1 = Offset->getAsAPIntVal(); int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1; int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1; int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1; @@ -19573,7 +19579,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { // Find the type to narrow it the load / op / store to. 
SDValue N1 = Value.getOperand(1); unsigned BitWidth = N1.getValueSizeInBits(); - APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue(); + APInt Imm = N1->getAsAPIntVal(); if (Opc == ISD::AND) Imm ^= APInt::getAllOnes(BitWidth); if (Imm == 0 || Imm.isAllOnes()) @@ -26542,10 +26548,10 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { } APInt Bits; - if (isa<ConstantSDNode>(Elt)) - Bits = cast<ConstantSDNode>(Elt)->getAPIntValue(); - else if (isa<ConstantFPSDNode>(Elt)) - Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt(); + if (auto *Cst = dyn_cast<ConstantSDNode>(Elt)) + Bits = Cst->getAPIntValue(); + else if (auto *CstFP = dyn_cast<ConstantFPSDNode>(Elt)) + Bits = CstFP->getValueAPF().bitcastToAPInt(); else return SDValue(); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index ec74d2940099..c278bdc07360 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1854,7 +1854,7 @@ void DAGTypeLegalizer::SplitVecRes_STEP_VECTOR(SDNode *N, SDValue &Lo, // Hi = Lo + (EltCnt * Step) EVT EltVT = Step.getValueType(); - APInt StepVal = cast<ConstantSDNode>(Step)->getAPIntValue(); + APInt StepVal = Step->getAsAPIntVal(); SDValue StartOfHi = DAG.getVScale(dl, EltVT, StepVal * LoVT.getVectorMinNumElements()); StartOfHi = DAG.getSExtOrTrunc(StartOfHi, dl, HiVT.getVectorElementType()); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index b39be64c06f9..01d31806c844 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -327,7 +327,7 @@ bool ISD::isVectorShrinkable(const SDNode *N, unsigned NewEltSize, if (!isa<ConstantSDNode>(Op)) return false; - APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().trunc(EltSize); + APInt C = Op->getAsAPIntVal().trunc(EltSize); if (Signed && C.trunc(NewEltSize).sext(EltSize) != C) return false; if (!Signed && C.trunc(NewEltSize).zext(EltSize) != C) @@ -7200,7 +7200,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, (N2VT.getVectorMinNumElements() + N3->getAsZExtVal()) <= VT.getVectorMinNumElements()) && "Insert subvector overflow!"); - assert(cast<ConstantSDNode>(N3)->getAPIntValue().getBitWidth() == + assert(N3->getAsAPIntVal().getBitWidth() == TLI->getVectorIdxTy(getDataLayout()).getFixedSizeInBits() && "Constant index for INSERT_SUBVECTOR has an invalid size"); @@ -9304,7 +9304,7 @@ SDValue SelectionDAG::getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, N->getValueType(0).getVectorElementCount()) && "Vector width mismatch between index and data"); assert(isa<ConstantSDNode>(N->getScale()) && - cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() && + N->getScale()->getAsAPIntVal().isPowerOf2() && "Scale should be a constant power of 2"); CSEMap.InsertNode(N, IP); @@ -9348,7 +9348,7 @@ SDValue SelectionDAG::getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, N->getValue().getValueType().getVectorElementCount()) && "Vector width mismatch between index and data"); assert(isa<ConstantSDNode>(N->getScale()) && - cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() && + N->getScale()->getAsAPIntVal().isPowerOf2() && "Scale should be a constant power of 2"); CSEMap.InsertNode(N, IP); @@ -9490,7 +9490,7 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, N->getValueType(0).getVectorElementCount()) && 
"Vector width mismatch between index and data"); assert(isa<ConstantSDNode>(N->getScale()) && - cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() && + N->getScale()->getAsAPIntVal().isPowerOf2() && "Scale should be a constant power of 2"); CSEMap.InsertNode(N, IP); @@ -9536,7 +9536,7 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, N->getValue().getValueType().getVectorElementCount()) && "Vector width mismatch between index and data"); assert(isa<ConstantSDNode>(N->getScale()) && - cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() && + N->getScale()->getAsAPIntVal().isPowerOf2() && "Scale should be a constant power of 2"); CSEMap.InsertNode(N, IP); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 9acfc76d7d5e..678d273e4bd6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -2697,9 +2697,14 @@ LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckChildSame( /// CheckPatternPredicate - Implements OP_CheckPatternPredicate. LLVM_ATTRIBUTE_ALWAYS_INLINE static bool -CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, - const SelectionDAGISel &SDISel, bool TwoBytePredNo) { - unsigned PredNo = MatcherTable[MatcherIndex++]; +CheckPatternPredicate(unsigned Opcode, const unsigned char *MatcherTable, + unsigned &MatcherIndex, const SelectionDAGISel &SDISel) { + bool TwoBytePredNo = + Opcode == SelectionDAGISel::OPC_CheckPatternPredicateTwoByte; + unsigned PredNo = + TwoBytePredNo || Opcode == SelectionDAGISel::OPC_CheckPatternPredicate + ? MatcherTable[MatcherIndex++] + : Opcode - SelectionDAGISel::OPC_CheckPatternPredicate0; if (TwoBytePredNo) PredNo |= MatcherTable[MatcherIndex++] << 8; return SDISel.CheckPatternPredicate(PredNo); @@ -2851,10 +2856,16 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table, Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Same); return Index; case SelectionDAGISel::OPC_CheckPatternPredicate: + case SelectionDAGISel::OPC_CheckPatternPredicate0: + case SelectionDAGISel::OPC_CheckPatternPredicate1: case SelectionDAGISel::OPC_CheckPatternPredicate2: - Result = !::CheckPatternPredicate( - Table, Index, SDISel, - Table[Index - 1] == SelectionDAGISel::OPC_CheckPatternPredicate2); + case SelectionDAGISel::OPC_CheckPatternPredicate3: + case SelectionDAGISel::OPC_CheckPatternPredicate4: + case SelectionDAGISel::OPC_CheckPatternPredicate5: + case SelectionDAGISel::OPC_CheckPatternPredicate6: + case SelectionDAGISel::OPC_CheckPatternPredicate7: + case SelectionDAGISel::OPC_CheckPatternPredicateTwoByte: + Result = !::CheckPatternPredicate(Opcode, Table, Index, SDISel); return Index; case SelectionDAGISel::OPC_CheckPredicate: Result = !::CheckNodePredicate(Table, Index, SDISel, N.getNode()); @@ -3336,9 +3347,16 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, continue; case OPC_CheckPatternPredicate: + case OPC_CheckPatternPredicate0: + case OPC_CheckPatternPredicate1: case OPC_CheckPatternPredicate2: - if (!::CheckPatternPredicate(MatcherTable, MatcherIndex, *this, - Opcode == OPC_CheckPatternPredicate2)) + case OPC_CheckPatternPredicate3: + case OPC_CheckPatternPredicate4: + case OPC_CheckPatternPredicate5: + case OPC_CheckPatternPredicate6: + case OPC_CheckPatternPredicate7: + case OPC_CheckPatternPredicateTwoByte: + if (!::CheckPatternPredicate(Opcode, MatcherTable, MatcherIndex, *this)) break; continue; case 
OPC_CheckPredicate: @@ -3358,8 +3376,18 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, break; continue; } - case OPC_CheckComplexPat: { - unsigned CPNum = MatcherTable[MatcherIndex++]; + case OPC_CheckComplexPat: + case OPC_CheckComplexPat0: + case OPC_CheckComplexPat1: + case OPC_CheckComplexPat2: + case OPC_CheckComplexPat3: + case OPC_CheckComplexPat4: + case OPC_CheckComplexPat5: + case OPC_CheckComplexPat6: + case OPC_CheckComplexPat7: { + unsigned CPNum = Opcode == OPC_CheckComplexPat + ? MatcherTable[MatcherIndex++] + : Opcode - OPC_CheckComplexPat0; unsigned RecNo = MatcherTable[MatcherIndex++]; assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat"); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index e3e3e375d6a6..3bbef6e6d85d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1108,7 +1108,7 @@ bool TargetLowering::SimplifyDemandedBits( if (Op.getOpcode() == ISD::Constant) { // We know all of the bits for a constant! - Known = KnownBits::makeConstant(cast<ConstantSDNode>(Op)->getAPIntValue()); + Known = KnownBits::makeConstant(Op->getAsAPIntVal()); return false; } @@ -6350,8 +6350,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, LeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros(); // UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros in // the dividend exceeds the leading zeros for the divisor. - LeadingZeros = std::min( - LeadingZeros, cast<ConstantSDNode>(N1)->getAPIntValue().countl_zero()); + LeadingZeros = std::min(LeadingZeros, N1->getAsAPIntVal().countl_zero()); } bool UseNPQ = false, UsePreShift = false, UsePostShift = false; diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 6e69dc66429d..a69b71451736 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -1669,9 +1669,18 @@ static int getSelectionForCOFF(const GlobalValue *GV) { MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { + StringRef Name = GO->getSection(); + if (Name == getInstrProfSectionName(IPSK_covmap, Triple::COFF, + /*AddSegmentInfo=*/false) || + Name == getInstrProfSectionName(IPSK_covfun, Triple::COFF, + /*AddSegmentInfo=*/false) || + Name == getInstrProfSectionName(IPSK_covdata, Triple::COFF, + /*AddSegmentInfo=*/false) || + Name == getInstrProfSectionName(IPSK_covname, Triple::COFF, + /*AddSegmentInfo=*/false)) + Kind = SectionKind::getMetadata(); int Selection = 0; unsigned Characteristics = getCOFFSectionFlags(Kind, TM); - StringRef Name = GO->getSection(); StringRef COMDATSymName = ""; if (GO->hasComdat()) { Selection = getSelectionForCOFF(GO); diff --git a/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp b/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp index 8d76c3bcf672..ac2c26e52240 100644 --- a/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp +++ b/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp @@ -14,6 +14,7 @@ #include "llvm/CodeGen/NonRelocatableStringpool.h" #include "llvm/DWARFLinker/Classic/DWARFLinkerDeclContext.h" #include "llvm/DWARFLinker/Classic/DWARFStreamer.h" +#include "llvm/DWARFLinker/Utils.h" #include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h" #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" 
@@ -176,20 +177,6 @@ static void resolveRelativeObjectPath(SmallVectorImpl<char> &Buf, DWARFDie CU) { sys::path::append(Buf, dwarf::toString(CU.find(dwarf::DW_AT_comp_dir), "")); } -/// Make a best effort to guess the -/// Xcode.app/Contents/Developer/Toolchains/ path from an SDK path. -static SmallString<128> guessToolchainBaseDir(StringRef SysRoot) { - SmallString<128> Result; - // Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk - StringRef Base = sys::path::parent_path(SysRoot); - if (sys::path::filename(Base) != "SDKs") - return Result; - Base = sys::path::parent_path(Base); - Result = Base; - Result += "/Toolchains"; - return Result; -} - /// Collect references to parseable Swift interfaces in imported /// DW_TAG_module blocks. static void analyzeImportedModule( diff --git a/llvm/lib/DWARFLinker/Parallel/AcceleratorRecordsSaver.cpp b/llvm/lib/DWARFLinker/Parallel/AcceleratorRecordsSaver.cpp index 3af574c70561..9af222354551 100644 --- a/llvm/lib/DWARFLinker/Parallel/AcceleratorRecordsSaver.cpp +++ b/llvm/lib/DWARFLinker/Parallel/AcceleratorRecordsSaver.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "AcceleratorRecordsSaver.h" -#include "Utils.h" +#include "llvm/DWARFLinker/Utils.h" #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" #include "llvm/Support/DJB.h" diff --git a/llvm/lib/DWARFLinker/Parallel/DWARFLinkerCompileUnit.cpp b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerCompileUnit.cpp index ffcf9f365aec..6ed284a66a85 100644 --- a/llvm/lib/DWARFLinker/Parallel/DWARFLinkerCompileUnit.cpp +++ b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerCompileUnit.cpp @@ -12,6 +12,7 @@ #include "DIEGenerator.h" #include "DependencyTracker.h" #include "SyntheticTypeNameBuilder.h" +#include "llvm/DWARFLinker/Utils.h" #include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h" #include "llvm/DebugInfo/DWARF/DWARFDebugMacro.h" #include "llvm/Support/DJB.h" @@ -247,20 +248,6 @@ void CompileUnit::cleanupDataAfterClonning() { getOrigUnit().clear(); } -/// Make a best effort to guess the -/// Xcode.app/Contents/Developer/Toolchains/ path from an SDK path. -static SmallString<128> guessToolchainBaseDir(StringRef SysRoot) { - SmallString<128> Result; - // Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk - StringRef Base = sys::path::parent_path(SysRoot); - if (sys::path::filename(Base) != "SDKs") - return Result; - Base = sys::path::parent_path(Base); - Result = Base; - Result += "/Toolchains"; - return Result; -} - /// Collect references to parseable Swift interfaces in imported /// DW_TAG_module blocks. void CompileUnit::analyzeImportedModule(const DWARFDebugInfoEntry *DieEntry) { @@ -1698,14 +1685,6 @@ CompileUnit::getDirAndFilenameFromLineTable( return getDirAndFilenameFromLineTable(FileIdx); } -static bool isPathAbsoluteOnWindowsOrPosix(const Twine &Path) { - // Debug info can contain paths from any OS, not necessarily - // an OS we're currently running on. Moreover different compilation units can - // be compiled on different operating systems and linked together later. 
- return sys::path::is_absolute(Path, sys::path::Style::posix) || - sys::path::is_absolute(Path, sys::path::Style::windows); -} - std::optional<std::pair<StringRef, StringRef>> CompileUnit::getDirAndFilenameFromLineTable(uint64_t FileIdx) { FileNamesCache::iterator FileData = FileNames.find(FileIdx); diff --git a/llvm/lib/DWARFLinker/Parallel/DWARFLinkerImpl.cpp b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerImpl.cpp index bb59cbfdb347..b0b819cf9778 100644 --- a/llvm/lib/DWARFLinker/Parallel/DWARFLinkerImpl.cpp +++ b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerImpl.cpp @@ -9,7 +9,7 @@ #include "DWARFLinkerImpl.h" #include "DIEGenerator.h" #include "DependencyTracker.h" -#include "Utils.h" +#include "llvm/DWARFLinker/Utils.h" #include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Parallel.h" diff --git a/llvm/lib/DWARFLinker/Parallel/DebugLineSectionEmitter.h b/llvm/lib/DWARFLinker/Parallel/DebugLineSectionEmitter.h index 545d04cfbe43..1839164dcec1 100644 --- a/llvm/lib/DWARFLinker/Parallel/DebugLineSectionEmitter.h +++ b/llvm/lib/DWARFLinker/Parallel/DebugLineSectionEmitter.h @@ -193,24 +193,39 @@ private: Section.emitString(Include.getForm(), *IncludeStr); } + bool HasChecksums = P.ContentTypes.HasMD5; + bool HasInlineSources = P.ContentTypes.HasSource; + + dwarf::Form FileNameForm = dwarf::DW_FORM_string; + dwarf::Form LLVMSourceForm = dwarf::DW_FORM_string; + if (P.FileNames.empty()) { // file_name_entry_format_count (ubyte). Section.emitIntVal(0, 1); } else { + FileNameForm = P.FileNames[0].Name.getForm(); + LLVMSourceForm = P.FileNames[0].Source.getForm(); + // file_name_entry_format_count (ubyte). - Section.emitIntVal(2 + (P.ContentTypes.HasMD5 ? 1 : 0), 1); + Section.emitIntVal( + 2 + (HasChecksums ? 1 : 0) + (HasInlineSources ? 1 : 0), 1); // file_name_entry_format (sequence of ULEB128 pairs). encodeULEB128(dwarf::DW_LNCT_path, Section.OS); - encodeULEB128(P.FileNames[0].Name.getForm(), Section.OS); + encodeULEB128(FileNameForm, Section.OS); encodeULEB128(dwarf::DW_LNCT_directory_index, Section.OS); encodeULEB128(dwarf::DW_FORM_data1, Section.OS); - if (P.ContentTypes.HasMD5) { + if (HasChecksums) { encodeULEB128(dwarf::DW_LNCT_MD5, Section.OS); encodeULEB128(dwarf::DW_FORM_data16, Section.OS); } + + if (HasInlineSources) { + encodeULEB128(dwarf::DW_LNCT_LLVM_source, Section.OS); + encodeULEB128(LLVMSourceForm, Section.OS); + } } // file_names_count (ULEB128). @@ -226,14 +241,27 @@ private: // A null-terminated string containing the full or relative path name of a // source file. 
- Section.emitString(File.Name.getForm(), *FileNameStr); + Section.emitString(FileNameForm, *FileNameStr); Section.emitIntVal(File.DirIdx, 1); - if (P.ContentTypes.HasMD5) { + if (HasChecksums) { + assert((File.Checksum.size() == 16) && + "checksum size is not equal to 16 bytes."); Section.emitBinaryData( StringRef(reinterpret_cast<const char *>(File.Checksum.data()), File.Checksum.size())); } + + if (HasInlineSources) { + std::optional<const char *> FileSourceStr = + dwarf::toString(File.Source); + if (!FileSourceStr) { + U.warn("cann't read string from line table."); + return; + } + + Section.emitString(LLVMSourceForm, *FileSourceStr); + } } } diff --git a/llvm/lib/DWARFLinker/Utils.cpp b/llvm/lib/DWARFLinker/Utils.cpp index e8b0fe303aae..52508c998532 100644 --- a/llvm/lib/DWARFLinker/Utils.cpp +++ b/llvm/lib/DWARFLinker/Utils.cpp @@ -5,3 +5,5 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// + +#include "llvm/DWARFLinker/Utils.h" diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index aeaca21a99cc..b6ad85b2d46e 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -96,6 +96,7 @@ #include "llvm/IR/IntrinsicsAArch64.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsARM.h" +#include "llvm/IR/IntrinsicsNVPTX.h" #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" @@ -6031,6 +6032,16 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { "Value for inactive lanes must be a VGPR function argument", &Call); break; } + case Intrinsic::nvvm_setmaxnreg_inc_sync_aligned_u32: + case Intrinsic::nvvm_setmaxnreg_dec_sync_aligned_u32: { + Value *V = Call.getArgOperand(0); + unsigned RegCount = cast<ConstantInt>(V)->getZExtValue(); + Check(RegCount % 8 == 0, + "reg_count argument to nvvm.setmaxnreg must be in multiples of 8"); + Check((RegCount >= 24 && RegCount <= 256), + "reg_count argument to nvvm.setmaxnreg must be within [24, 256]"); + break; + } case Intrinsic::experimental_convergence_entry: LLVM_FALLTHROUGH; case Intrinsic::experimental_convergence_anchor: diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp index 93e1d2f44b8c..d4c4bcb85648 100644 --- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp +++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp @@ -616,12 +616,12 @@ bool ELFAsmParser::ParseSectionArguments(bool IsPush, SMLoc loc) { if (Mergeable) if (parseMergeSize(Size)) return true; - if (Group) - if (parseGroup(GroupName, IsComdat)) - return true; if (Flags & ELF::SHF_LINK_ORDER) if (parseLinkedToSym(LinkedToSym)) return true; + if (Group) + if (parseGroup(GroupName, IsComdat)) + return true; if (maybeParseUniqueID(UniqueID)) return true; } diff --git a/llvm/lib/MC/MCSectionELF.cpp b/llvm/lib/MC/MCSectionELF.cpp index 95fdf3352207..b1efb839ba75 100644 --- a/llvm/lib/MC/MCSectionELF.cpp +++ b/llvm/lib/MC/MCSectionELF.cpp @@ -90,8 +90,6 @@ void MCSectionELF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T, OS << 'e'; if (Flags & ELF::SHF_EXECINSTR) OS << 'x'; - if (Flags & ELF::SHF_GROUP) - OS << 'G'; if (Flags & ELF::SHF_WRITE) OS << 'w'; if (Flags & ELF::SHF_MERGE) @@ -102,6 +100,8 @@ void MCSectionELF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T, OS << 'T'; if (Flags & ELF::SHF_LINK_ORDER) OS << 'o'; + if (Flags & ELF::SHF_GROUP) + OS << 'G'; if (Flags & ELF::SHF_GNU_RETAIN) OS << 'R'; @@ -183,13 +183,6 @@ void 
MCSectionELF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T, OS << "," << EntrySize; } - if (Flags & ELF::SHF_GROUP) { - OS << ","; - printName(OS, Group.getPointer()->getName()); - if (isComdat()) - OS << ",comdat"; - } - if (Flags & ELF::SHF_LINK_ORDER) { OS << ","; if (LinkedToSym) @@ -198,6 +191,13 @@ void MCSectionELF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T, OS << '0'; } + if (Flags & ELF::SHF_GROUP) { + OS << ","; + printName(OS, Group.getPointer()->getName()); + if (isComdat()) + OS << ",comdat"; + } + if (isUnique()) OS << ",unique," << UniqueID; diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 27bfe12127cc..bfc97d5464c0 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -72,6 +72,7 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/Analysis/UniformityAnalysis.h" +#include "llvm/CodeGen/AssignmentTrackingAnalysis.h" #include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" #include "llvm/CodeGen/CallBrPrepare.h" #include "llvm/CodeGen/CodeGenPrepare.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index bda36bd8c107..0b53b59787dd 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -235,6 +235,7 @@ FUNCTION_ANALYSIS("block-freq", BlockFrequencyAnalysis()) FUNCTION_ANALYSIS("branch-prob", BranchProbabilityAnalysis()) FUNCTION_ANALYSIS("cycles", CycleAnalysis()) FUNCTION_ANALYSIS("da", DependenceAnalysis()) +FUNCTION_ANALYSIS("debug-ata", DebugAssignmentTrackingAnalysis()) FUNCTION_ANALYSIS("demanded-bits", DemandedBitsAnalysis()) FUNCTION_ANALYSIS("domfrontier", DominanceFrontierAnalysis()) FUNCTION_ANALYSIS("domtree", DominatorTreeAnalysis()) @@ -384,6 +385,7 @@ FUNCTION_PASS("print<branch-prob>", BranchProbabilityPrinterPass(dbgs())) FUNCTION_PASS("print<cost-model>", CostModelPrinterPass(dbgs())) FUNCTION_PASS("print<cycles>", CycleInfoPrinterPass(dbgs())) FUNCTION_PASS("print<da>", DependenceAnalysisPrinterPass(dbgs())) +FUNCTION_PASS("print<debug-ata>", DebugAssignmentTrackingPrinterPass(dbgs())) FUNCTION_PASS("print<delinearization>", DelinearizationPrinterPass(dbgs())) FUNCTION_PASS("print<demanded-bits>", DemandedBitsPrinterPass(dbgs())) FUNCTION_PASS("print<domfrontier>", DominanceFrontierPrinterPass(dbgs())) @@ -421,7 +423,7 @@ FUNCTION_PASS("structurizecfg", StructurizeCFGPass()) FUNCTION_PASS("tailcallelim", TailCallElimPass()) FUNCTION_PASS("tlshoist", TLSVariableHoistPass()) FUNCTION_PASS("transform-warning", WarnMissedTransformationsPass()) -FUNCTION_PASS("trigger-verifier-error", TriggerVerifierErrorPass()) +FUNCTION_PASS("trigger-verifier-error", TriggerVerifierErrorPass()) FUNCTION_PASS("tsan", ThreadSanitizerPass()) FUNCTION_PASS("typepromotion", TypePromotionPass(TM)) FUNCTION_PASS("unify-loop-exits", UnifyLoopExitsPass()) diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp index 70f531e40b90..390d950486a7 100644 --- a/llvm/lib/Support/RISCVISAInfo.cpp +++ b/llvm/lib/Support/RISCVISAInfo.cpp @@ -24,16 +24,11 @@ using namespace llvm; namespace { -/// Represents the major and version number components of a RISC-V extension -struct RISCVExtensionVersion { - unsigned Major; - unsigned Minor; -}; struct RISCVSupportedExtension { const char *Name; /// Supported version. 
- RISCVExtensionVersion Version; + RISCVISAInfo::ExtensionVersion Version; bool operator<(const RISCVSupportedExtension &RHS) const { return StringRef(Name) < StringRef(RHS.Name); @@ -50,161 +45,161 @@ static const char *RISCVGImplications[] = { // NOTE: This table should be sorted alphabetically by extension name. static const RISCVSupportedExtension SupportedExtensions[] = { - {"a", RISCVExtensionVersion{2, 1}}, - {"c", RISCVExtensionVersion{2, 0}}, - {"d", RISCVExtensionVersion{2, 2}}, - {"e", RISCVExtensionVersion{2, 0}}, - {"f", RISCVExtensionVersion{2, 2}}, - {"h", RISCVExtensionVersion{1, 0}}, - {"i", RISCVExtensionVersion{2, 1}}, - {"m", RISCVExtensionVersion{2, 0}}, - - {"smaia", RISCVExtensionVersion{1, 0}}, - {"ssaia", RISCVExtensionVersion{1, 0}}, - {"svinval", RISCVExtensionVersion{1, 0}}, - {"svnapot", RISCVExtensionVersion{1, 0}}, - {"svpbmt", RISCVExtensionVersion{1, 0}}, - - {"v", RISCVExtensionVersion{1, 0}}, + {"a", {2, 1}}, + {"c", {2, 0}}, + {"d", {2, 2}}, + {"e", {2, 0}}, + {"f", {2, 2}}, + {"h", {1, 0}}, + {"i", {2, 1}}, + {"m", {2, 0}}, + + {"smaia", {1, 0}}, + {"ssaia", {1, 0}}, + {"svinval", {1, 0}}, + {"svnapot", {1, 0}}, + {"svpbmt", {1, 0}}, + + {"v", {1, 0}}, // vendor-defined ('X') extensions - {"xcvalu", RISCVExtensionVersion{1, 0}}, - {"xcvbi", RISCVExtensionVersion{1, 0}}, - {"xcvbitmanip", RISCVExtensionVersion{1, 0}}, - {"xcvelw", RISCVExtensionVersion{1, 0}}, - {"xcvmac", RISCVExtensionVersion{1, 0}}, - {"xcvmem", RISCVExtensionVersion{1, 0}}, - {"xcvsimd", RISCVExtensionVersion{1, 0}}, - {"xsfvcp", RISCVExtensionVersion{1, 0}}, - {"xsfvfnrclipxfqf", RISCVExtensionVersion{1, 0}}, - {"xsfvfwmaccqqq", RISCVExtensionVersion{1, 0}}, - {"xsfvqmaccdod", RISCVExtensionVersion{1, 0}}, - {"xsfvqmaccqoq", RISCVExtensionVersion{1, 0}}, - {"xtheadba", RISCVExtensionVersion{1, 0}}, - {"xtheadbb", RISCVExtensionVersion{1, 0}}, - {"xtheadbs", RISCVExtensionVersion{1, 0}}, - {"xtheadcmo", RISCVExtensionVersion{1, 0}}, - {"xtheadcondmov", RISCVExtensionVersion{1, 0}}, - {"xtheadfmemidx", RISCVExtensionVersion{1, 0}}, - {"xtheadmac", RISCVExtensionVersion{1, 0}}, - {"xtheadmemidx", RISCVExtensionVersion{1, 0}}, - {"xtheadmempair", RISCVExtensionVersion{1, 0}}, - {"xtheadsync", RISCVExtensionVersion{1, 0}}, - {"xtheadvdot", RISCVExtensionVersion{1, 0}}, - {"xventanacondops", RISCVExtensionVersion{1, 0}}, - - {"zawrs", RISCVExtensionVersion{1, 0}}, - - {"zba", RISCVExtensionVersion{1, 0}}, - {"zbb", RISCVExtensionVersion{1, 0}}, - {"zbc", RISCVExtensionVersion{1, 0}}, - {"zbkb", RISCVExtensionVersion{1, 0}}, - {"zbkc", RISCVExtensionVersion{1, 0}}, - {"zbkx", RISCVExtensionVersion{1, 0}}, - {"zbs", RISCVExtensionVersion{1, 0}}, - - {"zca", RISCVExtensionVersion{1, 0}}, - {"zcb", RISCVExtensionVersion{1, 0}}, - {"zcd", RISCVExtensionVersion{1, 0}}, - {"zce", RISCVExtensionVersion{1, 0}}, - {"zcf", RISCVExtensionVersion{1, 0}}, - {"zcmp", RISCVExtensionVersion{1, 0}}, - {"zcmt", RISCVExtensionVersion{1, 0}}, - - {"zdinx", RISCVExtensionVersion{1, 0}}, - - {"zfa", RISCVExtensionVersion{1, 0}}, - {"zfh", RISCVExtensionVersion{1, 0}}, - {"zfhmin", RISCVExtensionVersion{1, 0}}, - {"zfinx", RISCVExtensionVersion{1, 0}}, - - {"zhinx", RISCVExtensionVersion{1, 0}}, - {"zhinxmin", RISCVExtensionVersion{1, 0}}, - - {"zicbom", RISCVExtensionVersion{1, 0}}, - {"zicbop", RISCVExtensionVersion{1, 0}}, - {"zicboz", RISCVExtensionVersion{1, 0}}, - {"zicntr", RISCVExtensionVersion{2, 0}}, - {"zicsr", RISCVExtensionVersion{2, 0}}, - {"zifencei", RISCVExtensionVersion{2, 0}}, - {"zihintntl", 
RISCVExtensionVersion{1, 0}}, - {"zihintpause", RISCVExtensionVersion{2, 0}}, - {"zihpm", RISCVExtensionVersion{2, 0}}, - - {"zk", RISCVExtensionVersion{1, 0}}, - {"zkn", RISCVExtensionVersion{1, 0}}, - {"zknd", RISCVExtensionVersion{1, 0}}, - {"zkne", RISCVExtensionVersion{1, 0}}, - {"zknh", RISCVExtensionVersion{1, 0}}, - {"zkr", RISCVExtensionVersion{1, 0}}, - {"zks", RISCVExtensionVersion{1, 0}}, - {"zksed", RISCVExtensionVersion{1, 0}}, - {"zksh", RISCVExtensionVersion{1, 0}}, - {"zkt", RISCVExtensionVersion{1, 0}}, - - {"zmmul", RISCVExtensionVersion{1, 0}}, - - {"zvbb", RISCVExtensionVersion{1, 0}}, - {"zvbc", RISCVExtensionVersion{1, 0}}, - - {"zve32f", RISCVExtensionVersion{1, 0}}, - {"zve32x", RISCVExtensionVersion{1, 0}}, - {"zve64d", RISCVExtensionVersion{1, 0}}, - {"zve64f", RISCVExtensionVersion{1, 0}}, - {"zve64x", RISCVExtensionVersion{1, 0}}, - - {"zvfh", RISCVExtensionVersion{1, 0}}, - {"zvfhmin", RISCVExtensionVersion{1, 0}}, + {"xcvalu", {1, 0}}, + {"xcvbi", {1, 0}}, + {"xcvbitmanip", {1, 0}}, + {"xcvelw", {1, 0}}, + {"xcvmac", {1, 0}}, + {"xcvmem", {1, 0}}, + {"xcvsimd", {1, 0}}, + {"xsfvcp", {1, 0}}, + {"xsfvfnrclipxfqf", {1, 0}}, + {"xsfvfwmaccqqq", {1, 0}}, + {"xsfvqmaccdod", {1, 0}}, + {"xsfvqmaccqoq", {1, 0}}, + {"xtheadba", {1, 0}}, + {"xtheadbb", {1, 0}}, + {"xtheadbs", {1, 0}}, + {"xtheadcmo", {1, 0}}, + {"xtheadcondmov", {1, 0}}, + {"xtheadfmemidx", {1, 0}}, + {"xtheadmac", {1, 0}}, + {"xtheadmemidx", {1, 0}}, + {"xtheadmempair", {1, 0}}, + {"xtheadsync", {1, 0}}, + {"xtheadvdot", {1, 0}}, + {"xventanacondops", {1, 0}}, + + {"zawrs", {1, 0}}, + + {"zba", {1, 0}}, + {"zbb", {1, 0}}, + {"zbc", {1, 0}}, + {"zbkb", {1, 0}}, + {"zbkc", {1, 0}}, + {"zbkx", {1, 0}}, + {"zbs", {1, 0}}, + + {"zca", {1, 0}}, + {"zcb", {1, 0}}, + {"zcd", {1, 0}}, + {"zce", {1, 0}}, + {"zcf", {1, 0}}, + {"zcmp", {1, 0}}, + {"zcmt", {1, 0}}, + + {"zdinx", {1, 0}}, + + {"zfa", {1, 0}}, + {"zfh", {1, 0}}, + {"zfhmin", {1, 0}}, + {"zfinx", {1, 0}}, + + {"zhinx", {1, 0}}, + {"zhinxmin", {1, 0}}, + + {"zicbom", {1, 0}}, + {"zicbop", {1, 0}}, + {"zicboz", {1, 0}}, + {"zicntr", {2, 0}}, + {"zicsr", {2, 0}}, + {"zifencei", {2, 0}}, + {"zihintntl", {1, 0}}, + {"zihintpause", {2, 0}}, + {"zihpm", {2, 0}}, + + {"zk", {1, 0}}, + {"zkn", {1, 0}}, + {"zknd", {1, 0}}, + {"zkne", {1, 0}}, + {"zknh", {1, 0}}, + {"zkr", {1, 0}}, + {"zks", {1, 0}}, + {"zksed", {1, 0}}, + {"zksh", {1, 0}}, + {"zkt", {1, 0}}, + + {"zmmul", {1, 0}}, + + {"zvbb", {1, 0}}, + {"zvbc", {1, 0}}, + + {"zve32f", {1, 0}}, + {"zve32x", {1, 0}}, + {"zve64d", {1, 0}}, + {"zve64f", {1, 0}}, + {"zve64x", {1, 0}}, + + {"zvfh", {1, 0}}, + {"zvfhmin", {1, 0}}, // vector crypto - {"zvkb", RISCVExtensionVersion{1, 0}}, - {"zvkg", RISCVExtensionVersion{1, 0}}, - {"zvkn", RISCVExtensionVersion{1, 0}}, - {"zvknc", RISCVExtensionVersion{1, 0}}, - {"zvkned", RISCVExtensionVersion{1, 0}}, - {"zvkng", RISCVExtensionVersion{1, 0}}, - {"zvknha", RISCVExtensionVersion{1, 0}}, - {"zvknhb", RISCVExtensionVersion{1, 0}}, - {"zvks", RISCVExtensionVersion{1, 0}}, - {"zvksc", RISCVExtensionVersion{1, 0}}, - {"zvksed", RISCVExtensionVersion{1, 0}}, - {"zvksg", RISCVExtensionVersion{1, 0}}, - {"zvksh", RISCVExtensionVersion{1, 0}}, - {"zvkt", RISCVExtensionVersion{1, 0}}, - - {"zvl1024b", RISCVExtensionVersion{1, 0}}, - {"zvl128b", RISCVExtensionVersion{1, 0}}, - {"zvl16384b", RISCVExtensionVersion{1, 0}}, - {"zvl2048b", RISCVExtensionVersion{1, 0}}, - {"zvl256b", RISCVExtensionVersion{1, 0}}, - {"zvl32768b", RISCVExtensionVersion{1, 0}}, - {"zvl32b", 
RISCVExtensionVersion{1, 0}}, - {"zvl4096b", RISCVExtensionVersion{1, 0}}, - {"zvl512b", RISCVExtensionVersion{1, 0}}, - {"zvl64b", RISCVExtensionVersion{1, 0}}, - {"zvl65536b", RISCVExtensionVersion{1, 0}}, - {"zvl8192b", RISCVExtensionVersion{1, 0}}, + {"zvkb", {1, 0}}, + {"zvkg", {1, 0}}, + {"zvkn", {1, 0}}, + {"zvknc", {1, 0}}, + {"zvkned", {1, 0}}, + {"zvkng", {1, 0}}, + {"zvknha", {1, 0}}, + {"zvknhb", {1, 0}}, + {"zvks", {1, 0}}, + {"zvksc", {1, 0}}, + {"zvksed", {1, 0}}, + {"zvksg", {1, 0}}, + {"zvksh", {1, 0}}, + {"zvkt", {1, 0}}, + + {"zvl1024b", {1, 0}}, + {"zvl128b", {1, 0}}, + {"zvl16384b", {1, 0}}, + {"zvl2048b", {1, 0}}, + {"zvl256b", {1, 0}}, + {"zvl32768b", {1, 0}}, + {"zvl32b", {1, 0}}, + {"zvl4096b", {1, 0}}, + {"zvl512b", {1, 0}}, + {"zvl64b", {1, 0}}, + {"zvl65536b", {1, 0}}, + {"zvl8192b", {1, 0}}, }; // NOTE: This table should be sorted alphabetically by extension name. static const RISCVSupportedExtension SupportedExperimentalExtensions[] = { - {"zacas", RISCVExtensionVersion{1, 0}}, + {"zacas", {1, 0}}, - {"zcmop", RISCVExtensionVersion{0, 2}}, + {"zcmop", {0, 2}}, - {"zfbfmin", RISCVExtensionVersion{0, 8}}, + {"zfbfmin", {0, 8}}, - {"zicfilp", RISCVExtensionVersion{0, 4}}, - {"zicfiss", RISCVExtensionVersion{0, 4}}, + {"zicfilp", {0, 4}}, + {"zicfiss", {0, 4}}, - {"zicond", RISCVExtensionVersion{1, 0}}, + {"zicond", {1, 0}}, - {"zimop", RISCVExtensionVersion{0, 1}}, + {"zimop", {0, 1}}, - {"ztso", RISCVExtensionVersion{0, 1}}, + {"ztso", {0, 1}}, - {"zvfbfmin", RISCVExtensionVersion{0, 8}}, - {"zvfbfwma", RISCVExtensionVersion{0, 8}}, + {"zvfbfmin", {0, 8}}, + {"zvfbfwma", {0, 8}}, }; static void verifyTables() { @@ -237,8 +232,8 @@ void llvm::riscvExtensionsHelp(StringMap<StringRef> DescMap) { for (const auto &E : SupportedExtensions) ExtMap[E.Name] = {E.Version.Major, E.Version.Minor}; for (const auto &E : ExtMap) { - std::string Version = std::to_string(E.second.MajorVersion) + "." + - std::to_string(E.second.MinorVersion); + std::string Version = + std::to_string(E.second.Major) + "." + std::to_string(E.second.Minor); PrintExtension(E.first, Version, DescMap[E.first]); } @@ -247,8 +242,8 @@ void llvm::riscvExtensionsHelp(StringMap<StringRef> DescMap) { for (const auto &E : SupportedExperimentalExtensions) ExtMap[E.Name] = {E.Version.Major, E.Version.Minor}; for (const auto &E : ExtMap) { - std::string Version = std::to_string(E.second.MajorVersion) + "." + - std::to_string(E.second.MinorVersion); + std::string Version = + std::to_string(E.second.Major) + "." + std::to_string(E.second.Minor); PrintExtension(E.first, Version, DescMap["experimental-" + E.first]); } @@ -293,7 +288,7 @@ struct LessExtName { }; } // namespace -static std::optional<RISCVExtensionVersion> +static std::optional<RISCVISAInfo::ExtensionVersion> findDefaultVersion(StringRef ExtName) { // Find default version of an extension. // TODO: We might set default version based on profile or ISA spec. 
@@ -309,12 +304,9 @@ findDefaultVersion(StringRef ExtName) { return std::nullopt; } -void RISCVISAInfo::addExtension(StringRef ExtName, unsigned MajorVersion, - unsigned MinorVersion) { - RISCVExtensionInfo Ext; - Ext.MajorVersion = MajorVersion; - Ext.MinorVersion = MinorVersion; - Exts[ExtName.str()] = Ext; +void RISCVISAInfo::addExtension(StringRef ExtName, + RISCVISAInfo::ExtensionVersion Version) { + Exts[ExtName.str()] = Version; } static StringRef getExtensionTypeDesc(StringRef Ext) { @@ -337,7 +329,7 @@ static StringRef getExtensionType(StringRef Ext) { return StringRef(); } -static std::optional<RISCVExtensionVersion> +static std::optional<RISCVISAInfo::ExtensionVersion> isExperimentalExtension(StringRef Ext) { auto I = llvm::lower_bound(SupportedExperimentalExtensions, Ext, LessExtName()); @@ -634,8 +626,7 @@ RISCVISAInfo::parseFeatures(unsigned XLen, continue; if (Add) - ISAInfo->addExtension(ExtName, ExtensionInfoIterator->Version.Major, - ExtensionInfoIterator->Version.Minor); + ISAInfo->addExtension(ExtName, ExtensionInfoIterator->Version); else ISAInfo->Exts.erase(ExtName.str()); } @@ -696,7 +687,7 @@ RISCVISAInfo::parseNormalizedArchString(StringRef Arch) { if (MajorVersionStr.getAsInteger(10, MajorVersion)) return createStringError(errc::invalid_argument, "failed to parse major version number"); - ISAInfo->addExtension(ExtName, MajorVersion, MinorVersion); + ISAInfo->addExtension(ExtName, {MajorVersion, MinorVersion}); } ISAInfo->updateFLen(); ISAInfo->updateMinVLen(); @@ -775,7 +766,7 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, // ISA spec. for (const auto *Ext : RISCVGImplications) { if (auto Version = findDefaultVersion(Ext)) - ISAInfo->addExtension(Ext, Version->Major, Version->Minor); + ISAInfo->addExtension(Ext, *Version); else llvm_unreachable("Default extension version not found?"); } @@ -794,7 +785,7 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, Minor = Version->Minor; } - ISAInfo->addExtension(StringRef(&Baseline, 1), Major, Minor); + ISAInfo->addExtension(StringRef(&Baseline, 1), {Major, Minor}); } // Consume the base ISA version number and any '_' between rvxxx and the @@ -860,7 +851,7 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, "unsupported standard user-level extension '%c'", C); } - ISAInfo->addExtension(StringRef(&C, 1), Major, Minor); + ISAInfo->addExtension(StringRef(&C, 1), {Major, Minor}); // Consume full extension name and version, including any optional '_' // between this extension and the next @@ -928,7 +919,7 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, if (IgnoreUnknown && !isSupportedExtension(Name)) continue; - ISAInfo->addExtension(Name, Major, Minor); + ISAInfo->addExtension(Name, {Major, Minor}); // Extension format is correct, keep parsing the extensions. // TODO: Save Type, Name, Major, Minor to avoid parsing them later. 
AllExts.push_back(Name); @@ -1143,7 +1134,7 @@ void RISCVISAInfo::updateImplication() { // implied if (!HasE && !HasI) { auto Version = findDefaultVersion("i"); - addExtension("i", Version->Major, Version->Minor); + addExtension("i", Version.value()); } assert(llvm::is_sorted(ImpliedExts) && "Table not sorted by Name"); @@ -1164,7 +1155,7 @@ void RISCVISAInfo::updateImplication() { if (Exts.count(ImpliedExt)) continue; auto Version = findDefaultVersion(ImpliedExt); - addExtension(ImpliedExt, Version->Major, Version->Minor); + addExtension(ImpliedExt, Version.value()); WorkList.insert(ImpliedExt); } } @@ -1174,7 +1165,7 @@ void RISCVISAInfo::updateImplication() { if (XLen == 32 && Exts.count("zce") && Exts.count("f") && !Exts.count("zcf")) { auto Version = findDefaultVersion("zcf"); - addExtension("zcf", Version->Major, Version->Minor); + addExtension("zcf", Version.value()); } } @@ -1209,7 +1200,7 @@ void RISCVISAInfo::updateCombination() { IsAllRequiredFeatureExist &= hasExtension(Ext); if (IsAllRequiredFeatureExist) { auto Version = findDefaultVersion(CombineExt); - addExtension(CombineExt, Version->Major, Version->Minor); + addExtension(CombineExt, Version.value()); IsNewCombine = true; } } @@ -1266,7 +1257,7 @@ std::string RISCVISAInfo::toString() const { StringRef ExtName = Ext.first; auto ExtInfo = Ext.second; Arch << LS << ExtName; - Arch << ExtInfo.MajorVersion << "p" << ExtInfo.MinorVersion; + Arch << ExtInfo.Major << "p" << ExtInfo.Minor; } return Arch.str(); diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp index aa981fdab4b3..2b3e8a0c7f84 100644 --- a/llvm/lib/TableGen/Record.cpp +++ b/llvm/lib/TableGen/Record.cpp @@ -923,15 +923,16 @@ Init *UnOpInit::Fold(Record *CurRec, bool IsFinal) const { case GETDAGOP: if (DagInit *Dag = dyn_cast<DagInit>(LHS)) { - DefInit *DI = DefInit::get(Dag->getOperatorAsDef({})); - if (!DI->getType()->typeIsA(getType())) { + // TI is not necessarily a def due to the late resolution in multiclasses, + // but has to be a TypedInit. 
+ auto *TI = cast<TypedInit>(Dag->getOperator()); + if (!TI->getType()->typeIsA(getType())) { PrintFatalError(CurRec->getLoc(), - Twine("Expected type '") + - getType()->getAsString() + "', got '" + - DI->getType()->getAsString() + "' in: " + - getAsString() + "\n"); + Twine("Expected type '") + getType()->getAsString() + + "', got '" + TI->getType()->getAsString() + + "' in: " + getAsString() + "\n"); } else { - return DI; + return Dag->getOperator(); } } break; diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index 90e1ce9ddf66..7d2ff146a340 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -256,6 +256,11 @@ void AArch64AsmPrinter::emitStartOfAsmFile(Module &M) { if (BTE->getZExtValue()) Flags |= ELF::GNU_PROPERTY_AARCH64_FEATURE_1_BTI; + if (const auto *GCS = mdconst::extract_or_null<ConstantInt>( + M.getModuleFlag("guarded-control-stack"))) + if (GCS->getZExtValue()) + Flags |= ELF::GNU_PROPERTY_AARCH64_FEATURE_1_GCS; + if (const auto *Sign = mdconst::extract_or_null<ConstantInt>( M.getModuleFlag("sign-return-address"))) if (Sign->getZExtValue()) diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index edc8cc7d4d1e..ea5679b4d5e3 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -6834,10 +6834,10 @@ static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) { return getPackedVectorTypeFromPredicateType( Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4); case Intrinsic::aarch64_sve_ld1udq: - case Intrinsic::aarch64_sve_st1udq: + case Intrinsic::aarch64_sve_st1dq: return EVT(MVT::nxv1i64); case Intrinsic::aarch64_sve_ld1uwq: - case Intrinsic::aarch64_sve_st1uwq: + case Intrinsic::aarch64_sve_st1wq: return EVT(MVT::nxv1i32); } } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 47e665176e8b..e2d07a096496 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -4513,8 +4513,7 @@ static SDValue skipExtensionForVectorMULL(SDValue N, SelectionDAG &DAG) { SDLoc dl(N); SmallVector<SDValue, 8> Ops; for (unsigned i = 0; i != NumElts; ++i) { - ConstantSDNode *C = cast<ConstantSDNode>(N.getOperand(i)); - const APInt &CInt = C->getAPIntValue(); + const APInt &CInt = N.getConstantOperandAPInt(i); // Element types smaller than 32 bits are not legal, so use i32 elements. // The values are implicitly truncated so sext vs. zext doesn't matter. 
Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32)); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 1cfbf4737a6f..42b7a6418032 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -4214,6 +4214,9 @@ static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) { switch (FirstOpc) { default: return false; + case AArch64::LDRQui: + case AArch64::LDURQi: + return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi; case AArch64::LDRWui: case AArch64::LDURWi: return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi; diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index b435b3ce03e7..e90b8a8ca7ac 100644 --- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -1326,10 +1326,14 @@ static int alignTo(int Num, int PowOf2) { static bool mayAlias(MachineInstr &MIa, SmallVectorImpl<MachineInstr *> &MemInsns, AliasAnalysis *AA) { - for (MachineInstr *MIb : MemInsns) - if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false)) + for (MachineInstr *MIb : MemInsns) { + if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false)) { + LLVM_DEBUG(dbgs() << "Aliasing with: "; MIb->dump()); return true; + } + } + LLVM_DEBUG(dbgs() << "No aliases found\n"); return false; } @@ -1757,9 +1761,11 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // Remember any instructions that read/write memory between FirstMI and MI. SmallVector<MachineInstr *, 4> MemInsns; + LLVM_DEBUG(dbgs() << "Find match for: "; FirstMI.dump()); for (unsigned Count = 0; MBBI != E && Count < Limit; MBBI = next_nodbg(MBBI, E)) { MachineInstr &MI = *MBBI; + LLVM_DEBUG(dbgs() << "Analysing 2nd insn: "; MI.dump()); UsedInBetween.accumulate(MI); @@ -1859,6 +1865,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); MemInsns.push_back(&MI); + LLVM_DEBUG(dbgs() << "Offset doesn't fit in immediate, " + << "keep looking.\n"); continue; } // If the alignment requirements of the paired (scaled) instruction @@ -1868,6 +1876,9 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); MemInsns.push_back(&MI); + LLVM_DEBUG(dbgs() + << "Offset doesn't fit due to alignment requirements, " + << "keep looking.\n"); continue; } } @@ -1884,14 +1895,22 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, const bool SameLoadReg = MayLoad && TRI->isSuperOrSubRegisterEq( Reg, getLdStRegOp(MI).getReg()); - // If the Rt of the second instruction was not modified or used between - // the two instructions and none of the instructions between the second - // and first alias with the second, we can combine the second into the - // first. - if (ModifiedRegUnits.available(getLdStRegOp(MI).getReg()) && - !(MI.mayLoad() && !SameLoadReg && - !UsedRegUnits.available(getLdStRegOp(MI).getReg())) && - !mayAlias(MI, MemInsns, AA)) { + // If the Rt of the second instruction (destination register of the + // load) was not modified or used between the two instructions and none + // of the instructions between the second and first alias with the + // second, we can combine the second into the first. 
+ bool RtNotModified = + ModifiedRegUnits.available(getLdStRegOp(MI).getReg()); + bool RtNotUsed = !(MI.mayLoad() && !SameLoadReg && + !UsedRegUnits.available(getLdStRegOp(MI).getReg())); + + LLVM_DEBUG(dbgs() << "Checking, can combine 2nd into 1st insn:\n" + << "Reg '" << getLdStRegOp(MI) << "' not modified: " + << (RtNotModified ? "true" : "false") << "\n" + << "Reg '" << getLdStRegOp(MI) << "' not used: " + << (RtNotUsed ? "true" : "false") << "\n"); + + if (RtNotModified && RtNotUsed && !mayAlias(MI, MemInsns, AA)) { // For pairs loading into the same reg, try to find a renaming // opportunity to allow the renaming of Reg between FirstMI and MI // and combine MI into FirstMI; otherwise bail and keep looking. @@ -1904,6 +1923,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); MemInsns.push_back(&MI); + LLVM_DEBUG(dbgs() << "Can't find reg for renaming, " + << "keep looking.\n"); continue; } Flags.setRenameReg(*RenameReg); @@ -1919,10 +1940,15 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // between the two instructions and none of the instructions between the // first and the second alias with the first, we can combine the first // into the second. - if (!(MayLoad && - !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg())) && - !mayAlias(FirstMI, MemInsns, AA)) { + RtNotModified = !( + MayLoad && !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg())); + LLVM_DEBUG(dbgs() << "Checking, can combine 1st into 2nd insn:\n" + << "Reg '" << getLdStRegOp(FirstMI) + << "' not modified: " + << (RtNotModified ? "true" : "false") << "\n"); + + if (RtNotModified && !mayAlias(FirstMI, MemInsns, AA)) { if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg())) { Flags.setMergeForward(true); Flags.clearRenameReg(); @@ -1938,8 +1964,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, MBBIWithRenameReg = MBBI; } } - // Unable to combine these instructions due to interference in between. - // Keep looking. + LLVM_DEBUG(dbgs() << "Unable to combine these instructions due to " + << "interference in between, keep looking.\n"); } } @@ -1948,16 +1974,20 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // If the instruction wasn't a matching load or store. Stop searching if we // encounter a call instruction that might modify memory. - if (MI.isCall()) + if (MI.isCall()) { + LLVM_DEBUG(dbgs() << "Found a call, stop looking.\n"); return E; + } // Update modified / uses register units. LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); // Otherwise, if the base register is modified, we have no match, so // return early. - if (!ModifiedRegUnits.available(BaseReg)) + if (!ModifiedRegUnits.available(BaseReg)) { + LLVM_DEBUG(dbgs() << "Base reg is modified, stop looking.\n"); return E; + } // Update list of instructions that read/write memory. 
if (MI.mayLoadOrStore()) diff --git a/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp b/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp index 6fcd9c290e9c..6c6cd120b035 100644 --- a/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp +++ b/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp @@ -53,7 +53,7 @@ using namespace PatternMatch; #define DEBUG_TYPE "aarch64-loop-idiom-transform" static cl::opt<bool> - DisableAll("disable-aarch64-lit-all", cl::Hidden, cl::init(true), + DisableAll("disable-aarch64-lit-all", cl::Hidden, cl::init(false), cl::desc("Disable AArch64 Loop Idiom Transform Pass.")); static cl::opt<bool> DisableByteCmp( diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index ee10a7d1c706..4782ad076c60 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -1397,17 +1397,17 @@ let Predicates = [HasSVEorSME] in { (RegImmInst Z_q:$Zt, PPR3bAny:$Pg, GPR64sp:$base, (i64 0))>; } - // ld1quw/st1quw + // ld1quw/st1qw defm : sve_ld1q_pat<nxv4i32, nxv1i1, int_aarch64_sve_ld1uwq, LD1W_Q, LD1W_Q_IMM, am_sve_regreg_lsl2>; defm : sve_ld1q_pat<nxv4f32, nxv1i1, int_aarch64_sve_ld1uwq, LD1W_Q, LD1W_Q_IMM, am_sve_regreg_lsl2>; - defm : sve_st1q_pat<nxv4i32, nxv1i1, int_aarch64_sve_st1uwq, ST1W_Q, ST1W_Q_IMM, am_sve_regreg_lsl2>; - defm : sve_st1q_pat<nxv4f32, nxv1i1, int_aarch64_sve_st1uwq, ST1W_Q, ST1W_Q_IMM, am_sve_regreg_lsl2>; + defm : sve_st1q_pat<nxv4i32, nxv1i1, int_aarch64_sve_st1wq, ST1W_Q, ST1W_Q_IMM, am_sve_regreg_lsl2>; + defm : sve_st1q_pat<nxv4f32, nxv1i1, int_aarch64_sve_st1wq, ST1W_Q, ST1W_Q_IMM, am_sve_regreg_lsl2>; - // ld1qud/st1qud + // ld1qud/st1qd defm : sve_ld1q_pat<nxv2i64, nxv1i1, int_aarch64_sve_ld1udq, LD1D_Q, LD1D_Q_IMM, am_sve_regreg_lsl3>; defm : sve_ld1q_pat<nxv2f64, nxv1i1, int_aarch64_sve_ld1udq, LD1D_Q, LD1D_Q_IMM, am_sve_regreg_lsl3>; - defm : sve_st1q_pat<nxv2i64, nxv1i1, int_aarch64_sve_st1udq, ST1D_Q, ST1D_Q_IMM, am_sve_regreg_lsl3>; - defm : sve_st1q_pat<nxv2f64, nxv1i1, int_aarch64_sve_st1udq, ST1D_Q, ST1D_Q_IMM, am_sve_regreg_lsl3>; + defm : sve_st1q_pat<nxv2i64, nxv1i1, int_aarch64_sve_st1dq, ST1D_Q, ST1D_Q_IMM, am_sve_regreg_lsl3>; + defm : sve_st1q_pat<nxv2f64, nxv1i1, int_aarch64_sve_st1dq, ST1D_Q, ST1D_Q_IMM, am_sve_regreg_lsl3>; } // End HasSVEorSME @@ -4006,7 +4006,9 @@ defm WHILEHS_CXX : sve2p1_int_while_rr_pn<"whilehs", 0b100>; defm WHILEHI_CXX : sve2p1_int_while_rr_pn<"whilehi", 0b101>; defm WHILELO_CXX : sve2p1_int_while_rr_pn<"whilelo", 0b110>; defm WHILELS_CXX : sve2p1_int_while_rr_pn<"whilels", 0b111>; +} // End HasSVE2p1_or_HasSME2 +let Predicates = [HasSVEorSME] in { // Aliases for existing SVE instructions for which predicate-as-counter are // accepted as an operand to the instruction @@ -4025,7 +4027,7 @@ def : InstAlias<"mov $Pd, $Pn", def : InstAlias<"pfalse\t$Pd", (PFALSE PNRasPPR8:$Pd), 0>; -} // End HasSVE2p1_or_HasSME2 +} //===----------------------------------------------------------------------===// // Non-widening BFloat16 to BFloat16 instructions @@ -4095,7 +4097,7 @@ defm FMAXQV : sve2p1_fp_reduction_q<0b110, "fmaxqv", int_aarch64_sve_fmaxqv>; defm FMINQV : sve2p1_fp_reduction_q<0b111, "fminqv", int_aarch64_sve_fminqv>; defm DUPQ_ZZI : sve2p1_dupq<"dupq">; -defm EXTQ_ZZI : sve2p1_extq<"extq", int_aarch64_sve_extq_lane>; +defm EXTQ_ZZI : sve2p1_extq<"extq", int_aarch64_sve_extq>; defm PMOV_PZI : sve2p1_vector_to_pred<"pmov", int_aarch64_sve_pmov_to_pred_lane, 
int_aarch64_sve_pmov_to_pred_lane_zero>; defm PMOV_ZIP : sve2p1_pred_to_vector<"pmov", int_aarch64_sve_pmov_to_vector_lane_merging, int_aarch64_sve_pmov_to_vector_lane_zeroing>; diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index b5b8b6829178..13b5e578391d 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1406,9 +1406,23 @@ static std::optional<Instruction *> instCombineSVEAllActive(IntrinsicInst &II, return &II; } +// Simplify operations where predicate has all inactive lanes or try to replace +// with _u form when all lanes are active +static std::optional<Instruction *> +instCombineSVEAllOrNoActive(InstCombiner &IC, IntrinsicInst &II, + Intrinsic::ID IID) { + if (match(II.getOperand(0), m_ZeroInt())) { + // llvm_ir, pred(0), op1, op2 - Spec says to return op1 when all lanes are + // inactive for sv[func]_m + return IC.replaceInstUsesWith(II, II.getOperand(1)); + } + return instCombineSVEAllActive(II, IID); +} + static std::optional<Instruction *> instCombineSVEVectorAdd(InstCombiner &IC, IntrinsicInst &II) { - if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_add_u)) + if (auto II_U = + instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_add_u)) return II_U; if (auto MLA = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul, Intrinsic::aarch64_sve_mla>( @@ -1423,7 +1437,8 @@ static std::optional<Instruction *> instCombineSVEVectorAdd(InstCombiner &IC, static std::optional<Instruction *> instCombineSVEVectorFAdd(InstCombiner &IC, IntrinsicInst &II) { - if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fadd_u)) + if (auto II_U = + instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fadd_u)) return II_U; if (auto FMLA = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul, @@ -1465,7 +1480,8 @@ instCombineSVEVectorFAddU(InstCombiner &IC, IntrinsicInst &II) { static std::optional<Instruction *> instCombineSVEVectorFSub(InstCombiner &IC, IntrinsicInst &II) { - if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fsub_u)) + if (auto II_U = + instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fsub_u)) return II_U; if (auto FMLS = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul, @@ -1507,7 +1523,8 @@ instCombineSVEVectorFSubU(InstCombiner &IC, IntrinsicInst &II) { static std::optional<Instruction *> instCombineSVEVectorSub(InstCombiner &IC, IntrinsicInst &II) { - if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sub_u)) + if (auto II_U = + instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sub_u)) return II_U; if (auto MLS = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul, Intrinsic::aarch64_sve_mls>( @@ -1523,11 +1540,6 @@ static std::optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC, auto *OpMultiplicand = II.getOperand(1); auto *OpMultiplier = II.getOperand(2); - // Canonicalise a non _u intrinsic only. - if (II.getIntrinsicID() != IID) - if (auto II_U = instCombineSVEAllActive(II, IID)) - return II_U; - // Return true if a given instruction is a unit splat value, false otherwise. 
auto IsUnitSplat = [](auto *I) { auto *SplatValue = getSplatValue(I); @@ -1891,34 +1903,38 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, case Intrinsic::aarch64_sve_ptest_last: return instCombineSVEPTest(IC, II); case Intrinsic::aarch64_sve_fabd: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fabd_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fabd_u); case Intrinsic::aarch64_sve_fadd: return instCombineSVEVectorFAdd(IC, II); case Intrinsic::aarch64_sve_fadd_u: return instCombineSVEVectorFAddU(IC, II); case Intrinsic::aarch64_sve_fdiv: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fdiv_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fdiv_u); case Intrinsic::aarch64_sve_fmax: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmax_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmax_u); case Intrinsic::aarch64_sve_fmaxnm: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmaxnm_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmaxnm_u); case Intrinsic::aarch64_sve_fmin: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmin_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmin_u); case Intrinsic::aarch64_sve_fminnm: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fminnm_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fminnm_u); case Intrinsic::aarch64_sve_fmla: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmla_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmla_u); case Intrinsic::aarch64_sve_fmls: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmls_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmls_u); case Intrinsic::aarch64_sve_fmul: + if (auto II_U = + instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmul_u)) + return II_U; + return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_fmul_u); case Intrinsic::aarch64_sve_fmul_u: return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_fmul_u); case Intrinsic::aarch64_sve_fmulx: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmulx_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmulx_u); case Intrinsic::aarch64_sve_fnmla: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fnmla_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fnmla_u); case Intrinsic::aarch64_sve_fnmls: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fnmls_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fnmls_u); case Intrinsic::aarch64_sve_fsub: return instCombineSVEVectorFSub(IC, II); case Intrinsic::aarch64_sve_fsub_u: @@ -1930,20 +1946,24 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, Intrinsic::aarch64_sve_mla_u>( IC, II, true); case Intrinsic::aarch64_sve_mla: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_mla_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mla_u); case Intrinsic::aarch64_sve_mls: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_mls_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mls_u); case Intrinsic::aarch64_sve_mul: + if (auto II_U = + instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mul_u)) + return II_U; + return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_mul_u); case Intrinsic::aarch64_sve_mul_u: return 
instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_mul_u); case Intrinsic::aarch64_sve_sabd: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sabd_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sabd_u); case Intrinsic::aarch64_sve_smax: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smax_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smax_u); case Intrinsic::aarch64_sve_smin: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smin_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smin_u); case Intrinsic::aarch64_sve_smulh: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smulh_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smulh_u); case Intrinsic::aarch64_sve_sub: return instCombineSVEVectorSub(IC, II); case Intrinsic::aarch64_sve_sub_u: @@ -1951,31 +1971,31 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, Intrinsic::aarch64_sve_mls_u>( IC, II, true); case Intrinsic::aarch64_sve_uabd: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_uabd_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_uabd_u); case Intrinsic::aarch64_sve_umax: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umax_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umax_u); case Intrinsic::aarch64_sve_umin: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umin_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umin_u); case Intrinsic::aarch64_sve_umulh: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umulh_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umulh_u); case Intrinsic::aarch64_sve_asr: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_asr_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_asr_u); case Intrinsic::aarch64_sve_lsl: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_lsl_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_lsl_u); case Intrinsic::aarch64_sve_lsr: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_lsr_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_lsr_u); case Intrinsic::aarch64_sve_and: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_and_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_and_u); case Intrinsic::aarch64_sve_bic: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_bic_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_bic_u); case Intrinsic::aarch64_sve_eor: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_eor_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_eor_u); case Intrinsic::aarch64_sve_orr: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_orr_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_orr_u); case Intrinsic::aarch64_sve_sqsub: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sqsub_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sqsub_u); case Intrinsic::aarch64_sve_uqsub: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_uqsub_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_uqsub_u); case Intrinsic::aarch64_sve_tbl: return instCombineSVETBL(IC, II); case Intrinsic::aarch64_sve_uunpkhi: diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp 
b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index b657a0954d78..302116447efc 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -1166,7 +1166,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) getActionDefinitionsBuilder(G_FMAD).lower(); // Access to floating-point environment. - getActionDefinitionsBuilder({G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE}) + getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV, + G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE}) .libcall(); getActionDefinitionsBuilder(G_IS_FPCLASS).lower(); diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp index 496ab18e9b19..6e074b6a63c4 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp @@ -120,7 +120,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, assert((!Target.getSymA() || Target.getSymA()->getKind() == MCSymbolRefExpr::VK_None || - Target.getSymA()->getKind() == MCSymbolRefExpr::VK_PLT) && + Target.getSymA()->getKind() == MCSymbolRefExpr::VK_PLT || + Target.getSymA()->getKind() == MCSymbolRefExpr::VK_GOTPCREL) && "Should only be expression-level modifiers here"); assert((!Target.getSymB() || @@ -206,7 +207,10 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, case FK_Data_2: return R_CLS(ABS16); case FK_Data_4: - return R_CLS(ABS32); + return (!IsILP32 && + Target.getAccessVariant() == MCSymbolRefExpr::VK_GOTPCREL) + ? ELF::R_AARCH64_GOTPCREL32 + : R_CLS(ABS32); case FK_Data_8: if (IsILP32) { Ctx.reportError(Fixup.getLoc(), diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 70f3c2c99f0f..44d9a8ac7cb6 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -1268,7 +1268,7 @@ multiclass sve2_int_perm_revd<string asm, SDPatternOperator op> { } class sve2_clamp<string asm, bits<2> sz, bit U, ZPRRegOp zpr_ty> - : I<(outs zpr_ty:$Zd), (ins zpr_ty:$Zn, zpr_ty:$Zm, zpr_ty:$_Zd), + : I<(outs zpr_ty:$Zd), (ins zpr_ty:$_Zd, zpr_ty:$Zn, zpr_ty:$Zm), asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> { bits<5> Zm; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td index 0c77fe725958..b9411e205212 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td @@ -111,7 +111,7 @@ def smulu64 : GICombineRule< [{ return matchCombine_s_mul_u64(*${smul}, ${matchinfo}); }]), (apply [{ applyCombine_s_mul_u64(*${smul}, ${matchinfo}); }])>; -def sign_exension_in_reg_matchdata : GIDefMatchData<"MachineInstr *">; +def sign_exension_in_reg_matchdata : GIDefMatchData<"std::pair<MachineInstr *, unsigned>">; def sign_extension_in_reg : GICombineRule< (defs root:$sign_inreg, sign_exension_in_reg_matchdata:$matchinfo), diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 719ae2e8750c..41462d7a133e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -1579,13 +1579,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, bool AMDGPUDAGToDAGISel::SelectBUFSOffset(SDValue ByteOffsetNode, SDValue &SOffset) const { - if (Subtarget->hasRestrictedSOffset()) { - if (auto SOffsetConst = 
dyn_cast<ConstantSDNode>(ByteOffsetNode)) { - if (SOffsetConst->isZero()) { - SOffset = CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32); - return true; - } - } + if (Subtarget->hasRestrictedSOffset() && isNullConstant(ByteOffsetNode)) { + SOffset = CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32); + return true; } SOffset = ByteOffsetNode; @@ -2483,7 +2479,7 @@ void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) { SDValue PtrBase = Ptr.getOperand(0); SDValue PtrOffset = Ptr.getOperand(1); - const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue(); + const APInt &OffsetVal = PtrOffset->getAsAPIntVal(); if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) { N = glueCopyToM0(N, PtrBase); Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp index d2a02143e4e7..5762f1906a16 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp @@ -1026,6 +1026,51 @@ public: return N; } + /// Strip "amdgpu-no-lds-kernel-id" from any functions where we may have + /// introduced its use. If AMDGPUAttributor ran prior to the pass, we inferred + /// the lack of llvm.amdgcn.lds.kernel.id calls. + void removeNoLdsKernelIdFromReachable(CallGraph &CG, Function *KernelRoot) { + KernelRoot->removeFnAttr("amdgpu-no-lds-kernel-id"); + + SmallVector<Function *> Tmp({CG[KernelRoot]->getFunction()}); + if (!Tmp.back()) + return; + + SmallPtrSet<Function *, 8> Visited; + bool SeenUnknownCall = false; + + do { + Function *F = Tmp.pop_back_val(); + + for (auto &N : *CG[F]) { + if (!N.second) + continue; + + Function *Callee = N.second->getFunction(); + if (!Callee) { + if (!SeenUnknownCall) { + SeenUnknownCall = true; + + // If we see any indirect calls, assume nothing about potential + // targets. + // TODO: This could be refined to possible LDS global users. + for (auto &N : *CG.getExternalCallingNode()) { + Function *PotentialCallee = N.second->getFunction(); + if (!isKernelLDS(PotentialCallee)) + PotentialCallee->removeFnAttr("amdgpu-no-lds-kernel-id"); + } + + continue; + } + } + + Callee->removeFnAttr("amdgpu-no-lds-kernel-id"); + if (Visited.insert(Callee).second) + Tmp.push_back(Callee); + } + } while (!Tmp.empty()); + } + DenseMap<Function *, GlobalVariable *> lowerDynamicLDSVariables( Module &M, LDSUsesInfoTy &LDSUsesInfo, DenseSet<Function *> const &KernelsThatIndirectlyAllocateDynamicLDS, @@ -1175,6 +1220,13 @@ public: M, TableLookupVariablesOrdered, OrderedKernels, KernelToReplacement); replaceUsesInInstructionsWithTableLookup(M, TableLookupVariablesOrdered, LookupTable); + + // Strip amdgpu-no-lds-kernel-id from all functions reachable from the + // kernel. We may have inferred this wasn't used prior to the pass. + // + // TODO: We could filter out subgraphs that do not access LDS globals. + for (Function *F : KernelsThatAllocateTableLDS) + removeNoLdsKernelIdFromReachable(CG, F); } DenseMap<Function *, GlobalVariable *> KernelToCreatedDynamicLDS = diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp index 21bfab52c6c4..bb1d6cb72e80 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp @@ -99,10 +99,10 @@ public: // Combine unsigned buffer load and signed extension instructions to generate // signed buffer laod instructions. 
- bool matchCombineSignExtendInReg(MachineInstr &MI, - MachineInstr *&MatchInfo) const; - void applyCombineSignExtendInReg(MachineInstr &MI, - MachineInstr *&MatchInfo) const; + bool matchCombineSignExtendInReg( + MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchInfo) const; + void applyCombineSignExtendInReg( + MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchInfo) const; // Find the s_mul_u64 instructions where the higher bits are either // zero-extended or sign-extended. @@ -395,34 +395,36 @@ bool AMDGPUPostLegalizerCombinerImpl::matchRemoveFcanonicalize( // Identify buffer_load_{u8, u16}. bool AMDGPUPostLegalizerCombinerImpl::matchCombineSignExtendInReg( - MachineInstr &MI, MachineInstr *&SubwordBufferLoad) const { - Register Op0Reg = MI.getOperand(1).getReg(); - SubwordBufferLoad = MRI.getVRegDef(Op0Reg); - - if (!MRI.hasOneNonDBGUse(Op0Reg)) + MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchData) const { + Register LoadReg = MI.getOperand(1).getReg(); + if (!MRI.hasOneNonDBGUse(LoadReg)) return false; // Check if the first operand of the sign extension is a subword buffer load // instruction. - return SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE || - SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT; + MachineInstr *LoadMI = MRI.getVRegDef(LoadReg); + int64_t Width = MI.getOperand(2).getImm(); + switch (LoadMI->getOpcode()) { + case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE: + MatchData = {LoadMI, AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE}; + return Width == 8; + case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT: + MatchData = {LoadMI, AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT}; + return Width == 16; + } + return false; } // Combine buffer_load_{u8, u16} and the sign extension instruction to generate // buffer_load_{i8, i16}. void AMDGPUPostLegalizerCombinerImpl::applyCombineSignExtendInReg( - MachineInstr &MI, MachineInstr *&SubwordBufferLoad) const { - // Modify the opcode and the destination of buffer_load_{u8, u16}: - // Replace the opcode. - unsigned Opc = - SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE - ? AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE - : AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT; - SubwordBufferLoad->setDesc(TII.get(Opc)); - // Update the destination register of SubwordBufferLoad with the destination - // register of the sign extension. + MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchData) const { + auto [LoadMI, NewOpcode] = MatchData; + LoadMI->setDesc(TII.get(NewOpcode)); + // Update the destination register of the load with the destination register + // of the sign extension. Register SignExtendInsnDst = MI.getOperand(0).getReg(); - SubwordBufferLoad->getOperand(0).setReg(SignExtendInsnDst); + LoadMI->getOperand(0).setReg(SignExtendInsnDst); // Remove the sign extension. 
MI.eraseFromParent(); } diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index b7f043860115..ba79affe683d 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1342,10 +1342,8 @@ private: unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); bool ParseRegRange(unsigned& Num, unsigned& Width); - unsigned getRegularReg(RegisterKind RegKind, - unsigned RegNum, - unsigned RegWidth, - SMLoc Loc); + unsigned getRegularReg(RegisterKind RegKind, unsigned RegNum, unsigned SubReg, + unsigned RegWidth, SMLoc Loc); bool isRegister(); bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; @@ -2616,6 +2614,8 @@ AMDGPUAsmParser::isRegister(const AsmToken &Token, StringRef RegName = Reg->Name; StringRef RegSuffix = Str.substr(RegName.size()); if (!RegSuffix.empty()) { + RegSuffix.consume_back(".l"); + RegSuffix.consume_back(".h"); unsigned Num; // A single register with an index: rXX if (getRegNum(RegSuffix, Num)) @@ -2636,12 +2636,9 @@ AMDGPUAsmParser::isRegister() return isRegister(getToken(), peekToken()); } -unsigned -AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, - unsigned RegNum, - unsigned RegWidth, - SMLoc Loc) { - +unsigned AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum, + unsigned SubReg, unsigned RegWidth, + SMLoc Loc) { assert(isRegularReg(RegKind)); unsigned AlignSize = 1; @@ -2670,7 +2667,17 @@ AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, return AMDGPU::NoRegister; } - return RC.getRegister(RegIdx); + unsigned Reg = RC.getRegister(RegIdx); + + if (SubReg) { + Reg = TRI->getSubReg(Reg, SubReg); + + // Currently all regular registers have their .l and .h subregisters, so + // we should never need to generate an error here. + assert(Reg && "Invalid subregister!"); + } + + return Reg; } bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) { @@ -2748,7 +2755,17 @@ unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, RegKind = RI->Kind; StringRef RegSuffix = RegName.substr(RI->Name.size()); + unsigned SubReg = NoSubRegister; if (!RegSuffix.empty()) { + // We don't know the opcode till we are done parsing, so we don't know if + // registers should be 16 or 32 bit. It is therefore mandatory to put .l or + // .h to correctly specify 16 bit registers. We also can't determine class + // VGPR_16_Lo128 or VGPR_16, so always parse them as VGPR_16. + if (RegSuffix.consume_back(".l")) + SubReg = AMDGPU::lo16; + else if (RegSuffix.consume_back(".h")) + SubReg = AMDGPU::hi16; + // Single 32-bit register: vXX. 
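On the suffix handling above: StringRef::consume_back() removes the suffix only when it is present and reports whether it did, so the register index that remains can be parsed exactly as before. A small sketch, with plain integers standing in for the AMDGPU subregister indices (lo16/hi16/NoSubRegister), assumed only for illustration:

#include "llvm/ADT/StringRef.h"
#include <cassert>
using llvm::StringRef;

int main() {
  StringRef RegSuffix = "5.h"; // token remainder after the "v" prefix
  unsigned SubReg = 0;         // stand-in for AMDGPU::NoSubRegister
  if (RegSuffix.consume_back(".l"))
    SubReg = 1;                // stand-in for AMDGPU::lo16
  else if (RegSuffix.consume_back(".h"))
    SubReg = 2;                // stand-in for AMDGPU::hi16
  // The suffix is gone and only the numeric index remains to be parsed.
  assert(SubReg == 2 && RegSuffix == "5");
  return 0;
}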
if (!getRegNum(RegSuffix, RegNum)) { Error(Loc, "invalid register index"); @@ -2761,7 +2778,7 @@ unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, return AMDGPU::NoRegister; } - return getRegularReg(RegKind, RegNum, RegWidth, Loc); + return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc); } unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, @@ -2813,7 +2830,7 @@ unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, } if (isRegularReg(RegKind)) - Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); + Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc); return Reg; } diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index a7d8ff0242b8..bcd93e30d6c2 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -1450,20 +1450,27 @@ bool GCNHazardRecognizer::fixLdsDirectVMEMHazard(MachineInstr *MI) { return false; return I.readsRegister(VDSTReg, &TRI) || I.modifiesRegister(VDSTReg, &TRI); }; - auto IsExpiredFn = [](const MachineInstr &I, int) { + bool LdsdirCanWait = ST.hasLdsWaitVMSRC(); + auto IsExpiredFn = [this, LdsdirCanWait](const MachineInstr &I, int) { return SIInstrInfo::isVALU(I) || SIInstrInfo::isEXP(I) || (I.getOpcode() == AMDGPU::S_WAITCNT && !I.getOperand(0).getImm()) || (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR && - AMDGPU::DepCtr::decodeFieldVmVsrc(I.getOperand(0).getImm()) == 0); + AMDGPU::DepCtr::decodeFieldVmVsrc(I.getOperand(0).getImm()) == 0) || + (LdsdirCanWait && SIInstrInfo::isLDSDIR(I) && + !TII.getNamedOperand(I, AMDGPU::OpName::waitvsrc)->getImm()); }; if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) == std::numeric_limits<int>::max()) return false; - BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), - TII.get(AMDGPU::S_WAITCNT_DEPCTR)) - .addImm(AMDGPU::DepCtr::encodeFieldVmVsrc(0)); + if (LdsdirCanWait) { + TII.getNamedOperand(*MI, AMDGPU::OpName::waitvsrc)->setImm(0); + } else { + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + TII.get(AMDGPU::S_WAITCNT_DEPCTR)) + .addImm(AMDGPU::DepCtr::encodeFieldVmVsrc(0)); + } return true; } diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index f6f37f5170a4..85d062a9a6f5 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -1128,6 +1128,8 @@ public: bool hasLdsDirect() const { return getGeneration() >= GFX11; } + bool hasLdsWaitVMSRC() const { return getGeneration() >= GFX12; } + bool hasVALUPartialForwardingHazard() const { return getGeneration() >= GFX11; } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp index d539d75fdff0..201cc8d01e2d 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp @@ -31,7 +31,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT, InlineAsmEnd = ";#ASMEND"; //===--- Data Emission Directives -------------------------------------===// - SunStyleELFSectionSwitchSyntax = true; UsesELFSectionDirectiveForBSS = true; //===--- Global Variable Emission Directives --------------------------===// diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 6ddc7e864fb2..5a9222e91588 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -8181,12 +8181,8 @@ 
SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, // SGPR_NULL to avoid generating an extra s_mov with zero. static SDValue selectSOffset(SDValue SOffset, SelectionDAG &DAG, const GCNSubtarget *Subtarget) { - if (Subtarget->hasRestrictedSOffset()) - if (auto SOffsetConst = dyn_cast<ConstantSDNode>(SOffset)) { - if (SOffsetConst->isZero()) { - return DAG.getRegister(AMDGPU::SGPR_NULL, MVT::i32); - } - } + if (Subtarget->hasRestrictedSOffset() && isNullConstant(SOffset)) + return DAG.getRegister(AMDGPU::SGPR_NULL, MVT::i32); return SOffset; } diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 1cb1d32707f2..1f480c248154 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -292,7 +292,7 @@ public: VgprVmemTypes[GprNo] = 0; } - void setNonKernelFunctionInitialState() { + void setStateOnFunctionEntryOrReturn() { setScoreUB(VS_CNT, getWaitCountMax(VS_CNT)); PendingEvents |= WaitEventMaskForInst[VS_CNT]; } @@ -1487,6 +1487,7 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst, if (callWaitsOnFunctionReturn(Inst)) { // Act as a wait on everything ScoreBrackets->applyWaitcnt(AMDGPU::Waitcnt::allZeroExceptVsCnt()); + ScoreBrackets->setStateOnFunctionEntryOrReturn(); } else { // May need to way wait for anything. ScoreBrackets->applyWaitcnt(AMDGPU::Waitcnt()); @@ -1879,7 +1880,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { auto NonKernelInitialState = std::make_unique<WaitcntBrackets>(ST, Limits, Encoding); - NonKernelInitialState->setNonKernelFunctionInitialState(); + NonKernelInitialState->setStateOnFunctionEntryOrReturn(); BlockInfos[&EntryBB].Incoming = std::move(NonKernelInitialState); Modified = true; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index fee900b3efb2..e50f5f28e030 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -5276,10 +5276,15 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const { case AMDGPU::S_FLOOR_F32: return AMDGPU::V_FLOOR_F32_e64; case AMDGPU::S_TRUNC_F32: return AMDGPU::V_TRUNC_F32_e64; case AMDGPU::S_RNDNE_F32: return AMDGPU::V_RNDNE_F32_e64; - case AMDGPU::S_CEIL_F16: return AMDGPU::V_CEIL_F16_t16_e64; - case AMDGPU::S_FLOOR_F16: return AMDGPU::V_FLOOR_F16_t16_e64; - case AMDGPU::S_TRUNC_F16: return AMDGPU::V_TRUNC_F16_t16_e64; - case AMDGPU::S_RNDNE_F16: return AMDGPU::V_RNDNE_F16_t16_e64; + case AMDGPU::S_CEIL_F16: + return ST.useRealTrue16Insts() ? 
AMDGPU::V_CEIL_F16_t16_e64 + : AMDGPU::V_CEIL_F16_fake16_e64; + case AMDGPU::S_FLOOR_F16: + return AMDGPU::V_FLOOR_F16_fake16_e64; + case AMDGPU::S_TRUNC_F16: + return AMDGPU::V_TRUNC_F16_fake16_e64; + case AMDGPU::S_RNDNE_F16: + return AMDGPU::V_RNDNE_F16_fake16_e64; case AMDGPU::S_ADD_F32: return AMDGPU::V_ADD_F32_e64; case AMDGPU::S_SUB_F32: return AMDGPU::V_SUB_F32_e64; case AMDGPU::S_MIN_F32: return AMDGPU::V_MIN_F32_e64; @@ -5328,15 +5333,15 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const { case AMDGPU::S_CMP_NEQ_F16: return AMDGPU::V_CMP_NEQ_F16_t16_e64; case AMDGPU::S_CMP_NLT_F16: return AMDGPU::V_CMP_NLT_F16_t16_e64; case AMDGPU::V_S_EXP_F32_e64: return AMDGPU::V_EXP_F32_e64; - case AMDGPU::V_S_EXP_F16_e64: return AMDGPU::V_EXP_F16_t16_e64; + case AMDGPU::V_S_EXP_F16_e64: return AMDGPU::V_EXP_F16_fake16_e64; case AMDGPU::V_S_LOG_F32_e64: return AMDGPU::V_LOG_F32_e64; - case AMDGPU::V_S_LOG_F16_e64: return AMDGPU::V_LOG_F16_t16_e64; + case AMDGPU::V_S_LOG_F16_e64: return AMDGPU::V_LOG_F16_fake16_e64; case AMDGPU::V_S_RCP_F32_e64: return AMDGPU::V_RCP_F32_e64; - case AMDGPU::V_S_RCP_F16_e64: return AMDGPU::V_RCP_F16_t16_e64; + case AMDGPU::V_S_RCP_F16_e64: return AMDGPU::V_RCP_F16_fake16_e64; case AMDGPU::V_S_RSQ_F32_e64: return AMDGPU::V_RSQ_F32_e64; - case AMDGPU::V_S_RSQ_F16_e64: return AMDGPU::V_RSQ_F16_t16_e64; + case AMDGPU::V_S_RSQ_F16_e64: return AMDGPU::V_RSQ_F16_fake16_e64; case AMDGPU::V_S_SQRT_F32_e64: return AMDGPU::V_SQRT_F32_e64; - case AMDGPU::V_S_SQRT_F16_e64: return AMDGPU::V_SQRT_F16_t16_e64; + case AMDGPU::V_S_SQRT_F16_e64: return AMDGPU::V_SQRT_F16_fake16_e64; } llvm_unreachable( "Unexpected scalar opcode without corresponding vector one!"); @@ -7266,8 +7271,14 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist, if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0_modifiers) >= 0) NewInstr.addImm(0); - if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0) >= 0) - NewInstr->addOperand(Inst.getOperand(1)); + if (AMDGPU::hasNamedOperand(NewOpcode, AMDGPU::OpName::src0)) { + MachineOperand Src = Inst.getOperand(1); + if (AMDGPU::isTrue16Inst(NewOpcode) && ST.useRealTrue16Insts() && + Src.isReg() && RI.isVGPR(MRI, Src.getReg())) + NewInstr.addReg(Src.getReg(), 0, AMDGPU::lo16); + else + NewInstr->addOperand(Src); + } if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) { // We are converting these to a BFE, so we need to add the missing diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index f07b8fa0ea4c..04c92155f5aa 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1773,28 +1773,27 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC, class getInsVOP3Base<RegisterOperand Src0RC, RegisterOperand Src1RC, RegisterOperand Src2RC, int NumSrcArgs, bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod, - Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOpSel, - bit IsVOP3P> { + Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOpSel> { // getInst64 handles clamp and omod. 
implicit mutex between vop3p and omod dag base = getIns64 <Src0RC, Src1RC, Src2RC, NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret; dag opsel = (ins op_sel0:$op_sel); - dag vop3pOpsel = (ins op_sel_hi0:$op_sel_hi); - dag vop3pFields = !con(!if(HasOpSel, vop3pOpsel, (ins)), (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi)); - - dag ret = !con(base, - !if(HasOpSel, opsel,(ins)), - !if(IsVOP3P, vop3pFields,(ins))); + dag ret = !con(base, !if(HasOpSel, opsel, (ins))); } class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC, RegisterOperand Src2RC, int NumSrcArgs, bit HasClamp, bit HasOpSel, Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> { - dag ret = getInsVOP3Base<Src0RC, Src1RC, Src2RC, NumSrcArgs, + dag base = getInsVOP3Base<Src0RC, Src1RC, Src2RC, NumSrcArgs, HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/, - 0/*HasOMod*/, Src0Mod, Src1Mod, Src2Mod, - HasOpSel, 1/*IsVOP3P*/>.ret; + 0/*HasOMod*/, Src0Mod, Src1Mod, Src2Mod, HasOpSel>.ret; + + dag vop3pOpsel = (ins op_sel_hi0:$op_sel_hi); + dag vop3p_neg = (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi); + + dag vop3pFields = !con(!if(HasOpSel, vop3pOpsel, (ins)), vop3p_neg); + dag ret = !con(base, vop3pFields); } class getInsVOP3OpSel <RegisterOperand Src0RC, RegisterOperand Src1RC, @@ -1804,7 +1803,7 @@ class getInsVOP3OpSel <RegisterOperand Src0RC, RegisterOperand Src1RC, dag ret = getInsVOP3Base<Src0RC, Src1RC, Src2RC, NumSrcArgs, HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/, HasOMod, - Src0Mod, Src1Mod, Src2Mod, 1/*HasOpSel*/, 0>.ret; + Src0Mod, Src1Mod, Src2Mod, /*HasOpSel=*/1>.ret; } class getInsDPPBase <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC, @@ -2390,9 +2389,15 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> { field dag InsDPP8 = getInsDPP8<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs, HasModifiers, Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret; - field dag InsVOP3Base = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP, + defvar InsVOP3DPPBase = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP, Src2VOP3DPP, NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, HasOMod, - Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel, IsVOP3P>.ret; + Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel>.ret; + defvar InsVOP3PDPPBase = getInsVOP3P<Src0VOP3DPP, Src1VOP3DPP, + Src2VOP3DPP, NumSrcArgs, HasClamp, HasOpSel, + Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP>.ret; + + field dag InsVOP3Base = !if(IsVOP3P, InsVOP3PDPPBase, InsVOP3DPPBase); + field dag InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret; field dag InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret; field dag InsVOP3DPP8 = getInsVOP3DPP8<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret; diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 27a7c29cb1ac..99960c94e598 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -74,6 +74,7 @@ class VOP1_Real <VOP1_Pseudo ps, int EncodingFamily, string real_name = ps.Mnemo // copy relevant pseudo op flags let SubtargetPredicate = ps.SubtargetPredicate; + let OtherPredicates = ps.OtherPredicates; let AsmMatchConverter = ps.AsmMatchConverter; let AsmVariantName = ps.AsmVariantName; let Constraints = ps.Constraints; @@ -157,8 +158,11 @@ multiclass VOP1Inst_t16<string opName, let OtherPredicates = [NotHasTrue16BitInsts, Has16BitInsts] in { defm NAME : VOP1Inst<opName, P, node>; } - let OtherPredicates = [HasTrue16BitInsts] in { - defm 
_t16 : VOP1Inst<opName#"_t16", VOPProfile_Fake16<P>, node>; + let OtherPredicates = [UseRealTrue16Insts] in { + defm _t16 : VOP1Inst<opName#"_t16", VOPProfile_True16<P>, node>; + } + let OtherPredicates = [UseFakeTrue16Insts] in { + defm _fake16 : VOP1Inst<opName#"_fake16", VOPProfile_Fake16<P>, node>; } } @@ -679,6 +683,7 @@ class VOP1_DPP<bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile p = ps.Pfl, bit isDPP1 let SchedRW = ps.SchedRW; let Uses = ps.Uses; let TRANS = ps.TRANS; + let OtherPredicates = ps.OtherPredicates; bits<8> vdst; let Inst{8-0} = 0xfa; @@ -707,6 +712,7 @@ class VOP1_DPP8<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> : let Defs = ps.Defs; let SchedRW = ps.SchedRW; let Uses = ps.Uses; + let OtherPredicates = ps.OtherPredicates; bits<8> vdst; let Inst{8-0} = fi; @@ -742,7 +748,9 @@ multiclass VOP1_Real_e32<GFXGen Gen, bits<9> op, string opName = NAME> { multiclass VOP1_Real_e32_with_name<GFXGen Gen, bits<9> op, string opName, string asmName> { defvar ps = !cast<VOP1_Pseudo>(opName#"_e32"); - let AsmString = asmName # ps.AsmOperands in { + let AsmString = asmName # ps.AsmOperands, + DecoderNamespace = Gen.DecoderNamespace # + !if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in { defm NAME : VOP1_Real_e32<Gen, op, opName>; } } @@ -761,7 +769,9 @@ multiclass VOP1_Real_dpp<GFXGen Gen, bits<9> op, string opName = NAME> { multiclass VOP1_Real_dpp_with_name<GFXGen Gen, bits<9> op, string opName, string asmName> { defvar ps = !cast<VOP1_Pseudo>(opName#"_e32"); - let AsmString = asmName # ps.Pfl.AsmDPP16 in { + let AsmString = asmName # ps.Pfl.AsmDPP16, + DecoderNamespace = "DPP" # Gen.DecoderNamespace # + !if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in { defm NAME : VOP1_Real_dpp<Gen, op, opName>; } } @@ -774,7 +784,9 @@ multiclass VOP1_Real_dpp8<GFXGen Gen, bits<9> op, string opName = NAME> { multiclass VOP1_Real_dpp8_with_name<GFXGen Gen, bits<9> op, string opName, string asmName> { defvar ps = !cast<VOP1_Pseudo>(opName#"_e32"); - let AsmString = asmName # ps.Pfl.AsmDPP8 in { + let AsmString = asmName # ps.Pfl.AsmDPP8, + DecoderNamespace = "DPP8" # Gen.DecoderNamespace # + !if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in { defm NAME : VOP1_Real_dpp8<Gen, op, opName>; } } @@ -854,29 +866,30 @@ defm V_CLS_I32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x03b, "V_FFBH_I32", "v_cls_i32">; defm V_PERMLANE64_B32 : VOP1Only_Real_gfx11_gfx12<0x067>; defm V_MOV_B16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x01c, "v_mov_b16">; -defm V_NOT_B16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x069, "v_not_b16">; -defm V_CVT_I32_I16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06a, "v_cvt_i32_i16">; -defm V_CVT_U32_U16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06b, "v_cvt_u32_u16">; +defm V_NOT_B16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x069, "v_not_b16">; +defm V_CVT_I32_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06a, "v_cvt_i32_i16">; +defm V_CVT_U32_U16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06b, "v_cvt_u32_u16">; defm V_CVT_F16_U16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x050, "v_cvt_f16_u16">; defm V_CVT_F16_I16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x051, "v_cvt_f16_i16">; defm V_CVT_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x052, "v_cvt_u16_f16">; defm V_CVT_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x053, "v_cvt_i16_f16">; -defm V_RCP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x054, "v_rcp_f16">; -defm V_SQRT_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x055, "v_sqrt_f16">; -defm V_RSQ_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x056, "v_rsq_f16">; -defm V_LOG_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x057, "v_log_f16">; 
-defm V_EXP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">; -defm V_FREXP_MANT_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x059, "v_frexp_mant_f16">; +defm V_RCP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x054, "v_rcp_f16">; +defm V_SQRT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x055, "v_sqrt_f16">; +defm V_RSQ_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x056, "v_rsq_f16">; +defm V_LOG_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x057, "v_log_f16">; +defm V_EXP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">; +defm V_FREXP_MANT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x059, "v_frexp_mant_f16">; defm V_FREXP_EXP_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05a, "v_frexp_exp_i16_f16">; -defm V_FLOOR_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">; +defm V_FLOOR_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">; defm V_CEIL_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">; -defm V_TRUNC_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05d, "v_trunc_f16">; -defm V_RNDNE_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05e, "v_rndne_f16">; -defm V_FRACT_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05f, "v_fract_f16">; -defm V_SIN_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x060, "v_sin_f16">; -defm V_COS_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x061, "v_cos_f16">; -defm V_SAT_PK_U8_I16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x062, "v_sat_pk_u8_i16">; +defm V_CEIL_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">; +defm V_TRUNC_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05d, "v_trunc_f16">; +defm V_RNDNE_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05e, "v_rndne_f16">; +defm V_FRACT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05f, "v_fract_f16">; +defm V_SIN_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x060, "v_sin_f16">; +defm V_COS_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x061, "v_cos_f16">; +defm V_SAT_PK_U8_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x062, "v_sat_pk_u8_i16">; defm V_CVT_NORM_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x063, "v_cvt_norm_i16_f16">; defm V_CVT_NORM_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x064, "v_cvt_norm_u16_f16">; diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index ecee61daa1c8..48d4e259bc1c 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -111,8 +111,8 @@ class VOP2_Real <VOP2_Pseudo ps, int EncodingFamily, string real_name = ps.Mnemo class VOP2_Real_Gen <VOP2_Pseudo ps, GFXGen Gen, string real_name = ps.Mnemonic> : VOP2_Real <ps, Gen.Subtarget, real_name> { - let AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, - Gen.AssemblerPredicate); + let AssemblerPredicate = Gen.AssemblerPredicate; + let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []); let DecoderNamespace = Gen.DecoderNamespace# !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"); } @@ -437,7 +437,7 @@ class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, v let InsDPP16 = !con(InsDPP, (ins FI:$fi)); let InsVOP3Base = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP, RegisterOperand<VGPR_32>, 3, 0, HasModifiers, HasModifiers, HasOMod, - Src0ModVOP3DPP, Src1ModVOP3DPP, Src2Mod, HasOpSel, 0/*IsVOP3P*/>.ret; + Src0ModVOP3DPP, Src1ModVOP3DPP, Src2Mod, HasOpSel>.ret; // We need a dummy src2 tied to dst to track the use of that register for s_delay_alu let InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X, VGPRSrc_32:$src2X); let InsVOPDXDeferred = @@ 
-1275,8 +1275,8 @@ class VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps, int subtarget, class VOP2_DPP16_Gen<bits<6> op, VOP2_DPP_Pseudo ps, GFXGen Gen, string opName = ps.OpName, VOPProfile p = ps.Pfl> : VOP2_DPP16<op, ps, Gen.Subtarget, opName, p> { - let AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, - Gen.AssemblerPredicate); + let AssemblerPredicate = Gen.AssemblerPredicate; + let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []); let DecoderNamespace = "DPP"#Gen.DecoderNamespace# !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"); } @@ -1304,8 +1304,8 @@ class VOP2_DPP8<bits<6> op, VOP2_Pseudo ps, class VOP2_DPP8_Gen<bits<6> op, VOP2_Pseudo ps, GFXGen Gen, VOPProfile p = ps.Pfl> : VOP2_DPP8<op, ps, p> { - let AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, - Gen.AssemblerPredicate); + let AssemblerPredicate = Gen.AssemblerPredicate; + let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []); let DecoderNamespace = "DPP8"#Gen.DecoderNamespace# !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"); } diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index fd4626d902ac..c4b9e7063093 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -208,8 +208,8 @@ class VOP3_Real <VOP_Pseudo ps, int EncodingFamily, string asm_name = ps.Mnemoni class VOP3_Real_Gen <VOP_Pseudo ps, GFXGen Gen, string asm_name = ps.Mnemonic> : VOP3_Real <ps, Gen.Subtarget, asm_name> { - let AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, - Gen.AssemblerPredicate); + let AssemblerPredicate = Gen.AssemblerPredicate; + let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []); let DecoderNamespace = Gen.DecoderNamespace# !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"); } @@ -1340,8 +1340,8 @@ class VOP3_DPP16<bits<10> op, VOP_DPP_Pseudo ps, int subtarget, class VOP3_DPP16_Gen<bits<10> op, VOP_DPP_Pseudo ps, GFXGen Gen, string opName = ps.OpName> : VOP3_DPP16 <op, ps, Gen.Subtarget, opName> { - let AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, - Gen.AssemblerPredicate); + let AssemblerPredicate = Gen.AssemblerPredicate; + let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []); let DecoderNamespace = "DPP"#Gen.DecoderNamespace# !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"); } @@ -1470,9 +1470,8 @@ multiclass VOP3_Real_dpp8_with_name<GFXGen Gen, bits<10> op, string opName, let AsmString = asmName # ps.Pfl.AsmVOP3DPP8, DecoderNamespace = "DPP8"#Gen.DecoderNamespace# !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"), - AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, - Gen.AssemblerPredicate) in { - + OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], + [TruePredicate]) in { defm NAME : VOP3_Real_dpp8_Base<Gen, op, opName>; } } diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 568085bd0ab3..f8a281032c77 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -9577,8 +9577,7 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) { SmallVector<SDValue, 8> Ops; SDLoc dl(N); for (unsigned i = 0; i != NumElts; ++i) { - ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i)); - const APInt &CInt = C->getAPIntValue(); + const APInt &CInt = N->getConstantOperandAPInt(i); // Element types smaller than 32 bits are not legal, so use i32 elements. // The values are implicitly truncated so sext vs. 
zext doesn't matter. Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32)); @@ -18080,8 +18079,7 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D SDValue Op0 = CMOV->getOperand(0); SDValue Op1 = CMOV->getOperand(1); - auto CCNode = cast<ConstantSDNode>(CMOV->getOperand(2)); - auto CC = CCNode->getAPIntValue().getLimitedValue(); + auto CC = CMOV->getConstantOperandAPInt(2).getLimitedValue(); SDValue CmpZ = CMOV->getOperand(4); // The compare must be against zero. @@ -20109,8 +20107,7 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, // The operand to BFI is already a mask suitable for removing the bits it // sets. - ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2)); - const APInt &Mask = CI->getAPIntValue(); + const APInt &Mask = Op.getConstantOperandAPInt(2); Known.Zero &= Mask; Known.One &= Mask; return; diff --git a/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp index e68904863cfc..fc066f001316 100644 --- a/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -1149,15 +1149,10 @@ SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { // but they are different from CMP. // FIXME: since we're doing a post-processing, use a pseudoinstr here, so // lowering & isel wouldn't diverge. - bool andCC = false; - if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) { - if (RHSC->isZero() && LHS.hasOneUse() && - (LHS.getOpcode() == ISD::AND || - (LHS.getOpcode() == ISD::TRUNCATE && - LHS.getOperand(0).getOpcode() == ISD::AND))) { - andCC = true; - } - } + bool andCC = isNullConstant(RHS) && LHS.hasOneUse() && + (LHS.getOpcode() == ISD::AND || + (LHS.getOpcode() == ISD::TRUNCATE && + LHS.getOperand(0).getOpcode() == ISD::AND)); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); SDValue TargetCC; SDValue Flag = EmitCMP(LHS, RHS, TargetCC, CC, dl, DAG); diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index c65090d915ef..34c5569b8076 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -2019,9 +2019,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, DL, RetTy, Args, Outs, retAlignment, HasVAArgs ? 
std::optional<std::pair<unsigned, const APInt &>>(std::make_pair( - CLI.NumFixedArgs, - cast<ConstantSDNode>(VADeclareParam->getOperand(1)) - ->getAPIntValue())) + CLI.NumFixedArgs, VADeclareParam->getConstantOperandAPInt(1))) : std::nullopt, *CB, UniqueCallSite); const char *ProtoStr = nvTM->getStrPool().save(Proto).data(); @@ -2297,7 +2295,7 @@ SDValue NVPTXTargetLowering::LowerBUILD_VECTOR(SDValue Op, if (VT == MVT::v2f16 || VT == MVT::v2bf16) Value = cast<ConstantFPSDNode>(Operand)->getValueAPF().bitcastToAPInt(); else if (VT == MVT::v2i16 || VT == MVT::v4i8) - Value = cast<ConstantSDNode>(Operand)->getAPIntValue(); + Value = Operand->getAsAPIntVal(); else llvm_unreachable("Unsupported type"); // i8 values are carried around as i16, so we need to zero out upper bits, diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 13665985f52e..e1cced327544 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -164,6 +164,9 @@ def True : Predicate<"true">; class hasPTX<int version>: Predicate<"Subtarget->getPTXVersion() >= " # version>; class hasSM<int version>: Predicate<"Subtarget->getSmVersion() >= " # version>; +// Explicit records for arch-accelerated SM versions +def hasSM90a : Predicate<"Subtarget->getFullSmVersion() == 901">; + // non-sync shfl instructions are not available on sm_70+ in PTX6.4+ def hasSHFL : Predicate<"!(Subtarget->getSmVersion() >= 70" "&& Subtarget->getPTXVersion() >= 64)">; diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 85eae44f349a..6b062a7f3912 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -6727,3 +6727,16 @@ def is_explicit_cluster: NVPTXInst<(outs Int1Regs:$d), (ins), "mov.pred\t$d, %is_explicit_cluster;", [(set Int1Regs:$d, (int_nvvm_is_explicit_cluster))]>, Requires<[hasSM<90>, hasPTX<78>]>; + +// setmaxnreg inc/dec intrinsics +let isConvergent = true in { +multiclass SET_MAXNREG<string Action, Intrinsic Intr> { + def : NVPTXInst<(outs), (ins i32imm:$reg_count), + "setmaxnreg." # Action # ".sync.aligned.u32 $reg_count;", + [(Intr timm:$reg_count)]>, + Requires<[hasSM90a, hasPTX<80>]>; +} + +defm INT_SET_MAXNREG_INC : SET_MAXNREG<"inc", int_nvvm_setmaxnreg_inc_sync_aligned_u32>; +defm INT_SET_MAXNREG_DEC : SET_MAXNREG<"dec", int_nvvm_setmaxnreg_dec_sync_aligned_u32>; +} // isConvergent diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 235df1880b37..4e164fda1d8d 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -16241,7 +16241,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // Since we are doing this pre-legalize, the RHS can be a constant of // arbitrary bitwidth which may cause issues when trying to get the value // from the underlying APInt. 
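The guard that follows in the PowerPC hunk exists because APInt::getZExtValue() asserts when the value does not fit in 64 bits, so the code first checks isIntN(64). A hedged sketch of that guard in isolation; tryGet64 is an illustrative helper name, not code from the patch.

#include "llvm/ADT/APInt.h"
#include <cstdint>
using llvm::APInt;

// Extract a 64-bit value from a possibly wider constant, or report failure.
static bool tryGet64(const APInt &RHSAPInt, uint64_t &Out) {
  if (!RHSAPInt.isIntN(64)) // active bits must fit in 64
    return false;
  Out = RHSAPInt.getZExtValue(); // safe only once isIntN(64) holds
  return true;
}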
- auto RHSAPInt = cast<ConstantSDNode>(RHS)->getAPIntValue(); + auto RHSAPInt = RHS->getAsAPIntVal(); if (!RHSAPInt.isIntN(64)) break; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index b1601739fd45..bf756e39bd5d 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -1909,7 +1909,7 @@ def STWAT : X_RD5_RS5_IM5<31, 710, (outs), (ins gprc:$RST, gprc:$RA, u5imm:$RB), "stwat $RST, $RA, $RB", IIC_LdStStore>, Requires<[IsISA3_0]>; -let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in +let isTrap = 1, hasCtrlDep = 1 in def TRAP : XForm_24<31, 4, (outs), (ins), "trap", IIC_LdStLoad, [(trap)]>; def TWI : DForm_base<3, (outs), (ins u5imm:$RST, gprc:$RA, s16imm:$D, variable_ops), diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index d616aaeddf41..7d42481db57f 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -199,6 +199,8 @@ class RISCVAsmParser : public MCTargetAsmParser { ParseStatus parseInsnDirectiveOpcode(OperandVector &Operands); ParseStatus parseInsnCDirectiveOpcode(OperandVector &Operands); ParseStatus parseGPRAsFPR(OperandVector &Operands); + template <bool IsRV64Inst> ParseStatus parseGPRPair(OperandVector &Operands); + ParseStatus parseGPRPair(OperandVector &Operands, bool IsRV64Inst); ParseStatus parseFRMArg(OperandVector &Operands); ParseStatus parseFenceArg(OperandVector &Operands); ParseStatus parseReglist(OperandVector &Operands); @@ -466,6 +468,12 @@ public: bool isGPRAsFPR() const { return isGPR() && Reg.IsGPRAsFPR; } + bool isGPRPair() const { + return Kind == KindTy::Register && + RISCVMCRegisterClasses[RISCV::GPRPairRegClassID].contains( + Reg.RegNum); + } + static bool evaluateConstantImm(const MCExpr *Expr, int64_t &Imm, RISCVMCExpr::VariantKind &VK) { if (auto *RE = dyn_cast<RISCVMCExpr>(Expr)) { @@ -1295,11 +1303,15 @@ unsigned RISCVAsmParser::checkTargetMatchPredicate(MCInst &Inst) { const MCInstrDesc &MCID = MII.get(Inst.getOpcode()); for (unsigned I = 0; I < MCID.NumOperands; ++I) { - if (MCID.operands()[I].RegClass == RISCV::GPRPF64RegClassID) { + if (MCID.operands()[I].RegClass == RISCV::GPRPairRegClassID) { const auto &Op = Inst.getOperand(I); assert(Op.isReg()); MCRegister Reg = Op.getReg(); + if (RISCVMCRegisterClasses[RISCV::GPRPairRegClassID].contains(Reg)) + continue; + + // FIXME: We should form a paired register during parsing/matching. if (((Reg.id() - RISCV::X0) & 1) != 0) return Match_RequiresEvenGPRs; } @@ -2222,6 +2234,48 @@ ParseStatus RISCVAsmParser::parseGPRAsFPR(OperandVector &Operands) { return ParseStatus::Success; } +template <bool IsRV64> +ParseStatus RISCVAsmParser::parseGPRPair(OperandVector &Operands) { + return parseGPRPair(Operands, IsRV64); +} + +ParseStatus RISCVAsmParser::parseGPRPair(OperandVector &Operands, + bool IsRV64Inst) { + // If this is not an RV64 GPRPair instruction, don't parse as a GPRPair on + // RV64 as it will prevent matching the RV64 version of the same instruction + // that doesn't use a GPRPair. + // If this is an RV64 GPRPair instruction, there is no RV32 version so we can + // still parse as a pair. 
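For the pair parsing that follows, the key constraint is that a GPR pair always starts at an even x-register and covers xN and xN+1; odd starts are rejected with the "register must be even" diagnostic before the parser asks the register info for the covering GPRPair super-register. A tiny sketch of that constraint, using plain indices rather than the RISCV::X* enum:

#include <utility>

// Sketch: x10/x11 form a valid pair, x11/x12 do not.
static bool makePair(unsigned XIndex, std::pair<unsigned, unsigned> &Pair) {
  if (XIndex & 1)
    return false;              // odd start: "register must be even"
  Pair = {XIndex, XIndex + 1}; // even/odd halves of the GPRPair
  return true;
}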
+ if (!IsRV64Inst && isRV64()) + return ParseStatus::NoMatch; + + if (getLexer().isNot(AsmToken::Identifier)) + return ParseStatus::NoMatch; + + StringRef Name = getLexer().getTok().getIdentifier(); + MCRegister RegNo = matchRegisterNameHelper(isRVE(), Name); + + if (!RegNo) + return ParseStatus::NoMatch; + + if (!RISCVMCRegisterClasses[RISCV::GPRRegClassID].contains(RegNo)) + return ParseStatus::NoMatch; + + if ((RegNo - RISCV::X0) & 1) + return TokError("register must be even"); + + SMLoc S = getLoc(); + SMLoc E = SMLoc::getFromPointer(S.getPointer() + Name.size()); + getLexer().Lex(); + + const MCRegisterInfo *RI = getContext().getRegisterInfo(); + unsigned Pair = RI->getMatchingSuperReg( + RegNo, RISCV::sub_gpr_even, + &RISCVMCRegisterClasses[RISCV::GPRPairRegClassID]); + Operands.push_back(RISCVOperand::createReg(Pair, S, E)); + return ParseStatus::Success; +} + ParseStatus RISCVAsmParser::parseFRMArg(OperandVector &Operands) { if (getLexer().isNot(AsmToken::Identifier)) return TokError( @@ -3335,27 +3389,6 @@ bool RISCVAsmParser::validateInstruction(MCInst &Inst, return Error(Loc, "Operand must be constant 4."); } - bool IsAMOCAS_D = Opcode == RISCV::AMOCAS_D || Opcode == RISCV::AMOCAS_D_AQ || - Opcode == RISCV::AMOCAS_D_RL || - Opcode == RISCV::AMOCAS_D_AQ_RL; - bool IsAMOCAS_Q = Opcode == RISCV::AMOCAS_Q || Opcode == RISCV::AMOCAS_Q_AQ || - Opcode == RISCV::AMOCAS_Q_RL || - Opcode == RISCV::AMOCAS_Q_AQ_RL; - if ((!isRV64() && IsAMOCAS_D) || IsAMOCAS_Q) { - unsigned Rd = Inst.getOperand(0).getReg(); - unsigned Rs2 = Inst.getOperand(2).getReg(); - assert(Rd >= RISCV::X0 && Rd <= RISCV::X31); - if ((Rd - RISCV::X0) % 2 != 0) { - SMLoc Loc = Operands[1]->getStartLoc(); - return Error(Loc, "The destination register must be even."); - } - assert(Rs2 >= RISCV::X0 && Rs2 <= RISCV::X31); - if ((Rs2 - RISCV::X0) % 2 != 0) { - SMLoc Loc = Operands[2]->getStartLoc(); - return Error(Loc, "The source register must be even."); - } - } - const MCInstrDesc &MCID = MII.get(Opcode); if (!(MCID.TSFlags & RISCVII::ConstraintMask)) return false; diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp index ed80da14c795..4dd039159e29 100644 --- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -171,7 +171,7 @@ static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint32_t RegNo, return MCDisassembler::Success; } -static DecodeStatus DecodeGPRPF64RegisterClass(MCInst &Inst, uint32_t RegNo, +static DecodeStatus DecodeGPRPairRegisterClass(MCInst &Inst, uint32_t RegNo, uint64_t Address, const MCDisassembler *Decoder) { if (RegNo >= 32 || RegNo & 1) @@ -546,6 +546,10 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size, !STI.hasFeature(RISCV::Feature64Bit), DecoderTableRV32Zdinx32, "RV32Zdinx table (Double in Integer and rv32)"); + TRY_TO_DECODE(STI.hasFeature(RISCV::FeatureStdExtZacas) && + !STI.hasFeature(RISCV::Feature64Bit), + DecoderTableRV32Zacas32, + "RV32Zacas table (Compare-And-Swap and rv32)"); TRY_TO_DECODE_FEATURE(RISCV::FeatureStdExtZfinx, DecoderTableRVZfinx32, "RVZfinx table (Float in Integer)"); TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXVentanaCondOps, diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp index ab8070772fe5..ae02e86baf6e 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp 
@@ -47,10 +47,50 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) const LLT s32 = LLT::scalar(32); const LLT s64 = LLT::scalar(64); + const LLT nxv1s8 = LLT::scalable_vector(1, s8); + const LLT nxv2s8 = LLT::scalable_vector(2, s8); + const LLT nxv4s8 = LLT::scalable_vector(4, s8); + const LLT nxv8s8 = LLT::scalable_vector(8, s8); + const LLT nxv16s8 = LLT::scalable_vector(16, s8); + const LLT nxv32s8 = LLT::scalable_vector(32, s8); + const LLT nxv64s8 = LLT::scalable_vector(64, s8); + + const LLT nxv1s16 = LLT::scalable_vector(1, s16); + const LLT nxv2s16 = LLT::scalable_vector(2, s16); + const LLT nxv4s16 = LLT::scalable_vector(4, s16); + const LLT nxv8s16 = LLT::scalable_vector(8, s16); + const LLT nxv16s16 = LLT::scalable_vector(16, s16); + const LLT nxv32s16 = LLT::scalable_vector(32, s16); + + const LLT nxv1s32 = LLT::scalable_vector(1, s32); + const LLT nxv2s32 = LLT::scalable_vector(2, s32); + const LLT nxv4s32 = LLT::scalable_vector(4, s32); + const LLT nxv8s32 = LLT::scalable_vector(8, s32); + const LLT nxv16s32 = LLT::scalable_vector(16, s32); + + const LLT nxv1s64 = LLT::scalable_vector(1, s64); + const LLT nxv2s64 = LLT::scalable_vector(2, s64); + const LLT nxv4s64 = LLT::scalable_vector(4, s64); + const LLT nxv8s64 = LLT::scalable_vector(8, s64); + using namespace TargetOpcode; + auto AllVecTys = {nxv1s8, nxv2s8, nxv4s8, nxv8s8, nxv16s8, nxv32s8, + nxv64s8, nxv1s16, nxv2s16, nxv4s16, nxv8s16, nxv16s16, + nxv32s16, nxv1s32, nxv2s32, nxv4s32, nxv8s32, nxv16s32, + nxv1s64, nxv2s64, nxv4s64, nxv8s64}; + getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR}) .legalFor({s32, sXLen}) + .legalIf(all( + typeInSet(0, AllVecTys), + LegalityPredicate([=, &ST](const LegalityQuery &Query) { + return ST.hasVInstructions() && + (Query.Types[0].getScalarSizeInBits() != 64 || + ST.hasVInstructionsI64()) && + (Query.Types[0].getElementCount().getKnownMinValue() != 1 || + ST.getELen() == 64); + }))) .widenScalarToNextPow2(0) .clampScalar(0, s32, sXLen); diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp index 0799267eaf7c..76e5b3ed4025 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp @@ -106,6 +106,8 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx, if (Expr->getKind() == MCExpr::Target && cast<RISCVMCExpr>(Expr)->getKind() == RISCVMCExpr::VK_RISCV_32_PCREL) return ELF::R_RISCV_32_PCREL; + if (Target.getSymA()->getKind() == MCSymbolRefExpr::VK_GOTPCREL) + return ELF::R_RISCV_GOT32_PCREL; return ELF::R_RISCV_32; case FK_Data_8: return ELF::R_RISCV_64; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp index 9db5148208b3..961b8f0afe22 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp @@ -37,6 +37,13 @@ RISCVTargetELFStreamer::RISCVTargetELFStreamer(MCStreamer &S, auto &MAB = static_cast<RISCVAsmBackend &>(MCA.getBackend()); setTargetABI(RISCVABI::computeTargetABI(STI.getTargetTriple(), Features, MAB.getTargetOptions().getABIName())); + // `j label` in `.option norelax; j label; .option relax; ...; label:` needs a + // relocation to ensure the jump target is correct after linking. This is due + // to a limitation that shouldForceRelocation has to make the decision upfront + // without knowing a possibly future .option relax. 
When RISCVAsmParser is used, + // its ParseInstruction may call setForceRelocs as well. + if (STI.hasFeature(RISCV::FeatureRelax)) + static_cast<RISCVAsmBackend &>(MAB).setForceRelocs(); } RISCVELFStreamer &RISCVTargetELFStreamer::getStreamer() { diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp index 103a2e2da7b9..ed2b1ceb7d6f 100644 --- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp @@ -308,8 +308,10 @@ bool RISCVExpandPseudo::expandRV32ZdinxStore(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { DebugLoc DL = MBBI->getDebugLoc(); const TargetRegisterInfo *TRI = STI->getRegisterInfo(); - Register Lo = TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_32); - Register Hi = TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_32_hi); + Register Lo = + TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_gpr_even); + Register Hi = + TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_gpr_odd); BuildMI(MBB, MBBI, DL, TII->get(RISCV::SW)) .addReg(Lo, getKillRegState(MBBI->getOperand(0).isKill())) .addReg(MBBI->getOperand(1).getReg()) @@ -342,8 +344,10 @@ bool RISCVExpandPseudo::expandRV32ZdinxLoad(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { DebugLoc DL = MBBI->getDebugLoc(); const TargetRegisterInfo *TRI = STI->getRegisterInfo(); - Register Lo = TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_32); - Register Hi = TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_32_hi); + Register Lo = + TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_gpr_even); + Register Hi = + TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_gpr_odd); // If the register of operand 1 is equal to the Lo register, then swap the // order of loading the Lo and Hi statements. 
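The ordering matters because the first load can clobber the base address when the destination's low half and the pointer share a register; loading the non-aliasing half first keeps the base intact for the second access. A small sketch of that decision, where EmitLW is a hypothetical stand-in for the BuildMI(..., RISCV::LW) calls in the pass:

// Hypothetical stub; the real pass builds LW MachineInstrs via BuildMI.
static void EmitLW(unsigned DstReg, unsigned BaseReg, int Offset) {
  (void)DstReg; (void)BaseReg; (void)Offset;
}

static void emitPairLoad(unsigned LoReg, unsigned HiReg, unsigned BaseReg) {
  if (LoReg == BaseReg) {
    // The low half aliases the pointer, so load the high word first;
    // otherwise the second load would use an already-overwritten base.
    EmitLW(HiReg, BaseReg, 4);
    EmitLW(LoReg, BaseReg, 0);
  } else {
    EmitLW(LoReg, BaseReg, 0);
    EmitLW(HiReg, BaseReg, 4);
  }
}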
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index bb7a3291085d..279509575bb5 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -736,6 +736,7 @@ def FeatureStdExtZacas def HasStdExtZacas : Predicate<"Subtarget->hasStdExtZacas()">, AssemblerPredicate<(all_of FeatureStdExtZacas), "'Zacas' (Atomic Compare-And-Swap Instructions)">; +def NoStdExtZacas : Predicate<"!Subtarget->hasStdExtZacas()">; //===----------------------------------------------------------------------===// // Vendor extensions diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 0a1a466af591..cb9ffabc4123 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -138,7 +138,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, if (Subtarget.is64Bit()) addRegisterClass(MVT::f64, &RISCV::GPRRegClass); else - addRegisterClass(MVT::f64, &RISCV::GPRPF64RegClass); + addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass); } static const MVT::SimpleValueType BoolVecVTs[] = { @@ -814,8 +814,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT, Custom); setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom); - setOperationAction({ISD::AVGFLOORU, ISD::SADDSAT, ISD::UADDSAT, - ISD::SSUBSAT, ISD::USUBSAT}, + setOperationAction({ISD::AVGFLOORU, ISD::AVGCEILU, ISD::SADDSAT, + ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT, Legal); // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL" @@ -1185,8 +1185,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV()) setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom); - setOperationAction({ISD::AVGFLOORU, ISD::SADDSAT, ISD::UADDSAT, - ISD::SSUBSAT, ISD::USUBSAT}, + setOperationAction({ISD::AVGFLOORU, ISD::AVGCEILU, ISD::SADDSAT, + ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT, Custom); setOperationAction(ISD::VSELECT, VT, Custom); @@ -5466,6 +5466,7 @@ static unsigned getRISCVVLOp(SDValue Op) { OP_CASE(SSUBSAT) OP_CASE(USUBSAT) OP_CASE(AVGFLOORU) + OP_CASE(AVGCEILU) OP_CASE(FADD) OP_CASE(FSUB) OP_CASE(FMUL) @@ -5570,7 +5571,7 @@ static bool hasMergeOp(unsigned Opcode) { Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE && "not a RISC-V target specific op"); static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == - 125 && + 126 && RISCVISD::LAST_RISCV_STRICTFP_OPCODE - ISD::FIRST_TARGET_STRICTFP_OPCODE == 21 && @@ -5596,7 +5597,7 @@ static bool hasMaskOp(unsigned Opcode) { Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE && "not a RISC-V target specific op"); static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == - 125 && + 126 && RISCVISD::LAST_RISCV_STRICTFP_OPCODE - ISD::FIRST_TARGET_STRICTFP_OPCODE == 21 && @@ -6461,6 +6462,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, return SplitVectorOp(Op, DAG); [[fallthrough]]; case ISD::AVGFLOORU: + case ISD::AVGCEILU: case ISD::SADDSAT: case ISD::UADDSAT: case ISD::SSUBSAT: @@ -7023,8 +7025,7 @@ foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, if (!NewConstOp) return SDValue(); - const APInt &NewConstAPInt = - cast<ConstantSDNode>(NewConstOp)->getAPIntValue(); + const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal(); if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes()) return 
SDValue(); @@ -7154,8 +7155,8 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { // is SETGE/SETLE to avoid an XORI. if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) && CCVal == ISD::SETLT) { - const APInt &TrueVal = cast<ConstantSDNode>(TrueV)->getAPIntValue(); - const APInt &FalseVal = cast<ConstantSDNode>(FalseV)->getAPIntValue(); + const APInt &TrueVal = TrueV->getAsAPIntVal(); + const APInt &FalseVal = FalseV->getAsAPIntVal(); if (TrueVal - 1 == FalseVal) return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV); if (TrueVal + 1 == FalseVal) @@ -16345,7 +16346,7 @@ static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI, Register SrcReg = MI.getOperand(2).getReg(); const TargetRegisterClass *SrcRC = MI.getOpcode() == RISCV::SplitF64Pseudo_INX - ? &RISCV::GPRPF64RegClass + ? &RISCV::GPRPairRegClass : &RISCV::FPR64RegClass; int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); @@ -16384,7 +16385,7 @@ static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI, Register HiReg = MI.getOperand(2).getReg(); const TargetRegisterClass *DstRC = - MI.getOpcode() == RISCV::BuildPairF64Pseudo_INX ? &RISCV::GPRPF64RegClass + MI.getOpcode() == RISCV::BuildPairF64Pseudo_INX ? &RISCV::GPRPairRegClass : &RISCV::FPR64RegClass; int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); @@ -18596,6 +18597,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(UREM_VL) NODE_NAME_CASE(XOR_VL) NODE_NAME_CASE(AVGFLOORU_VL) + NODE_NAME_CASE(AVGCEILU_VL) NODE_NAME_CASE(SADDSAT_VL) NODE_NAME_CASE(UADDSAT_VL) NODE_NAME_CASE(SSUBSAT_VL) @@ -18752,7 +18754,7 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, if (VT == MVT::f32 && Subtarget.hasStdExtZfinx()) return std::make_pair(0U, &RISCV::GPRF32RegClass); if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit()) - return std::make_pair(0U, &RISCV::GPRPF64RegClass); + return std::make_pair(0U, &RISCV::GPRPairRegClass); return std::make_pair(0U, &RISCV::GPRNoX0RegClass); case 'f': if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) @@ -18934,7 +18936,7 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, // Subtarget into account. if (Res.second == &RISCV::GPRF16RegClass || Res.second == &RISCV::GPRF32RegClass || - Res.second == &RISCV::GPRPF64RegClass) + Res.second == &RISCV::GPRPairRegClass) return std::make_pair(Res.first, &RISCV::GPRRegClass); return Res; @@ -19362,6 +19364,11 @@ bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, return false; } +ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const { + // Zacas will use amocas.w which does not require extension. + return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND; +} + Register RISCVTargetLowering::getExceptionPointerRegister( const Constant *PersonalityFn) const { return RISCV::X10; @@ -20017,8 +20024,13 @@ unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT, } bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const { - // At the moment, the only scalable instruction GISel knows how to lower is - // ret with scalable argument. + + // GISel support is in progress or complete for G_ADD, G_SUB, G_AND, G_OR, and + // G_XOR. 
+ unsigned Op = Inst.getOpcode(); + if (Op == Instruction::Add || Op == Instruction::Sub || + Op == Instruction::And || Op == Instruction::Or || Op == Instruction::Xor) + return false; if (Inst.getType()->isScalableTy()) return true; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 5d51fe168b04..c65953e37b17 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -255,6 +255,8 @@ enum NodeType : unsigned { // Averaging adds of unsigned integers. AVGFLOORU_VL, + // Rounding averaging adds of unsigned integers. + AVGCEILU_VL, MULHS_VL, MULHU_VL, @@ -631,9 +633,7 @@ public: return ISD::SIGN_EXTEND; } - ISD::NodeType getExtendForAtomicCmpSwapArg() const override { - return ISD::SIGN_EXTEND; - } + ISD::NodeType getExtendForAtomicCmpSwapArg() const override; bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override; diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index e591aa935c0b..6c9e529e4bfb 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -1464,20 +1464,6 @@ static void doUnion(DemandedFields &A, DemandedFields B) { A.MaskPolicy |= B.MaskPolicy; } -static bool isNonZeroAVL(const MachineOperand &MO, - const MachineRegisterInfo &MRI) { - if (MO.isReg()) { - if (MO.getReg() == RISCV::X0) - return true; - if (MachineInstr *MI = MRI.getVRegDef(MO.getReg()); - MI && isNonZeroLoadImmediate(*MI)) - return true; - return false; - } - assert(MO.isImm()); - return 0 != MO.getImm(); -} - // Return true if we can mutate PrevMI to match MI without changing any the // fields which would be observed. static bool canMutatePriorConfig(const MachineInstr &PrevMI, @@ -1491,21 +1477,26 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI, if (Used.VLAny) return false; - // We don't bother to handle the equally zero case here as it's largely - // uninteresting. if (Used.VLZeroness) { if (isVLPreservingConfig(PrevMI)) return false; - if (!isNonZeroAVL(MI.getOperand(1), MRI) || - !isNonZeroAVL(PrevMI.getOperand(1), MRI)) + if (!getInfoForVSETVLI(PrevMI).hasEquallyZeroAVL(getInfoForVSETVLI(MI), + MRI)) return false; } - // TODO: Track whether the register is defined between - // PrevMI and MI. - if (MI.getOperand(1).isReg() && - RISCV::X0 != MI.getOperand(1).getReg()) - return false; + auto &AVL = MI.getOperand(1); + auto &PrevAVL = PrevMI.getOperand(1); + assert(MRI.isSSA()); + + // If the AVL is a register, we need to make sure MI's AVL dominates PrevMI. + // For now just check that PrevMI uses the same virtual register. + if (AVL.isReg() && AVL.getReg() != RISCV::X0) { + if (AVL.getReg().isPhysical()) + return false; + if (!PrevAVL.isReg() || PrevAVL.getReg() != AVL.getReg()) + return false; + } } if (!PrevMI.getOperand(2).isImm() || !MI.getOperand(2).isImm()) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 351f48c1708e..9813c7a70dfc 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -414,15 +414,16 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } - if (RISCV::GPRPF64RegClass.contains(DstReg, SrcReg)) { - // Emit an ADDI for both parts of GPRPF64. + if (RISCV::GPRPairRegClass.contains(DstReg, SrcReg)) { + // Emit an ADDI for both parts of GPRPair. 
BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), - TRI->getSubReg(DstReg, RISCV::sub_32)) - .addReg(TRI->getSubReg(SrcReg, RISCV::sub_32), getKillRegState(KillSrc)) + TRI->getSubReg(DstReg, RISCV::sub_gpr_even)) + .addReg(TRI->getSubReg(SrcReg, RISCV::sub_gpr_even), + getKillRegState(KillSrc)) .addImm(0); BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), - TRI->getSubReg(DstReg, RISCV::sub_32_hi)) - .addReg(TRI->getSubReg(SrcReg, RISCV::sub_32_hi), + TRI->getSubReg(DstReg, RISCV::sub_gpr_odd)) + .addReg(TRI->getSubReg(SrcReg, RISCV::sub_gpr_odd), getKillRegState(KillSrc)) .addImm(0); return; @@ -607,7 +608,7 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ? RISCV::SW : RISCV::SD; IsScalableVector = false; - } else if (RISCV::GPRPF64RegClass.hasSubClassEq(RC)) { + } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) { Opcode = RISCV::PseudoRV32ZdinxSD; IsScalableVector = false; } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) { @@ -690,7 +691,7 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ? RISCV::LW : RISCV::LD; IsScalableVector = false; - } else if (RISCV::GPRPF64RegClass.hasSubClassEq(RC)) { + } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) { Opcode = RISCV::PseudoRV32ZdinxLD; IsScalableVector = false; } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td index 4d0567e41abc..44552c00c62e 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td @@ -157,7 +157,16 @@ defm : AMOPat<"atomic_load_min_32", "AMOMIN_W">; defm : AMOPat<"atomic_load_umax_32", "AMOMAXU_W">; defm : AMOPat<"atomic_load_umin_32", "AMOMINU_W">; -let Predicates = [HasStdExtA] in { +defm : AMOPat<"atomic_swap_64", "AMOSWAP_D", i64, [IsRV64]>; +defm : AMOPat<"atomic_load_add_64", "AMOADD_D", i64, [IsRV64]>; +defm : AMOPat<"atomic_load_and_64", "AMOAND_D", i64, [IsRV64]>; +defm : AMOPat<"atomic_load_or_64", "AMOOR_D", i64, [IsRV64]>; +defm : AMOPat<"atomic_load_xor_64", "AMOXOR_D", i64, [IsRV64]>; +defm : AMOPat<"atomic_load_max_64", "AMOMAX_D", i64, [IsRV64]>; +defm : AMOPat<"atomic_load_min_64", "AMOMIN_D", i64, [IsRV64]>; +defm : AMOPat<"atomic_load_umax_64", "AMOMAXU_D", i64, [IsRV64]>; +defm : AMOPat<"atomic_load_umin_64", "AMOMINU_D", i64, [IsRV64]>; + /// Pseudo AMOs @@ -169,21 +178,6 @@ class PseudoAMO : Pseudo<(outs GPR:$res, GPR:$scratch), let hasSideEffects = 0; } -let Size = 20 in -def PseudoAtomicLoadNand32 : PseudoAMO; -// Ordering constants must be kept in sync with the AtomicOrdering enum in -// AtomicOrdering.h. 
-def : Pat<(XLenVT (atomic_load_nand_32_monotonic GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 2)>; -def : Pat<(XLenVT (atomic_load_nand_32_acquire GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 4)>; -def : Pat<(XLenVT (atomic_load_nand_32_release GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 5)>; -def : Pat<(XLenVT (atomic_load_nand_32_acq_rel GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 6)>; -def : Pat<(XLenVT (atomic_load_nand_32_seq_cst GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 7)>; - class PseudoMaskedAMO : Pseudo<(outs GPR:$res, GPR:$scratch), (ins GPR:$addr, GPR:$incr, GPR:$mask, ixlenimm:$ordering), []> { @@ -224,6 +218,23 @@ class PseudoMaskedAMOMinMaxPat<Intrinsic intrin, Pseudo AMOInst> (AMOInst GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt, timm:$ordering)>; +let Predicates = [HasStdExtA] in { + +let Size = 20 in +def PseudoAtomicLoadNand32 : PseudoAMO; +// Ordering constants must be kept in sync with the AtomicOrdering enum in +// AtomicOrdering.h. +def : Pat<(XLenVT (atomic_load_nand_32_monotonic GPR:$addr, GPR:$incr)), + (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 2)>; +def : Pat<(XLenVT (atomic_load_nand_32_acquire GPR:$addr, GPR:$incr)), + (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 4)>; +def : Pat<(XLenVT (atomic_load_nand_32_release GPR:$addr, GPR:$incr)), + (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 5)>; +def : Pat<(XLenVT (atomic_load_nand_32_acq_rel GPR:$addr, GPR:$incr)), + (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 6)>; +def : Pat<(XLenVT (atomic_load_nand_32_seq_cst GPR:$addr, GPR:$incr)), + (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 7)>; + let Size = 28 in def PseudoMaskedAtomicSwap32 : PseudoMaskedAMO; def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_xchg_i32, @@ -256,6 +267,43 @@ let Size = 36 in def PseudoMaskedAtomicLoadUMin32 : PseudoMaskedAMOUMinUMax; def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin_i32, PseudoMaskedAtomicLoadUMin32>; +} // Predicates = [HasStdExtA] + +let Predicates = [HasStdExtA, IsRV64] in { + +let Size = 20 in +def PseudoAtomicLoadNand64 : PseudoAMO; +// Ordering constants must be kept in sync with the AtomicOrdering enum in +// AtomicOrdering.h. 
+def : Pat<(i64 (atomic_load_nand_64_monotonic GPR:$addr, GPR:$incr)), + (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 2)>; +def : Pat<(i64 (atomic_load_nand_64_acquire GPR:$addr, GPR:$incr)), + (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 4)>; +def : Pat<(i64 (atomic_load_nand_64_release GPR:$addr, GPR:$incr)), + (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 5)>; +def : Pat<(i64 (atomic_load_nand_64_acq_rel GPR:$addr, GPR:$incr)), + (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 6)>; +def : Pat<(i64 (atomic_load_nand_64_seq_cst GPR:$addr, GPR:$incr)), + (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 7)>; + +def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_xchg_i64, + PseudoMaskedAtomicSwap32>; +def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_add_i64, + PseudoMaskedAtomicLoadAdd32>; +def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_sub_i64, + PseudoMaskedAtomicLoadSub32>; +def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_nand_i64, + PseudoMaskedAtomicLoadNand32>; +def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_max_i64, + PseudoMaskedAtomicLoadMax32>; +def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_min_i64, + PseudoMaskedAtomicLoadMin32>; +def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umax_i64, + PseudoMaskedAtomicLoadUMax32>; +def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin_i64, + PseudoMaskedAtomicLoadUMin32>; +} // Predicates = [HasStdExtA, IsRV64] + /// Compare and exchange @@ -285,9 +333,17 @@ multiclass PseudoCmpXchgPat<string Op, Pseudo CmpXchgInst, (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>; } +let Predicates = [HasStdExtA, NoStdExtZacas] in { def PseudoCmpXchg32 : PseudoCmpXchg; defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32>; +} + +let Predicates = [HasStdExtA, NoStdExtZacas, IsRV64] in { +def PseudoCmpXchg64 : PseudoCmpXchg; +defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64, i64>; +} +let Predicates = [HasStdExtA] in { def PseudoMaskedCmpXchg32 : Pseudo<(outs GPR:$res, GPR:$scratch), (ins GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, @@ -303,60 +359,9 @@ def : Pat<(int_riscv_masked_cmpxchg_i32 GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering), (PseudoMaskedCmpXchg32 GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>; - } // Predicates = [HasStdExtA] -defm : AMOPat<"atomic_swap_64", "AMOSWAP_D", i64, [IsRV64]>; -defm : AMOPat<"atomic_load_add_64", "AMOADD_D", i64, [IsRV64]>; -defm : AMOPat<"atomic_load_and_64", "AMOAND_D", i64, [IsRV64]>; -defm : AMOPat<"atomic_load_or_64", "AMOOR_D", i64, [IsRV64]>; -defm : AMOPat<"atomic_load_xor_64", "AMOXOR_D", i64, [IsRV64]>; -defm : AMOPat<"atomic_load_max_64", "AMOMAX_D", i64, [IsRV64]>; -defm : AMOPat<"atomic_load_min_64", "AMOMIN_D", i64, [IsRV64]>; -defm : AMOPat<"atomic_load_umax_64", "AMOMAXU_D", i64, [IsRV64]>; -defm : AMOPat<"atomic_load_umin_64", "AMOMINU_D", i64, [IsRV64]>; - let Predicates = [HasStdExtA, IsRV64] in { - -/// 64-bit pseudo AMOs - -let Size = 20 in -def PseudoAtomicLoadNand64 : PseudoAMO; -// Ordering constants must be kept in sync with the AtomicOrdering enum in -// AtomicOrdering.h. 
-def : Pat<(i64 (atomic_load_nand_64_monotonic GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 2)>; -def : Pat<(i64 (atomic_load_nand_64_acquire GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 4)>; -def : Pat<(i64 (atomic_load_nand_64_release GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 5)>; -def : Pat<(i64 (atomic_load_nand_64_acq_rel GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 6)>; -def : Pat<(i64 (atomic_load_nand_64_seq_cst GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 7)>; - -def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_xchg_i64, - PseudoMaskedAtomicSwap32>; -def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_add_i64, - PseudoMaskedAtomicLoadAdd32>; -def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_sub_i64, - PseudoMaskedAtomicLoadSub32>; -def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_nand_i64, - PseudoMaskedAtomicLoadNand32>; -def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_max_i64, - PseudoMaskedAtomicLoadMax32>; -def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_min_i64, - PseudoMaskedAtomicLoadMin32>; -def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umax_i64, - PseudoMaskedAtomicLoadUMax32>; -def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin_i64, - PseudoMaskedAtomicLoadUMin32>; - -/// 64-bit compare and exchange - -def PseudoCmpXchg64 : PseudoCmpXchg; -defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64, i64>; - def : Pat<(int_riscv_masked_cmpxchg_i64 GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering), (PseudoMaskedCmpXchg32 @@ -408,6 +413,7 @@ defm : AMOPat2<"atomic_load_min_32", "AMOMIN_W", i32>; defm : AMOPat2<"atomic_load_umax_32", "AMOMAXU_W", i32>; defm : AMOPat2<"atomic_load_umin_32", "AMOMINU_W", i32>; +let Predicates = [HasStdExtA, IsRV64] in defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32, i32>; let Predicates = [HasAtomicLdSt] in { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td index 418421b2a556..fec43d814098 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td @@ -33,8 +33,8 @@ def AddrRegImmINX : ComplexPattern<iPTR, 2, "SelectAddrRegImmINX">; // Zdinx -def GPRPF64AsFPR : AsmOperandClass { - let Name = "GPRPF64AsFPR"; +def GPRPairAsFPR : AsmOperandClass { + let Name = "GPRPairAsFPR"; let ParserMethod = "parseGPRAsFPR"; let PredicateMethod = "isGPRAsFPR"; let RenderMethod = "addRegOperands"; @@ -52,8 +52,8 @@ def FPR64INX : RegisterOperand<GPR> { let DecoderMethod = "DecodeGPRRegisterClass"; } -def FPR64IN32X : RegisterOperand<GPRPF64> { - let ParserMatchClass = GPRPF64AsFPR; +def FPR64IN32X : RegisterOperand<GPRPair> { + let ParserMatchClass = GPRPairAsFPR; } def DExt : ExtInfo<"", "", [HasStdExtD], f64, FPR64, FPR32, FPR64, ?>; @@ -515,15 +515,15 @@ def PseudoFROUND_D_IN32X : PseudoFROUND<FPR64IN32X, f64>; /// Loads let isCall = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 1 in -def PseudoRV32ZdinxLD : Pseudo<(outs GPRPF64:$dst), (ins GPR:$rs1, simm12:$imm12), []>; +def PseudoRV32ZdinxLD : Pseudo<(outs GPRPair:$dst), (ins GPR:$rs1, simm12:$imm12), []>; def : Pat<(f64 (load (AddrRegImmINX (XLenVT GPR:$rs1), simm12:$imm12))), (PseudoRV32ZdinxLD GPR:$rs1, simm12:$imm12)>; /// Stores let isCall = 0, mayLoad = 0, mayStore = 1, Size = 8, isCodeGenOnly = 1 in -def PseudoRV32ZdinxSD : Pseudo<(outs), (ins GPRPF64:$rs2, GPRNoX0:$rs1, simm12:$imm12), []>; -def 
: Pat<(store (f64 GPRPF64:$rs2), (AddrRegImmINX (XLenVT GPR:$rs1), simm12:$imm12)), - (PseudoRV32ZdinxSD GPRPF64:$rs2, GPR:$rs1, simm12:$imm12)>; +def PseudoRV32ZdinxSD : Pseudo<(outs), (ins GPRPair:$rs2, GPRNoX0:$rs1, simm12:$imm12), []>; +def : Pat<(store (f64 GPRPair:$rs2), (AddrRegImmINX (XLenVT GPR:$rs1), simm12:$imm12)), + (PseudoRV32ZdinxSD GPRPair:$rs2, GPR:$rs1, simm12:$imm12)>; /// Pseudo-instructions needed for the soft-float ABI with RV32D diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td index 4f87c36506e5..8ebd8b89c119 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -877,6 +877,23 @@ multiclass VPatMultiplyAddSDNode_VV_VX<SDNode op, string instruction_name> { } } +multiclass VPatAVGADD_VV_VX_RM<SDNode vop, int vxrm> { + foreach vti = AllIntegerVectors in { + let Predicates = GetVTypePredicates<vti>.Predicates in { + def : Pat<(vop (vti.Vector vti.RegClass:$rs1), + (vti.Vector vti.RegClass:$rs2)), + (!cast<Instruction>("PseudoVAADDU_VV_"#vti.LMul.MX) + (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs2, + vxrm, vti.AVL, vti.Log2SEW, TA_MA)>; + def : Pat<(vop (vti.Vector vti.RegClass:$rs1), + (vti.Vector (SplatPat (XLenVT GPR:$rs2)))), + (!cast<Instruction>("PseudoVAADDU_VX_"#vti.LMul.MX) + (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, GPR:$rs2, + vxrm, vti.AVL, vti.Log2SEW, TA_MA)>; + } + } +} + //===----------------------------------------------------------------------===// // Patterns. //===----------------------------------------------------------------------===// @@ -1132,20 +1149,8 @@ defm : VPatBinarySDNode_VV_VX<ssubsat, "PseudoVSSUB">; defm : VPatBinarySDNode_VV_VX<usubsat, "PseudoVSSUBU">; // 12.2. Vector Single-Width Averaging Add and Subtract -foreach vti = AllIntegerVectors in { - let Predicates = GetVTypePredicates<vti>.Predicates in { - def : Pat<(avgflooru (vti.Vector vti.RegClass:$rs1), - (vti.Vector vti.RegClass:$rs2)), - (!cast<Instruction>("PseudoVAADDU_VV_"#vti.LMul.MX) - (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs2, - 0b10, vti.AVL, vti.Log2SEW, TA_MA)>; - def : Pat<(avgflooru (vti.Vector vti.RegClass:$rs1), - (vti.Vector (SplatPat (XLenVT GPR:$rs2)))), - (!cast<Instruction>("PseudoVAADDU_VX_"#vti.LMul.MX) - (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, GPR:$rs2, - 0b10, vti.AVL, vti.Log2SEW, TA_MA)>; - } -} +defm : VPatAVGADD_VV_VX_RM<avgflooru, 0b10>; +defm : VPatAVGADD_VV_VX_RM<avgceilu, 0b00>; // 15. 
Vector Mask Instructions diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index d60ff4b5fab0..1deb9a709463 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -112,6 +112,7 @@ def riscv_cttz_vl : SDNode<"RISCVISD::CTTZ_VL", SDT_RISCVIntUnOp_VL> def riscv_ctpop_vl : SDNode<"RISCVISD::CTPOP_VL", SDT_RISCVIntUnOp_VL>; def riscv_avgflooru_vl : SDNode<"RISCVISD::AVGFLOORU_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>; +def riscv_avgceilu_vl : SDNode<"RISCVISD::AVGCEILU_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>; def riscv_saddsat_vl : SDNode<"RISCVISD::SADDSAT_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>; def riscv_uaddsat_vl : SDNode<"RISCVISD::UADDSAT_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>; def riscv_ssubsat_vl : SDNode<"RISCVISD::SSUBSAT_VL", SDT_RISCVIntBinOp_VL>; @@ -2031,6 +2032,25 @@ multiclass VPatSlide1VL_VF<SDNode vop, string instruction_name> { } } +multiclass VPatAVGADDVL_VV_VX_RM<SDNode vop, int vxrm> { + foreach vti = AllIntegerVectors in { + let Predicates = GetVTypePredicates<vti>.Predicates in { + def : Pat<(vop (vti.Vector vti.RegClass:$rs1), + (vti.Vector vti.RegClass:$rs2), + vti.RegClass:$merge, (vti.Mask V0), VLOpFrag), + (!cast<Instruction>("PseudoVAADDU_VV_"#vti.LMul.MX#"_MASK") + vti.RegClass:$merge, vti.RegClass:$rs1, vti.RegClass:$rs2, + (vti.Mask V0), vxrm, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + def : Pat<(vop (vti.Vector vti.RegClass:$rs1), + (vti.Vector (SplatPat (XLenVT GPR:$rs2))), + vti.RegClass:$merge, (vti.Mask V0), VLOpFrag), + (!cast<Instruction>("PseudoVAADDU_VX_"#vti.LMul.MX#"_MASK") + vti.RegClass:$merge, vti.RegClass:$rs1, GPR:$rs2, + (vti.Mask V0), vxrm, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + } + } +} + //===----------------------------------------------------------------------===// // Patterns. //===----------------------------------------------------------------------===// @@ -2308,22 +2328,8 @@ defm : VPatBinaryVL_VV_VX<riscv_ssubsat_vl, "PseudoVSSUB">; defm : VPatBinaryVL_VV_VX<riscv_usubsat_vl, "PseudoVSSUBU">; // 12.2. Vector Single-Width Averaging Add and Subtract -foreach vti = AllIntegerVectors in { - let Predicates = GetVTypePredicates<vti>.Predicates in { - def : Pat<(riscv_avgflooru_vl (vti.Vector vti.RegClass:$rs1), - (vti.Vector vti.RegClass:$rs2), - vti.RegClass:$merge, (vti.Mask V0), VLOpFrag), - (!cast<Instruction>("PseudoVAADDU_VV_"#vti.LMul.MX#"_MASK") - vti.RegClass:$merge, vti.RegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), 0b10, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(riscv_avgflooru_vl (vti.Vector vti.RegClass:$rs1), - (vti.Vector (SplatPat (XLenVT GPR:$rs2))), - vti.RegClass:$merge, (vti.Mask V0), VLOpFrag), - (!cast<Instruction>("PseudoVAADDU_VX_"#vti.LMul.MX#"_MASK") - vti.RegClass:$merge, vti.RegClass:$rs1, GPR:$rs2, - (vti.Mask V0), 0b10, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - } -} +defm : VPatAVGADDVL_VV_VX_RM<riscv_avgflooru_vl, 0b10>; +defm : VPatAVGADDVL_VV_VX_RM<riscv_avgceilu_vl, 0b00>; // 12.5. 
Vector Narrowing Fixed-Point Clip Instructions class VPatTruncSatClipMaxMinBase<string inst, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td index a09f5715b24f..ffcdd0010749 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td @@ -17,15 +17,107 @@ // Zacas (Atomic Compare-and-Swap) //===----------------------------------------------------------------------===// +def GPRPairRV32Operand : AsmOperandClass { + let Name = "GPRPairRV32"; + let ParserMethod = "parseGPRPair<false>"; + let PredicateMethod = "isGPRPair"; + let RenderMethod = "addRegOperands"; +} + +def GPRPairRV64Operand : AsmOperandClass { + let Name = "GPRPairRV64"; + let ParserMethod = "parseGPRPair<true>"; + let PredicateMethod = "isGPRPair"; + let RenderMethod = "addRegOperands"; +} + +def GPRPairRV32 : RegisterOperand<GPRPair> { + let ParserMatchClass = GPRPairRV32Operand; +} + +def GPRPairRV64 : RegisterOperand<GPRPair> { + let ParserMatchClass = GPRPairRV64Operand; +} + +let hasSideEffects = 0, mayLoad = 1, mayStore = 1, Constraints = "$rd = $rd_wb" in +class AMO_cas<bits<5> funct5, bit aq, bit rl, bits<3> funct3, string opcodestr, + DAGOperand RC> + : RVInstRAtomic<funct5, aq, rl, funct3, OPC_AMO, + (outs RC:$rd_wb), (ins RC:$rd, GPRMemZeroOffset:$rs1, RC:$rs2), + opcodestr, "$rd, $rs2, $rs1">; + +multiclass AMO_cas_aq_rl<bits<5> funct5, bits<3> funct3, string opcodestr, + DAGOperand RC> { + def "" : AMO_cas<funct5, 0, 0, funct3, opcodestr, RC>; + def _AQ : AMO_cas<funct5, 1, 0, funct3, opcodestr # ".aq", RC>; + def _RL : AMO_cas<funct5, 0, 1, funct3, opcodestr # ".rl", RC>; + def _AQ_RL : AMO_cas<funct5, 1, 1, funct3, opcodestr # ".aqrl", RC>; +} + let Predicates = [HasStdExtZacas] in { -defm AMOCAS_W : AMO_rr_aq_rl<0b00101, 0b010, "amocas.w">; -defm AMOCAS_D : AMO_rr_aq_rl<0b00101, 0b011, "amocas.d">; +defm AMOCAS_W : AMO_cas_aq_rl<0b00101, 0b010, "amocas.w", GPR>; } // Predicates = [HasStdExtZacas] +let Predicates = [HasStdExtZacas, IsRV32], DecoderNamespace = "RV32Zacas" in { +defm AMOCAS_D_RV32 : AMO_cas_aq_rl<0b00101, 0b011, "amocas.d", GPRPairRV32>; +} // Predicates = [HasStdExtZacas, IsRV32] + let Predicates = [HasStdExtZacas, IsRV64] in { -defm AMOCAS_Q : AMO_rr_aq_rl<0b00101, 0b100, "amocas.q">; +defm AMOCAS_D_RV64 : AMO_cas_aq_rl<0b00101, 0b011, "amocas.d", GPR>; +defm AMOCAS_Q : AMO_cas_aq_rl<0b00101, 0b100, "amocas.q", GPRPairRV64>; } // Predicates = [HasStdExtZacas, IsRV64] +multiclass AMOCASPat<string AtomicOp, string BaseInst, ValueType vt = XLenVT, + list<Predicate> ExtraPreds = []> { + let Predicates = !listconcat([HasStdExtZacas, NotHasStdExtZtso], ExtraPreds) in { + def : Pat<(!cast<PatFrag>(AtomicOp#"_monotonic") (vt GPR:$addr), + (vt GPR:$cmp), + (vt GPR:$new)), + (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>; + def : Pat<(!cast<PatFrag>(AtomicOp#"_acquire") (vt GPR:$addr), + (vt GPR:$cmp), + (vt GPR:$new)), + (!cast<RVInst>(BaseInst#"_AQ") GPR:$cmp, GPR:$addr, GPR:$new)>; + def : Pat<(!cast<PatFrag>(AtomicOp#"_release") (vt GPR:$addr), + (vt GPR:$cmp), + (vt GPR:$new)), + (!cast<RVInst>(BaseInst#"_RL") GPR:$cmp, GPR:$addr, GPR:$new)>; + def : Pat<(!cast<PatFrag>(AtomicOp#"_acq_rel") (vt GPR:$addr), + (vt GPR:$cmp), + (vt GPR:$new)), + (!cast<RVInst>(BaseInst#"_AQ_RL") GPR:$cmp, GPR:$addr, GPR:$new)>; + def : Pat<(!cast<PatFrag>(AtomicOp#"_seq_cst") (vt GPR:$addr), + (vt GPR:$cmp), + (vt GPR:$new)), + (!cast<RVInst>(BaseInst#"_AQ_RL") GPR:$cmp, GPR:$addr, GPR:$new)>; + } // Predicates = 
!listconcat([HasStdExtZacas, NotHasStdExtZtso], ExtraPreds) + let Predicates = !listconcat([HasStdExtZacas, HasStdExtZtso], ExtraPreds) in { + def : Pat<(!cast<PatFrag>(AtomicOp#"_monotonic") (vt GPR:$addr), + (vt GPR:$cmp), + (vt GPR:$new)), + (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>; + def : Pat<(!cast<PatFrag>(AtomicOp#"_acquire") (vt GPR:$addr), + (vt GPR:$cmp), + (vt GPR:$new)), + (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>; + def : Pat<(!cast<PatFrag>(AtomicOp#"_release") (vt GPR:$addr), + (vt GPR:$cmp), + (vt GPR:$new)), + (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>; + def : Pat<(!cast<PatFrag>(AtomicOp#"_acq_rel") (vt GPR:$addr), + (vt GPR:$cmp), + (vt GPR:$new)), + (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>; + def : Pat<(!cast<PatFrag>(AtomicOp#"_seq_cst") (vt GPR:$addr), + (vt GPR:$cmp), + (vt GPR:$new)), + (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>; + } // Predicates = !listconcat([HasStdExtZacas, HasStdExtZtso], ExtraPreds) +} + +defm : AMOCASPat<"atomic_cmp_swap_32", "AMOCAS_W">; +defm : AMOCASPat<"atomic_cmp_swap_64", "AMOCAS_D_RV64", i64, [IsRV64]>; + //===----------------------------------------------------------------------===// // Zawrs (Wait-on-Reservation-Set) //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td index a59d058382fe..5a4d8c4cfece 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td @@ -63,7 +63,10 @@ def sub_vrm1_5 : ComposedSubRegIndex<sub_vrm2_2, sub_vrm1_1>; def sub_vrm1_6 : ComposedSubRegIndex<sub_vrm2_3, sub_vrm1_0>; def sub_vrm1_7 : ComposedSubRegIndex<sub_vrm2_3, sub_vrm1_1>; -def sub_32_hi : SubRegIndex<32, 32>; +// GPR sizes change with HwMode. +// FIXME: Support HwMode in SubRegIndex? +def sub_gpr_even : SubRegIndex<-1>; +def sub_gpr_odd : SubRegIndex<-1, -1>; } // Namespace = "RISCV" // Integer registers @@ -118,6 +121,8 @@ def XLenVT : ValueTypeByHwMode<[RV32, RV64], // Allow f64 in GPR for ZDINX on RV64. 
def XLenFVT : ValueTypeByHwMode<[RV64], [f64]>; +def XLenPairFVT : ValueTypeByHwMode<[RV32], + [f64]>; def XLenRI : RegInfoByHwMode< [RV32, RV64], [RegInfo<32,32,32>, RegInfo<64,64,64>]>; @@ -546,33 +551,37 @@ def DUMMY_REG_PAIR_WITH_X0 : RISCVReg<0, "0">; def GPRAll : GPRRegisterClass<(add GPR, DUMMY_REG_PAIR_WITH_X0)>; let RegAltNameIndices = [ABIRegAltName] in { - def X0_PD : RISCVRegWithSubRegs<0, X0.AsmName, - [X0, DUMMY_REG_PAIR_WITH_X0], - X0.AltNames> { - let SubRegIndices = [sub_32, sub_32_hi]; + def X0_Pair : RISCVRegWithSubRegs<0, X0.AsmName, + [X0, DUMMY_REG_PAIR_WITH_X0], + X0.AltNames> { + let SubRegIndices = [sub_gpr_even, sub_gpr_odd]; let CoveredBySubRegs = 1; } foreach I = 1-15 in { defvar Index = !shl(I, 1); + defvar IndexP1 = !add(Index, 1); defvar Reg = !cast<Register>("X"#Index); - defvar RegP1 = !cast<Register>("X"#!add(Index,1)); - def X#Index#_PD : RISCVRegWithSubRegs<Index, Reg.AsmName, - [Reg, RegP1], - Reg.AltNames> { - let SubRegIndices = [sub_32, sub_32_hi]; + defvar RegP1 = !cast<Register>("X"#IndexP1); + def "X" # Index #"_X" # IndexP1 : RISCVRegWithSubRegs<Index, + Reg.AsmName, + [Reg, RegP1], + Reg.AltNames> { + let SubRegIndices = [sub_gpr_even, sub_gpr_odd]; let CoveredBySubRegs = 1; } } } -let RegInfos = RegInfoByHwMode<[RV64], [RegInfo<64, 64, 64>]> in -def GPRPF64 : RegisterClass<"RISCV", [f64], 64, (add - X10_PD, X12_PD, X14_PD, X16_PD, - X6_PD, - X28_PD, X30_PD, - X8_PD, - X18_PD, X20_PD, X22_PD, X24_PD, X26_PD, - X0_PD, X2_PD, X4_PD +let RegInfos = RegInfoByHwMode<[RV32, RV64], + [RegInfo<64, 64, 64>, RegInfo<128, 128, 128>]>, + DecoderMethod = "DecodeGPRPairRegisterClass" in +def GPRPair : RegisterClass<"RISCV", [XLenPairFVT], 64, (add + X10_X11, X12_X13, X14_X15, X16_X17, + X6_X7, + X28_X29, X30_X31, + X8_X9, + X18_X19, X20_X21, X22_X23, X24_X25, X26_X27, + X0_Pair, X2_X3, X4_X5 )>; // The register class is added for inline assembly for vector mask types. diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 320f91c76057..815eca1240d8 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -1649,7 +1649,7 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) { } } if (Node->getValueType(0) == MVT::i128) { - const APInt &Val = cast<ConstantSDNode>(Node)->getAPIntValue(); + const APInt &Val = Node->getAsAPIntVal(); SystemZVectorConstantInfo VCI(Val); if (VCI.isVectorConstantLegal(*Subtarget)) { loadVectorConstant(VCI, Node); diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 2450c6801a66..7d387c7b9f2f 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -340,6 +340,13 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setLibcallName(RTLIB::SHL_I128, nullptr); setLibcallName(RTLIB::SRA_I128, nullptr); + // Also expand 256 bit shifts if i128 is a legal type. + if (isTypeLegal(MVT::i128)) { + setOperationAction(ISD::SRL_PARTS, MVT::i128, Expand); + setOperationAction(ISD::SHL_PARTS, MVT::i128, Expand); + setOperationAction(ISD::SRA_PARTS, MVT::i128, Expand); + } + // Handle bitcast from fp128 to i128. 
if (!isTypeLegal(MVT::i128)) setOperationAction(ISD::BITCAST, MVT::i128, Custom); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp index 15dc44a04395..7f0140a5e8c6 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp @@ -839,9 +839,9 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) { unsigned Reg; - if (Attrs.hasParamAttr(I, Attribute::SExt)) + if (Call->paramHasAttr(I, Attribute::SExt)) Reg = getRegForSignedValue(V); - else if (Attrs.hasParamAttr(I, Attribute::ZExt)) + else if (Call->paramHasAttr(I, Attribute::ZExt)) Reg = getRegForUnsignedValue(V); else Reg = getRegForValue(V); diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index 304b998e1f26..e006dd877360 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -148,21 +148,25 @@ classifyFirstOpcodeInMacroFusion(unsigned Opcode) { case X86::AND16ri8: case X86::AND16rm: case X86::AND16rr: + case X86::AND16rr_REV: case X86::AND32i32: case X86::AND32ri: case X86::AND32ri8: case X86::AND32rm: case X86::AND32rr: + case X86::AND32rr_REV: case X86::AND64i32: case X86::AND64ri32: case X86::AND64ri8: case X86::AND64rm: case X86::AND64rr: + case X86::AND64rr_REV: case X86::AND8i8: case X86::AND8ri: case X86::AND8ri8: case X86::AND8rm: case X86::AND8rr: + case X86::AND8rr_REV: return FirstMacroFusionInstKind::And; // CMP case X86::CMP16i16: @@ -171,24 +175,28 @@ classifyFirstOpcodeInMacroFusion(unsigned Opcode) { case X86::CMP16ri8: case X86::CMP16rm: case X86::CMP16rr: + case X86::CMP16rr_REV: case X86::CMP32i32: case X86::CMP32mr: case X86::CMP32ri: case X86::CMP32ri8: case X86::CMP32rm: case X86::CMP32rr: + case X86::CMP32rr_REV: case X86::CMP64i32: case X86::CMP64mr: case X86::CMP64ri32: case X86::CMP64ri8: case X86::CMP64rm: case X86::CMP64rr: + case X86::CMP64rr_REV: case X86::CMP8i8: case X86::CMP8mr: case X86::CMP8ri: case X86::CMP8ri8: case X86::CMP8rm: case X86::CMP8rr: + case X86::CMP8rr_REV: return FirstMacroFusionInstKind::Cmp; // ADD case X86::ADD16i16: @@ -196,42 +204,50 @@ classifyFirstOpcodeInMacroFusion(unsigned Opcode) { case X86::ADD16ri8: case X86::ADD16rm: case X86::ADD16rr: + case X86::ADD16rr_REV: case X86::ADD32i32: case X86::ADD32ri: case X86::ADD32ri8: case X86::ADD32rm: case X86::ADD32rr: + case X86::ADD32rr_REV: case X86::ADD64i32: case X86::ADD64ri32: case X86::ADD64ri8: case X86::ADD64rm: case X86::ADD64rr: + case X86::ADD64rr_REV: case X86::ADD8i8: case X86::ADD8ri: case X86::ADD8ri8: case X86::ADD8rm: case X86::ADD8rr: + case X86::ADD8rr_REV: // SUB case X86::SUB16i16: case X86::SUB16ri: case X86::SUB16ri8: case X86::SUB16rm: case X86::SUB16rr: + case X86::SUB16rr_REV: case X86::SUB32i32: case X86::SUB32ri: case X86::SUB32ri8: case X86::SUB32rm: case X86::SUB32rr: + case X86::SUB32rr_REV: case X86::SUB64i32: case X86::SUB64ri32: case X86::SUB64ri8: case X86::SUB64rm: case X86::SUB64rr: + case X86::SUB64rr_REV: case X86::SUB8i8: case X86::SUB8ri: case X86::SUB8ri8: case X86::SUB8rm: case X86::SUB8rr: + case X86::SUB8rr_REV: return FirstMacroFusionInstKind::AddSub; // INC case X86::INC16r: diff --git a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp index aad839b83ee1..b13bf361ab79 100644 --- a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp +++ b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp @@ -173,6 +173,7 
@@ static FlagArithMnemonic getMnemonicFromOpcode(unsigned Opcode) { #define LLVM_EXPAND_ADC_SBB_INSTR(MNEMONIC) \ LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr) \ + LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr_REV) \ LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rm) \ LLVM_EXPAND_INSTR_SIZES(MNEMONIC, mr) \ case X86::MNEMONIC##8ri: \ diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 5a28240ea9e2..700ab797b2f6 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2444,6 +2444,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, ISD::SRL, ISD::OR, ISD::AND, + ISD::BITREVERSE, ISD::ADD, ISD::FADD, ISD::FSUB, @@ -4821,8 +4822,8 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, APInt UndefSrcElts(NumSrcElts, 0); SmallVector<APInt, 64> SrcEltBits; - auto *CN = cast<ConstantSDNode>(Op.getOperand(0).getOperand(0)); - SrcEltBits.push_back(CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits)); + const APInt &C = Op.getOperand(0).getConstantOperandAPInt(0); + SrcEltBits.push_back(C.zextOrTrunc(SrcEltSizeInBits)); SrcEltBits.append(NumSrcElts - 1, APInt(SrcEltSizeInBits, 0)); return CastBitData(UndefSrcElts, SrcEltBits); } @@ -17223,6 +17224,7 @@ static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, "Cannot lower 512-bit vectors w/o basic ISA!"); int NumElts = Mask.size(); + int NumV2Elements = count_if(Mask, [NumElts](int M) { return M >= NumElts; }); // Try to recognize shuffles that are just padding a subvector with zeros. int SubvecElts = 0; @@ -17288,17 +17290,18 @@ static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, Offset += NumElts; // Increment for next iteration. } - // If we're broadcasting a SETCC result, try to broadcast the ops instead. + // If we're performing an unary shuffle on a SETCC result, try to shuffle the + // ops instead. // TODO: What other unary shuffles would benefit from this? - if (isBroadcastShuffleMask(Mask) && V1.getOpcode() == ISD::SETCC && - V1->hasOneUse()) { + if (NumV2Elements == 0 && V1.getOpcode() == ISD::SETCC && V1->hasOneUse()) { SDValue Op0 = V1.getOperand(0); SDValue Op1 = V1.getOperand(1); ISD::CondCode CC = cast<CondCodeSDNode>(V1.getOperand(2))->get(); EVT OpVT = Op0.getValueType(); - return DAG.getSetCC( - DL, VT, DAG.getVectorShuffle(OpVT, DL, Op0, DAG.getUNDEF(OpVT), Mask), - DAG.getVectorShuffle(OpVT, DL, Op1, DAG.getUNDEF(OpVT), Mask), CC); + if (OpVT.getScalarSizeInBits() >= 32 || isBroadcastShuffleMask(Mask)) + return DAG.getSetCC( + DL, VT, DAG.getVectorShuffle(OpVT, DL, Op0, DAG.getUNDEF(OpVT), Mask), + DAG.getVectorShuffle(OpVT, DL, Op1, DAG.getUNDEF(OpVT), Mask), CC); } MVT ExtVT; @@ -22551,7 +22554,7 @@ static SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, // FIXME: Do this for non-constant compares for constant on LHS? if (CmpVT == MVT::i64 && isa<ConstantSDNode>(Op1) && !isX86CCSigned(X86CC) && Op0.hasOneUse() && // Hacky way to not break CSE opportunities with sub. 
- cast<ConstantSDNode>(Op1)->getAPIntValue().getActiveBits() <= 32 && + Op1->getAsAPIntVal().getActiveBits() <= 32 && DAG.MaskedValueIsZero(Op0, APInt::getHighBitsSet(64, 32))) { CmpVT = MVT::i32; Op0 = DAG.getNode(ISD::TRUNCATE, dl, CmpVT, Op0); @@ -47029,8 +47032,8 @@ static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG, SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); - APInt ShlConst = (cast<ConstantSDNode>(N01))->getAPIntValue(); - APInt SarConst = (cast<ConstantSDNode>(N1))->getAPIntValue(); + APInt ShlConst = N01->getAsAPIntVal(); + APInt SarConst = N1->getAsAPIntVal(); EVT CVT = N1.getValueType(); if (SarConst.isNegative()) @@ -51835,6 +51838,33 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG, return combineFneg(N, DAG, DCI, Subtarget); } +static SDValue combineBITREVERSE(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget &Subtarget) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // Convert a (iX bitreverse(bitcast(vXi1 X))) -> (iX bitcast(shuffle(X))) + if (VT.isInteger() && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) { + SDValue Src = N0.getOperand(0); + EVT SrcVT = Src.getValueType(); + if (SrcVT.isVector() && SrcVT.getScalarType() == MVT::i1 && + (DCI.isBeforeLegalize() || + DAG.getTargetLoweringInfo().isTypeLegal(SrcVT)) && + Subtarget.hasSSSE3()) { + unsigned NumElts = SrcVT.getVectorNumElements(); + SmallVector<int, 32> ReverseMask(NumElts); + for (unsigned I = 0; I != NumElts; ++I) + ReverseMask[I] = (NumElts - 1) - I; + SDValue Rev = + DAG.getVectorShuffle(SrcVT, SDLoc(N), Src, Src, ReverseMask); + return DAG.getBitcast(VT, Rev); + } + } + + return SDValue(); +} + static SDValue combineBEXTR(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { @@ -56124,6 +56154,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::AND: return combineAnd(N, DAG, DCI, Subtarget); case ISD::OR: return combineOr(N, DAG, DCI, Subtarget); case ISD::XOR: return combineXor(N, DAG, DCI, Subtarget); + case ISD::BITREVERSE: return combineBITREVERSE(N, DAG, DCI, Subtarget); case X86ISD::BEXTR: case X86ISD::BEXTRI: return combineBEXTR(N, DAG, DCI, Subtarget); case ISD::LOAD: return combineLoad(N, DAG, DCI, Subtarget); diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td index 5cfa95e085e3..76b0fe5f5cad 100644 --- a/llvm/lib/Target/X86/X86InstrArithmetic.td +++ b/llvm/lib/Target/X86/X86InstrArithmetic.td @@ -1107,43 +1107,85 @@ def : Pat<(store (X86adc_flag GR64:$src, (loadi64 addr:$dst), EFLAGS), // Patterns for basic arithmetic ops with relocImm for the immediate field. 
multiclass ArithBinOp_RF_relocImm_Pats<SDNode OpNodeFlag, SDNode OpNode> { - def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2), - (!cast<Instruction>(NAME#"8ri") GR8:$src1, relocImm8_su:$src2)>; - def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2), - (!cast<Instruction>(NAME#"16ri") GR16:$src1, relocImm16_su:$src2)>; - def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2), - (!cast<Instruction>(NAME#"32ri") GR32:$src1, relocImm32_su:$src2)>; - def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2), - (!cast<Instruction>(NAME#"64ri32") GR64:$src1, i64relocImmSExt32_su:$src2)>; - - def : Pat<(store (OpNode (load addr:$dst), relocImm8_su:$src), addr:$dst), - (!cast<Instruction>(NAME#"8mi") addr:$dst, relocImm8_su:$src)>; - def : Pat<(store (OpNode (load addr:$dst), relocImm16_su:$src), addr:$dst), - (!cast<Instruction>(NAME#"16mi") addr:$dst, relocImm16_su:$src)>; - def : Pat<(store (OpNode (load addr:$dst), relocImm32_su:$src), addr:$dst), - (!cast<Instruction>(NAME#"32mi") addr:$dst, relocImm32_su:$src)>; - def : Pat<(store (OpNode (load addr:$dst), i64relocImmSExt32_su:$src), addr:$dst), - (!cast<Instruction>(NAME#"64mi32") addr:$dst, i64relocImmSExt32_su:$src)>; + let Predicates = [NoNDD] in { + def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2), + (!cast<Instruction>(NAME#"8ri") GR8:$src1, relocImm8_su:$src2)>; + def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2), + (!cast<Instruction>(NAME#"16ri") GR16:$src1, relocImm16_su:$src2)>; + def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2), + (!cast<Instruction>(NAME#"32ri") GR32:$src1, relocImm32_su:$src2)>; + def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2), + (!cast<Instruction>(NAME#"64ri32") GR64:$src1, i64relocImmSExt32_su:$src2)>; + + def : Pat<(store (OpNode (load addr:$dst), relocImm8_su:$src), addr:$dst), + (!cast<Instruction>(NAME#"8mi") addr:$dst, relocImm8_su:$src)>; + def : Pat<(store (OpNode (load addr:$dst), relocImm16_su:$src), addr:$dst), + (!cast<Instruction>(NAME#"16mi") addr:$dst, relocImm16_su:$src)>; + def : Pat<(store (OpNode (load addr:$dst), relocImm32_su:$src), addr:$dst), + (!cast<Instruction>(NAME#"32mi") addr:$dst, relocImm32_su:$src)>; + def : Pat<(store (OpNode (load addr:$dst), i64relocImmSExt32_su:$src), addr:$dst), + (!cast<Instruction>(NAME#"64mi32") addr:$dst, i64relocImmSExt32_su:$src)>; + } + let Predicates = [HasNDD] in { + def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2), + (!cast<Instruction>(NAME#"8ri_ND") GR8:$src1, relocImm8_su:$src2)>; + def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2), + (!cast<Instruction>(NAME#"16ri_ND") GR16:$src1, relocImm16_su:$src2)>; + def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2), + (!cast<Instruction>(NAME#"32ri_ND") GR32:$src1, relocImm32_su:$src2)>; + def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2), + (!cast<Instruction>(NAME#"64ri32_ND") GR64:$src1, i64relocImmSExt32_su:$src2)>; + + def : Pat<(OpNode (load addr:$dst), relocImm8_su:$src), + (!cast<Instruction>(NAME#"8mi_ND") addr:$dst, relocImm8_su:$src)>; + def : Pat<(OpNode (load addr:$dst), relocImm16_su:$src), + (!cast<Instruction>(NAME#"16mi_ND") addr:$dst, relocImm16_su:$src)>; + def : Pat<(OpNode (load addr:$dst), relocImm32_su:$src), + (!cast<Instruction>(NAME#"32mi_ND") addr:$dst, relocImm32_su:$src)>; + def : Pat<(OpNode (load addr:$dst), i64relocImmSExt32_su:$src), + (!cast<Instruction>(NAME#"64mi32_ND") addr:$dst, i64relocImmSExt32_su:$src)>; + } } multiclass ArithBinOp_RFF_relocImm_Pats<SDNode OpNodeFlag> { - def : Pat<(OpNodeFlag 
GR8:$src1, relocImm8_su:$src2, EFLAGS), - (!cast<Instruction>(NAME#"8ri") GR8:$src1, relocImm8_su:$src2)>; - def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2, EFLAGS), - (!cast<Instruction>(NAME#"16ri") GR16:$src1, relocImm16_su:$src2)>; - def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2, EFLAGS), - (!cast<Instruction>(NAME#"32ri") GR32:$src1, relocImm32_su:$src2)>; - def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2, EFLAGS), - (!cast<Instruction>(NAME#"64ri32") GR64:$src1, i64relocImmSExt32_su:$src2)>; - - def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm8_su:$src, EFLAGS), addr:$dst), - (!cast<Instruction>(NAME#"8mi") addr:$dst, relocImm8_su:$src)>; - def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm16_su:$src, EFLAGS), addr:$dst), - (!cast<Instruction>(NAME#"16mi") addr:$dst, relocImm16_su:$src)>; - def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm32_su:$src, EFLAGS), addr:$dst), - (!cast<Instruction>(NAME#"32mi") addr:$dst, relocImm32_su:$src)>; - def : Pat<(store (OpNodeFlag (load addr:$dst), i64relocImmSExt32_su:$src, EFLAGS), addr:$dst), - (!cast<Instruction>(NAME#"64mi32") addr:$dst, i64relocImmSExt32_su:$src)>; + let Predicates = [NoNDD] in { + def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2, EFLAGS), + (!cast<Instruction>(NAME#"8ri") GR8:$src1, relocImm8_su:$src2)>; + def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2, EFLAGS), + (!cast<Instruction>(NAME#"16ri") GR16:$src1, relocImm16_su:$src2)>; + def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2, EFLAGS), + (!cast<Instruction>(NAME#"32ri") GR32:$src1, relocImm32_su:$src2)>; + def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2, EFLAGS), + (!cast<Instruction>(NAME#"64ri32") GR64:$src1, i64relocImmSExt32_su:$src2)>; + + def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm8_su:$src, EFLAGS), addr:$dst), + (!cast<Instruction>(NAME#"8mi") addr:$dst, relocImm8_su:$src)>; + def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm16_su:$src, EFLAGS), addr:$dst), + (!cast<Instruction>(NAME#"16mi") addr:$dst, relocImm16_su:$src)>; + def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm32_su:$src, EFLAGS), addr:$dst), + (!cast<Instruction>(NAME#"32mi") addr:$dst, relocImm32_su:$src)>; + def : Pat<(store (OpNodeFlag (load addr:$dst), i64relocImmSExt32_su:$src, EFLAGS), addr:$dst), + (!cast<Instruction>(NAME#"64mi32") addr:$dst, i64relocImmSExt32_su:$src)>; + } + let Predicates = [HasNDD] in { + def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2, EFLAGS), + (!cast<Instruction>(NAME#"8ri_ND") GR8:$src1, relocImm8_su:$src2)>; + def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2, EFLAGS), + (!cast<Instruction>(NAME#"16ri_ND") GR16:$src1, relocImm16_su:$src2)>; + def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2, EFLAGS), + (!cast<Instruction>(NAME#"32ri_ND") GR32:$src1, relocImm32_su:$src2)>; + def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2, EFLAGS), + (!cast<Instruction>(NAME#"64ri32_ND") GR64:$src1, i64relocImmSExt32_su:$src2)>; + + def : Pat<(OpNodeFlag (load addr:$dst), relocImm8_su:$src, EFLAGS), + (!cast<Instruction>(NAME#"8mi_ND") addr:$dst, relocImm8_su:$src)>; + def : Pat<(OpNodeFlag (load addr:$dst), relocImm16_su:$src, EFLAGS), + (!cast<Instruction>(NAME#"16mi_ND") addr:$dst, relocImm16_su:$src)>; + def : Pat<(OpNodeFlag (load addr:$dst), relocImm32_su:$src, EFLAGS), + (!cast<Instruction>(NAME#"32mi_ND") addr:$dst, relocImm32_su:$src)>; + def : Pat<(OpNodeFlag (load addr:$dst), i64relocImmSExt32_su:$src, EFLAGS), + 
(!cast<Instruction>(NAME#"64mi32_ND") addr:$dst, i64relocImmSExt32_su:$src)>; + } } multiclass ArithBinOp_F_relocImm_Pats<SDNode OpNodeFlag> { diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index c77c77ee4a3e..422391a6e02a 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -1550,13 +1550,24 @@ def : Pat<(X86add_flag_nocf GR64:$src1, 0x0000000080000000), // AddedComplexity is needed to give priority over i64immSExt8 and i64immSExt32. let AddedComplexity = 1 in { -def : Pat<(and GR64:$src, i64immZExt32:$imm), - (SUBREG_TO_REG - (i64 0), - (AND32ri - (EXTRACT_SUBREG GR64:$src, sub_32bit), - (i32 (GetLo32XForm imm:$imm))), - sub_32bit)>; + let Predicates = [NoNDD] in { + def : Pat<(and GR64:$src, i64immZExt32:$imm), + (SUBREG_TO_REG + (i64 0), + (AND32ri + (EXTRACT_SUBREG GR64:$src, sub_32bit), + (i32 (GetLo32XForm imm:$imm))), + sub_32bit)>; + } + let Predicates = [HasNDD] in { + def : Pat<(and GR64:$src, i64immZExt32:$imm), + (SUBREG_TO_REG + (i64 0), + (AND32ri_ND + (EXTRACT_SUBREG GR64:$src, sub_32bit), + (i32 (GetLo32XForm imm:$imm))), + sub_32bit)>; + } } // AddedComplexity = 1 @@ -1762,10 +1773,18 @@ def : Pat<(X86xor_flag (i8 (trunc GR32:$src)), // where the least significant bit is not 0. However, the probability of this // happening is considered low enough that this is officially not a // "real problem". -def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>; -def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>; -def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>; -def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>; +let Predicates = [NoNDD] in { + def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>; + def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>; + def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>; + def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>; +} +let Predicates = [HasNDD] in { + def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr_ND GR8 :$src1, GR8 :$src1)>; + def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr_ND GR16:$src1, GR16:$src1)>; + def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr_ND GR32:$src1, GR32:$src1)>; + def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr_ND GR64:$src1, GR64:$src1)>; +} // Shift amount is implicitly masked. 
multiclass MaskedShiftAmountPats<SDNode frag, string name> { @@ -1937,75 +1956,179 @@ defm : one_bit_patterns<GR64, i64, BTR64rr, BTS64rr, BTC64rr, shiftMask64>; // EFLAGS-defining Patterns //===----------------------------------------------------------------------===// -// add reg, reg -def : Pat<(add GR8 :$src1, GR8 :$src2), (ADD8rr GR8 :$src1, GR8 :$src2)>; -def : Pat<(add GR16:$src1, GR16:$src2), (ADD16rr GR16:$src1, GR16:$src2)>; -def : Pat<(add GR32:$src1, GR32:$src2), (ADD32rr GR32:$src1, GR32:$src2)>; -def : Pat<(add GR64:$src1, GR64:$src2), (ADD64rr GR64:$src1, GR64:$src2)>; - -// add reg, mem -def : Pat<(add GR8:$src1, (loadi8 addr:$src2)), - (ADD8rm GR8:$src1, addr:$src2)>; -def : Pat<(add GR16:$src1, (loadi16 addr:$src2)), - (ADD16rm GR16:$src1, addr:$src2)>; -def : Pat<(add GR32:$src1, (loadi32 addr:$src2)), - (ADD32rm GR32:$src1, addr:$src2)>; -def : Pat<(add GR64:$src1, (loadi64 addr:$src2)), - (ADD64rm GR64:$src1, addr:$src2)>; - -// add reg, imm -def : Pat<(add GR8 :$src1, imm:$src2), (ADD8ri GR8:$src1 , imm:$src2)>; -def : Pat<(add GR16:$src1, imm:$src2), (ADD16ri GR16:$src1, imm:$src2)>; -def : Pat<(add GR32:$src1, imm:$src2), (ADD32ri GR32:$src1, imm:$src2)>; -def : Pat<(add GR64:$src1, i64immSExt32:$src2), (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>; - -// sub reg, reg -def : Pat<(sub GR8 :$src1, GR8 :$src2), (SUB8rr GR8 :$src1, GR8 :$src2)>; -def : Pat<(sub GR16:$src1, GR16:$src2), (SUB16rr GR16:$src1, GR16:$src2)>; -def : Pat<(sub GR32:$src1, GR32:$src2), (SUB32rr GR32:$src1, GR32:$src2)>; -def : Pat<(sub GR64:$src1, GR64:$src2), (SUB64rr GR64:$src1, GR64:$src2)>; - -// sub reg, mem -def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)), - (SUB8rm GR8:$src1, addr:$src2)>; -def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)), - (SUB16rm GR16:$src1, addr:$src2)>; -def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)), - (SUB32rm GR32:$src1, addr:$src2)>; -def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)), - (SUB64rm GR64:$src1, addr:$src2)>; - -// sub reg, imm -def : Pat<(sub GR8:$src1, imm:$src2), - (SUB8ri GR8:$src1, imm:$src2)>; -def : Pat<(sub GR16:$src1, imm:$src2), - (SUB16ri GR16:$src1, imm:$src2)>; -def : Pat<(sub GR32:$src1, imm:$src2), - (SUB32ri GR32:$src1, imm:$src2)>; -def : Pat<(sub GR64:$src1, i64immSExt32:$src2), - (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>; - -// sub 0, reg -def : Pat<(X86sub_flag 0, GR8 :$src), (NEG8r GR8 :$src)>; -def : Pat<(X86sub_flag 0, GR16:$src), (NEG16r GR16:$src)>; -def : Pat<(X86sub_flag 0, GR32:$src), (NEG32r GR32:$src)>; -def : Pat<(X86sub_flag 0, GR64:$src), (NEG64r GR64:$src)>; - -// mul reg, reg -def : Pat<(mul GR16:$src1, GR16:$src2), - (IMUL16rr GR16:$src1, GR16:$src2)>; -def : Pat<(mul GR32:$src1, GR32:$src2), - (IMUL32rr GR32:$src1, GR32:$src2)>; -def : Pat<(mul GR64:$src1, GR64:$src2), - (IMUL64rr GR64:$src1, GR64:$src2)>; - -// mul reg, mem -def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)), - (IMUL16rm GR16:$src1, addr:$src2)>; -def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)), - (IMUL32rm GR32:$src1, addr:$src2)>; -def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)), - (IMUL64rm GR64:$src1, addr:$src2)>; +multiclass EFLAGSDefiningPats<string suffix, Predicate p> { + let Predicates = [p] in { + // add reg, reg + def : Pat<(add GR8 :$src1, GR8 :$src2), (!cast<Instruction>(ADD8rr#suffix) GR8 :$src1, GR8 :$src2)>; + def : Pat<(add GR16:$src1, GR16:$src2), (!cast<Instruction>(ADD16rr#suffix) GR16:$src1, GR16:$src2)>; + def : Pat<(add GR32:$src1, GR32:$src2), (!cast<Instruction>(ADD32rr#suffix) GR32:$src1, GR32:$src2)>; + def : Pat<(add 
GR64:$src1, GR64:$src2), (!cast<Instruction>(ADD64rr#suffix) GR64:$src1, GR64:$src2)>; + + // add reg, mem + def : Pat<(add GR8:$src1, (loadi8 addr:$src2)), + (!cast<Instruction>(ADD8rm#suffix) GR8:$src1, addr:$src2)>; + def : Pat<(add GR16:$src1, (loadi16 addr:$src2)), + (!cast<Instruction>(ADD16rm#suffix) GR16:$src1, addr:$src2)>; + def : Pat<(add GR32:$src1, (loadi32 addr:$src2)), + (!cast<Instruction>(ADD32rm#suffix) GR32:$src1, addr:$src2)>; + def : Pat<(add GR64:$src1, (loadi64 addr:$src2)), + (!cast<Instruction>(ADD64rm#suffix) GR64:$src1, addr:$src2)>; + + // add reg, imm + def : Pat<(add GR8 :$src1, imm:$src2), (!cast<Instruction>(ADD8ri#suffix) GR8:$src1 , imm:$src2)>; + def : Pat<(add GR16:$src1, imm:$src2), (!cast<Instruction>(ADD16ri#suffix) GR16:$src1, imm:$src2)>; + def : Pat<(add GR32:$src1, imm:$src2), (!cast<Instruction>(ADD32ri#suffix) GR32:$src1, imm:$src2)>; + def : Pat<(add GR64:$src1, i64immSExt32:$src2), (!cast<Instruction>(ADD64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>; + + // sub reg, reg + def : Pat<(sub GR8 :$src1, GR8 :$src2), (!cast<Instruction>(SUB8rr#suffix) GR8 :$src1, GR8 :$src2)>; + def : Pat<(sub GR16:$src1, GR16:$src2), (!cast<Instruction>(SUB16rr#suffix) GR16:$src1, GR16:$src2)>; + def : Pat<(sub GR32:$src1, GR32:$src2), (!cast<Instruction>(SUB32rr#suffix) GR32:$src1, GR32:$src2)>; + def : Pat<(sub GR64:$src1, GR64:$src2), (!cast<Instruction>(SUB64rr#suffix) GR64:$src1, GR64:$src2)>; + + // sub reg, mem + def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)), + (!cast<Instruction>(SUB8rm#suffix) GR8:$src1, addr:$src2)>; + def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)), + (!cast<Instruction>(SUB16rm#suffix) GR16:$src1, addr:$src2)>; + def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)), + (!cast<Instruction>(SUB32rm#suffix) GR32:$src1, addr:$src2)>; + def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)), + (!cast<Instruction>(SUB64rm#suffix) GR64:$src1, addr:$src2)>; + + // sub reg, imm + def : Pat<(sub GR8:$src1, imm:$src2), + (!cast<Instruction>(SUB8ri#suffix) GR8:$src1, imm:$src2)>; + def : Pat<(sub GR16:$src1, imm:$src2), + (!cast<Instruction>(SUB16ri#suffix) GR16:$src1, imm:$src2)>; + def : Pat<(sub GR32:$src1, imm:$src2), + (!cast<Instruction>(SUB32ri#suffix) GR32:$src1, imm:$src2)>; + def : Pat<(sub GR64:$src1, i64immSExt32:$src2), + (!cast<Instruction>(SUB64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>; + + // sub 0, reg + def : Pat<(X86sub_flag 0, GR8 :$src), (!cast<Instruction>(NEG8r#suffix) GR8 :$src)>; + def : Pat<(X86sub_flag 0, GR16:$src), (!cast<Instruction>(NEG16r#suffix) GR16:$src)>; + def : Pat<(X86sub_flag 0, GR32:$src), (!cast<Instruction>(NEG32r#suffix) GR32:$src)>; + def : Pat<(X86sub_flag 0, GR64:$src), (!cast<Instruction>(NEG64r#suffix) GR64:$src)>; + + // mul reg, reg + def : Pat<(mul GR16:$src1, GR16:$src2), + (!cast<Instruction>(IMUL16rr#suffix) GR16:$src1, GR16:$src2)>; + def : Pat<(mul GR32:$src1, GR32:$src2), + (!cast<Instruction>(IMUL32rr#suffix) GR32:$src1, GR32:$src2)>; + def : Pat<(mul GR64:$src1, GR64:$src2), + (!cast<Instruction>(IMUL64rr#suffix) GR64:$src1, GR64:$src2)>; + + // mul reg, mem + def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)), + (!cast<Instruction>(IMUL16rm#suffix) GR16:$src1, addr:$src2)>; + def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)), + (!cast<Instruction>(IMUL32rm#suffix) GR32:$src1, addr:$src2)>; + def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)), + (!cast<Instruction>(IMUL64rm#suffix) GR64:$src1, addr:$src2)>; + + // or reg/reg. 
+ def : Pat<(or GR8 :$src1, GR8 :$src2), (!cast<Instruction>(OR8rr#suffix) GR8 :$src1, GR8 :$src2)>; + def : Pat<(or GR16:$src1, GR16:$src2), (!cast<Instruction>(OR16rr#suffix) GR16:$src1, GR16:$src2)>; + def : Pat<(or GR32:$src1, GR32:$src2), (!cast<Instruction>(OR32rr#suffix) GR32:$src1, GR32:$src2)>; + def : Pat<(or GR64:$src1, GR64:$src2), (!cast<Instruction>(OR64rr#suffix) GR64:$src1, GR64:$src2)>; + + // or reg/mem + def : Pat<(or GR8:$src1, (loadi8 addr:$src2)), + (!cast<Instruction>(OR8rm#suffix) GR8:$src1, addr:$src2)>; + def : Pat<(or GR16:$src1, (loadi16 addr:$src2)), + (!cast<Instruction>(OR16rm#suffix) GR16:$src1, addr:$src2)>; + def : Pat<(or GR32:$src1, (loadi32 addr:$src2)), + (!cast<Instruction>(OR32rm#suffix) GR32:$src1, addr:$src2)>; + def : Pat<(or GR64:$src1, (loadi64 addr:$src2)), + (!cast<Instruction>(OR64rm#suffix) GR64:$src1, addr:$src2)>; + + // or reg/imm + def : Pat<(or GR8:$src1 , imm:$src2), (!cast<Instruction>(OR8ri#suffix) GR8 :$src1, imm:$src2)>; + def : Pat<(or GR16:$src1, imm:$src2), (!cast<Instruction>(OR16ri#suffix) GR16:$src1, imm:$src2)>; + def : Pat<(or GR32:$src1, imm:$src2), (!cast<Instruction>(OR32ri#suffix) GR32:$src1, imm:$src2)>; + def : Pat<(or GR64:$src1, i64immSExt32:$src2), + (!cast<Instruction>(OR64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>; + + // xor reg/reg + def : Pat<(xor GR8 :$src1, GR8 :$src2), (!cast<Instruction>(XOR8rr#suffix) GR8 :$src1, GR8 :$src2)>; + def : Pat<(xor GR16:$src1, GR16:$src2), (!cast<Instruction>(XOR16rr#suffix) GR16:$src1, GR16:$src2)>; + def : Pat<(xor GR32:$src1, GR32:$src2), (!cast<Instruction>(XOR32rr#suffix) GR32:$src1, GR32:$src2)>; + def : Pat<(xor GR64:$src1, GR64:$src2), (!cast<Instruction>(XOR64rr#suffix) GR64:$src1, GR64:$src2)>; + + // xor reg/mem + def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)), + (!cast<Instruction>(XOR8rm#suffix) GR8:$src1, addr:$src2)>; + def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)), + (!cast<Instruction>(XOR16rm#suffix) GR16:$src1, addr:$src2)>; + def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)), + (!cast<Instruction>(XOR32rm#suffix) GR32:$src1, addr:$src2)>; + def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)), + (!cast<Instruction>(XOR64rm#suffix) GR64:$src1, addr:$src2)>; + + // xor reg/imm + def : Pat<(xor GR8:$src1, imm:$src2), + (!cast<Instruction>(XOR8ri#suffix) GR8:$src1, imm:$src2)>; + def : Pat<(xor GR16:$src1, imm:$src2), + (!cast<Instruction>(XOR16ri#suffix) GR16:$src1, imm:$src2)>; + def : Pat<(xor GR32:$src1, imm:$src2), + (!cast<Instruction>(XOR32ri#suffix) GR32:$src1, imm:$src2)>; + def : Pat<(xor GR64:$src1, i64immSExt32:$src2), + (!cast<Instruction>(XOR64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>; + + // and reg/reg + def : Pat<(and GR8 :$src1, GR8 :$src2), (!cast<Instruction>(AND8rr#suffix) GR8 :$src1, GR8 :$src2)>; + def : Pat<(and GR16:$src1, GR16:$src2), (!cast<Instruction>(AND16rr#suffix) GR16:$src1, GR16:$src2)>; + def : Pat<(and GR32:$src1, GR32:$src2), (!cast<Instruction>(AND32rr#suffix) GR32:$src1, GR32:$src2)>; + def : Pat<(and GR64:$src1, GR64:$src2), (!cast<Instruction>(AND64rr#suffix) GR64:$src1, GR64:$src2)>; + + // and reg/mem + def : Pat<(and GR8:$src1, (loadi8 addr:$src2)), + (!cast<Instruction>(AND8rm#suffix) GR8:$src1, addr:$src2)>; + def : Pat<(and GR16:$src1, (loadi16 addr:$src2)), + (!cast<Instruction>(AND16rm#suffix) GR16:$src1, addr:$src2)>; + def : Pat<(and GR32:$src1, (loadi32 addr:$src2)), + (!cast<Instruction>(AND32rm#suffix) GR32:$src1, addr:$src2)>; + def : Pat<(and GR64:$src1, (loadi64 addr:$src2)), + 
(!cast<Instruction>(AND64rm#suffix) GR64:$src1, addr:$src2)>; + + // and reg/imm + def : Pat<(and GR8:$src1, imm:$src2), + (!cast<Instruction>(AND8ri#suffix) GR8:$src1, imm:$src2)>; + def : Pat<(and GR16:$src1, imm:$src2), + (!cast<Instruction>(AND16ri#suffix) GR16:$src1, imm:$src2)>; + def : Pat<(and GR32:$src1, imm:$src2), + (!cast<Instruction>(AND32ri#suffix) GR32:$src1, imm:$src2)>; + def : Pat<(and GR64:$src1, i64immSExt32:$src2), + (!cast<Instruction>(AND64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>; + } + + // Increment/Decrement reg. + // Do not make INC/DEC if it is slow + let Predicates = [UseIncDec, p] in { + def : Pat<(add GR8:$src, 1), (!cast<Instruction>(INC8r#suffix) GR8:$src)>; + def : Pat<(add GR16:$src, 1), (!cast<Instruction>(INC16r#suffix) GR16:$src)>; + def : Pat<(add GR32:$src, 1), (!cast<Instruction>(INC32r#suffix) GR32:$src)>; + def : Pat<(add GR64:$src, 1), (!cast<Instruction>(INC64r#suffix) GR64:$src)>; + def : Pat<(add GR8:$src, -1), (!cast<Instruction>(DEC8r#suffix) GR8:$src)>; + def : Pat<(add GR16:$src, -1), (!cast<Instruction>(DEC16r#suffix) GR16:$src)>; + def : Pat<(add GR32:$src, -1), (!cast<Instruction>(DEC32r#suffix) GR32:$src)>; + def : Pat<(add GR64:$src, -1), (!cast<Instruction>(DEC64r#suffix) GR64:$src)>; + + def : Pat<(X86add_flag_nocf GR8:$src, -1), (!cast<Instruction>(DEC8r#suffix) GR8:$src)>; + def : Pat<(X86add_flag_nocf GR16:$src, -1), (!cast<Instruction>(DEC16r#suffix) GR16:$src)>; + def : Pat<(X86add_flag_nocf GR32:$src, -1), (!cast<Instruction>(DEC32r#suffix) GR32:$src)>; + def : Pat<(X86add_flag_nocf GR64:$src, -1), (!cast<Instruction>(DEC64r#suffix) GR64:$src)>; + def : Pat<(X86sub_flag_nocf GR8:$src, -1), (!cast<Instruction>(INC8r#suffix) GR8:$src)>; + def : Pat<(X86sub_flag_nocf GR16:$src, -1), (!cast<Instruction>(INC16r#suffix) GR16:$src)>; + def : Pat<(X86sub_flag_nocf GR32:$src, -1), (!cast<Instruction>(INC32r#suffix) GR32:$src)>; + def : Pat<(X86sub_flag_nocf GR64:$src, -1), (!cast<Instruction>(INC64r#suffix) GR64:$src)>; + } +} + +defm : EFLAGSDefiningPats<"", NoNDD>; +defm : EFLAGSDefiningPats<"_ND", HasNDD>; // mul reg, imm def : Pat<(mul GR16:$src1, imm:$src2), @@ -2023,103 +2146,6 @@ def : Pat<(mul (loadi32 addr:$src1), imm:$src2), def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2), (IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>; -// Increment/Decrement reg. -// Do not make INC/DEC if it is slow -let Predicates = [UseIncDec] in { - def : Pat<(add GR8:$src, 1), (INC8r GR8:$src)>; - def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>; - def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>; - def : Pat<(add GR64:$src, 1), (INC64r GR64:$src)>; - def : Pat<(add GR8:$src, -1), (DEC8r GR8:$src)>; - def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>; - def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>; - def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>; - - def : Pat<(X86add_flag_nocf GR8:$src, -1), (DEC8r GR8:$src)>; - def : Pat<(X86add_flag_nocf GR16:$src, -1), (DEC16r GR16:$src)>; - def : Pat<(X86add_flag_nocf GR32:$src, -1), (DEC32r GR32:$src)>; - def : Pat<(X86add_flag_nocf GR64:$src, -1), (DEC64r GR64:$src)>; - def : Pat<(X86sub_flag_nocf GR8:$src, -1), (INC8r GR8:$src)>; - def : Pat<(X86sub_flag_nocf GR16:$src, -1), (INC16r GR16:$src)>; - def : Pat<(X86sub_flag_nocf GR32:$src, -1), (INC32r GR32:$src)>; - def : Pat<(X86sub_flag_nocf GR64:$src, -1), (INC64r GR64:$src)>; -} - -// or reg/reg. 
-def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>; -def : Pat<(or GR16:$src1, GR16:$src2), (OR16rr GR16:$src1, GR16:$src2)>; -def : Pat<(or GR32:$src1, GR32:$src2), (OR32rr GR32:$src1, GR32:$src2)>; -def : Pat<(or GR64:$src1, GR64:$src2), (OR64rr GR64:$src1, GR64:$src2)>; - -// or reg/mem -def : Pat<(or GR8:$src1, (loadi8 addr:$src2)), - (OR8rm GR8:$src1, addr:$src2)>; -def : Pat<(or GR16:$src1, (loadi16 addr:$src2)), - (OR16rm GR16:$src1, addr:$src2)>; -def : Pat<(or GR32:$src1, (loadi32 addr:$src2)), - (OR32rm GR32:$src1, addr:$src2)>; -def : Pat<(or GR64:$src1, (loadi64 addr:$src2)), - (OR64rm GR64:$src1, addr:$src2)>; - -// or reg/imm -def : Pat<(or GR8:$src1 , imm:$src2), (OR8ri GR8 :$src1, imm:$src2)>; -def : Pat<(or GR16:$src1, imm:$src2), (OR16ri GR16:$src1, imm:$src2)>; -def : Pat<(or GR32:$src1, imm:$src2), (OR32ri GR32:$src1, imm:$src2)>; -def : Pat<(or GR64:$src1, i64immSExt32:$src2), - (OR64ri32 GR64:$src1, i64immSExt32:$src2)>; - -// xor reg/reg -def : Pat<(xor GR8 :$src1, GR8 :$src2), (XOR8rr GR8 :$src1, GR8 :$src2)>; -def : Pat<(xor GR16:$src1, GR16:$src2), (XOR16rr GR16:$src1, GR16:$src2)>; -def : Pat<(xor GR32:$src1, GR32:$src2), (XOR32rr GR32:$src1, GR32:$src2)>; -def : Pat<(xor GR64:$src1, GR64:$src2), (XOR64rr GR64:$src1, GR64:$src2)>; - -// xor reg/mem -def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)), - (XOR8rm GR8:$src1, addr:$src2)>; -def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)), - (XOR16rm GR16:$src1, addr:$src2)>; -def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)), - (XOR32rm GR32:$src1, addr:$src2)>; -def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)), - (XOR64rm GR64:$src1, addr:$src2)>; - -// xor reg/imm -def : Pat<(xor GR8:$src1, imm:$src2), - (XOR8ri GR8:$src1, imm:$src2)>; -def : Pat<(xor GR16:$src1, imm:$src2), - (XOR16ri GR16:$src1, imm:$src2)>; -def : Pat<(xor GR32:$src1, imm:$src2), - (XOR32ri GR32:$src1, imm:$src2)>; -def : Pat<(xor GR64:$src1, i64immSExt32:$src2), - (XOR64ri32 GR64:$src1, i64immSExt32:$src2)>; - -// and reg/reg -def : Pat<(and GR8 :$src1, GR8 :$src2), (AND8rr GR8 :$src1, GR8 :$src2)>; -def : Pat<(and GR16:$src1, GR16:$src2), (AND16rr GR16:$src1, GR16:$src2)>; -def : Pat<(and GR32:$src1, GR32:$src2), (AND32rr GR32:$src1, GR32:$src2)>; -def : Pat<(and GR64:$src1, GR64:$src2), (AND64rr GR64:$src1, GR64:$src2)>; - -// and reg/mem -def : Pat<(and GR8:$src1, (loadi8 addr:$src2)), - (AND8rm GR8:$src1, addr:$src2)>; -def : Pat<(and GR16:$src1, (loadi16 addr:$src2)), - (AND16rm GR16:$src1, addr:$src2)>; -def : Pat<(and GR32:$src1, (loadi32 addr:$src2)), - (AND32rm GR32:$src1, addr:$src2)>; -def : Pat<(and GR64:$src1, (loadi64 addr:$src2)), - (AND64rm GR64:$src1, addr:$src2)>; - -// and reg/imm -def : Pat<(and GR8:$src1, imm:$src2), - (AND8ri GR8:$src1, imm:$src2)>; -def : Pat<(and GR16:$src1, imm:$src2), - (AND16ri GR16:$src1, imm:$src2)>; -def : Pat<(and GR32:$src1, imm:$src2), - (AND32ri GR32:$src1, imm:$src2)>; -def : Pat<(and GR64:$src1, i64immSExt32:$src2), - (AND64ri32 GR64:$src1, i64immSExt32:$src2)>; - // Bit scan instruction patterns to match explicit zero-undef behavior. 
def : Pat<(cttz_zero_undef GR16:$src), (BSF16rr GR16:$src)>; def : Pat<(cttz_zero_undef GR32:$src), (BSF32rr GR32:$src)>; diff --git a/llvm/lib/Target/X86/X86InstrMisc.td b/llvm/lib/Target/X86/X86InstrMisc.td index 97c625a64cfc..753cf62392a1 100644 --- a/llvm/lib/Target/X86/X86InstrMisc.td +++ b/llvm/lib/Target/X86/X86InstrMisc.td @@ -1523,28 +1523,28 @@ def MOVDIR64B64_EVEX : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem_GR64:$ // ENQCMD/S - Enqueue 64-byte command as user with 64-byte write atomicity // let SchedRW = [WriteStore], Defs = [EFLAGS] in { - def ENQCMD16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem:$src), + def ENQCMD16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem_GR16:$src), "enqcmd\t{$src, $dst|$dst, $src}", [(set EFLAGS, (X86enqcmd GR16:$dst, addr:$src))]>, T8, XD, AdSize16, Requires<[HasENQCMD, Not64BitMode]>; - def ENQCMD32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem:$src), + def ENQCMD32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem_GR32:$src), "enqcmd\t{$src, $dst|$dst, $src}", [(set EFLAGS, (X86enqcmd GR32:$dst, addr:$src))]>, T8, XD, AdSize32, Requires<[HasENQCMD]>; - def ENQCMD64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem:$src), + def ENQCMD64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem_GR64:$src), "enqcmd\t{$src, $dst|$dst, $src}", [(set EFLAGS, (X86enqcmd GR64:$dst, addr:$src))]>, T8, XD, AdSize64, Requires<[HasENQCMD, In64BitMode]>; - def ENQCMDS16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem:$src), + def ENQCMDS16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem_GR16:$src), "enqcmds\t{$src, $dst|$dst, $src}", [(set EFLAGS, (X86enqcmds GR16:$dst, addr:$src))]>, T8, XS, AdSize16, Requires<[HasENQCMD, Not64BitMode]>; - def ENQCMDS32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem:$src), + def ENQCMDS32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem_GR32:$src), "enqcmds\t{$src, $dst|$dst, $src}", [(set EFLAGS, (X86enqcmds GR32:$dst, addr:$src))]>, T8, XS, AdSize32, Requires<[HasENQCMD]>; - def ENQCMDS64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem:$src), + def ENQCMDS64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem_GR64:$src), "enqcmds\t{$src, $dst|$dst, $src}", [(set EFLAGS, (X86enqcmds GR64:$dst, addr:$src))]>, T8, XS, AdSize64, Requires<[HasENQCMD, In64BitMode]>; diff --git a/llvm/lib/Target/X86/X86PfmCounters.td b/llvm/lib/Target/X86/X86PfmCounters.td index 49ef6efc6aec..48d689549709 100644 --- a/llvm/lib/Target/X86/X86PfmCounters.td +++ b/llvm/lib/Target/X86/X86PfmCounters.td @@ -18,6 +18,10 @@ def DefaultPfmCounters : ProcPfmCounters {} def : PfmCountersDefaultBinding<DefaultPfmCounters>; // Intel X86 Counters. 
+defvar DefaultIntelPfmValidationCounters = [ + PfmValidationCounter<InstructionRetired, "INSTRUCTIONS_RETIRED"> +]; + def PentiumPfmCounters : ProcPfmCounters { let CycleCounter = PfmCounter<"cpu_clk_unhalted">; let UopsCounter = PfmCounter<"uops_retired">; @@ -100,6 +104,7 @@ def SandyBridgePfmCounters : ProcPfmCounters { PfmIssueCounter<"SBPort4", "uops_dispatched_port:port_4">, PfmIssueCounter<"SBPort5", "uops_dispatched_port:port_5"> ]; + let ValidationCounters = DefaultIntelPfmValidationCounters; } def : PfmCountersBinding<"sandybridge", SandyBridgePfmCounters>; def : PfmCountersBinding<"ivybridge", SandyBridgePfmCounters>; @@ -117,6 +122,7 @@ def HaswellPfmCounters : ProcPfmCounters { PfmIssueCounter<"HWPort6", "uops_executed_port:port_6">, PfmIssueCounter<"HWPort7", "uops_executed_port:port_7"> ]; + let ValidationCounters = DefaultIntelPfmValidationCounters; } def : PfmCountersBinding<"haswell", HaswellPfmCounters>; @@ -133,6 +139,7 @@ def BroadwellPfmCounters : ProcPfmCounters { PfmIssueCounter<"BWPort6", "uops_executed_port:port_6">, PfmIssueCounter<"BWPort7", "uops_executed_port:port_7"> ]; + let ValidationCounters = DefaultIntelPfmValidationCounters; } def : PfmCountersBinding<"broadwell", BroadwellPfmCounters>; @@ -149,6 +156,7 @@ def SkylakeClientPfmCounters : ProcPfmCounters { PfmIssueCounter<"SKLPort6", "uops_dispatched_port:port_6">, PfmIssueCounter<"SKLPort7", "uops_dispatched_port:port_7"> ]; + let ValidationCounters = DefaultIntelPfmValidationCounters; } def : PfmCountersBinding<"skylake", SkylakeClientPfmCounters>; @@ -165,6 +173,7 @@ def SkylakeServerPfmCounters : ProcPfmCounters { PfmIssueCounter<"SKXPort6", "uops_dispatched_port:port_6">, PfmIssueCounter<"SKXPort7", "uops_dispatched_port:port_7"> ]; + let ValidationCounters = DefaultIntelPfmValidationCounters; } def : PfmCountersBinding<"skylake-avx512", SkylakeServerPfmCounters>; def : PfmCountersBinding<"cascadelake", SkylakeServerPfmCounters>; @@ -182,6 +191,7 @@ def IceLakePfmCounters : ProcPfmCounters { PfmIssueCounter<"ICXPort6", "uops_dispatched_port:port_6">, PfmIssueCounter<"ICXPort78", "uops_dispatched_port:port_7_8"> ]; + let ValidationCounters = DefaultIntelPfmValidationCounters; } def : PfmCountersBinding<"icelake-client", IceLakePfmCounters>; def : PfmCountersBinding<"icelake-server", IceLakePfmCounters>; @@ -189,6 +199,10 @@ def : PfmCountersBinding<"rocketlake", IceLakePfmCounters>; def : PfmCountersBinding<"tigerlake", IceLakePfmCounters>; // AMD X86 Counters. +defvar DefaultAMDPfmValidationCounters = [ + PfmValidationCounter<InstructionRetired, "RETIRED_INSTRUCTIONS"> +]; + // Set basic counters for AMD cpus that we know libpfm4 supports. 
def DefaultAMDPfmCounters : ProcPfmCounters { let CycleCounter = PfmCounter<"cpu_clk_unhalted">; @@ -265,6 +279,7 @@ def ZnVer1PfmCounters : ProcPfmCounters { PfmIssueCounter<"ZnAGU", "ls_dispatch:ld_st_dispatch + ls_dispatch:ld_dispatch + ls_dispatch:store_dispatch">, PfmIssueCounter<"ZnDivider", "div_op_count"> ]; + let ValidationCounters = DefaultAMDPfmValidationCounters; } def : PfmCountersBinding<"znver1", ZnVer1PfmCounters>; @@ -275,6 +290,7 @@ def ZnVer2PfmCounters : ProcPfmCounters { PfmIssueCounter<"Zn2AGU", "ls_dispatch:ld_st_dispatch + ls_dispatch:ld_dispatch + ls_dispatch:store_dispatch">, PfmIssueCounter<"Zn2Divider", "div_op_count"> ]; + let ValidationCounters = DefaultAMDPfmValidationCounters; } def : PfmCountersBinding<"znver2", ZnVer2PfmCounters>; @@ -288,6 +304,7 @@ def ZnVer3PfmCounters : ProcPfmCounters { PfmIssueCounter<"Zn3Store", "ls_dispatch:store_dispatch">, PfmIssueCounter<"Zn3Divider", "div_op_count"> ]; + let ValidationCounters = DefaultAMDPfmValidationCounters; } def : PfmCountersBinding<"znver3", ZnVer3PfmCounters>; @@ -302,5 +319,6 @@ def ZnVer4PfmCounters : ProcPfmCounters { PfmIssueCounter<"Zn4Divider", "div_op_count">, PfmIssueCounter<"Zn4AGU", "ls_dispatch:ld_st_dispatch + ls_dispatch:ld_dispatch + ls_dispatch:store_dispatch"> ]; + let ValidationCounters = DefaultAMDPfmValidationCounters; } def : PfmCountersBinding<"znver4", ZnVer4PfmCounters>; diff --git a/llvm/lib/TargetParser/ARMTargetParserCommon.cpp b/llvm/lib/TargetParser/ARMTargetParserCommon.cpp index 6d3a59d532fd..45d04f9bcbfb 100644 --- a/llvm/lib/TargetParser/ARMTargetParserCommon.cpp +++ b/llvm/lib/TargetParser/ARMTargetParserCommon.cpp @@ -140,13 +140,14 @@ ARM::EndianKind ARM::parseArchEndian(StringRef Arch) { // an erroneous part of the spec. bool ARM::parseBranchProtection(StringRef Spec, ParsedBranchProtection &PBP, StringRef &Err) { - PBP = {"none", "a_key", false, false}; + PBP = {"none", "a_key", false, false, false}; if (Spec == "none") return true; // defaults are ok if (Spec == "standard") { PBP.Scope = "non-leaf"; PBP.BranchTargetEnforcement = true; + PBP.GuardedControlStack = true; return true; } @@ -173,6 +174,10 @@ bool ARM::parseBranchProtection(StringRef Spec, ParsedBranchProtection &PBP, } continue; } + if (Opt == "gcs") { + PBP.GuardedControlStack = true; + continue; + } if (Opt == "") Err = "<empty>"; else diff --git a/llvm/lib/TextAPI/InterfaceFile.cpp b/llvm/lib/TextAPI/InterfaceFile.cpp index 3689ab919191..d712ed386825 100644 --- a/llvm/lib/TextAPI/InterfaceFile.cpp +++ b/llvm/lib/TextAPI/InterfaceFile.cpp @@ -24,17 +24,23 @@ void InterfaceFileRef::addTarget(const Target &Target) { void InterfaceFile::addAllowableClient(StringRef InstallName, const Target &Target) { + if (InstallName.empty()) + return; auto Client = addEntry(AllowableClients, InstallName); Client->addTarget(Target); } void InterfaceFile::addReexportedLibrary(StringRef InstallName, const Target &Target) { + if (InstallName.empty()) + return; auto Lib = addEntry(ReexportedLibraries, InstallName); Lib->addTarget(Target); } void InterfaceFile::addParentUmbrella(const Target &Target_, StringRef Parent) { + if (Parent.empty()) + return; auto Iter = lower_bound(ParentUmbrellas, Target_, [](const std::pair<Target, std::string> &LHS, Target RHS) { return LHS.first < RHS; }); @@ -48,6 +54,8 @@ void InterfaceFile::addParentUmbrella(const Target &Target_, StringRef Parent) { } void InterfaceFile::addRPath(const Target &InputTarget, StringRef RPath) { + if (RPath.empty()) + return; using RPathEntryT = const 
std::pair<Target, std::string>; RPathEntryT Entry(InputTarget, RPath); auto Iter = diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index 70a3f3067d9d..0a6f69bc73d5 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -77,6 +77,16 @@ STATISTIC(MaxAllocVersionsThinBackend, "allocation during ThinLTO backend"); STATISTIC(UnclonableAllocsThinBackend, "Number of unclonable ambigous allocations during ThinLTO backend"); +STATISTIC(RemovedEdgesWithMismatchedCallees, + "Number of edges removed due to mismatched callees (profiled vs IR)"); +STATISTIC(FoundProfiledCalleeCount, + "Number of profiled callees found via tail calls"); +STATISTIC(FoundProfiledCalleeDepth, + "Aggregate depth of profiled callees found via tail calls"); +STATISTIC(FoundProfiledCalleeMaxDepth, + "Maximum depth of profiled callees found via tail calls"); +STATISTIC(FoundProfiledCalleeNonUniquelyCount, + "Number of profiled callees found via multiple tail call chains"); static cl::opt<std::string> DotFilePathPrefix( "memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, @@ -104,6 +114,12 @@ static cl::opt<std::string> MemProfImportSummary( cl::desc("Import summary to use for testing the ThinLTO backend via opt"), cl::Hidden); +static cl::opt<unsigned> + TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5), + cl::Hidden, + cl::desc("Max depth to recursively search for missing " + "frames through tail calls.")); + namespace llvm { // Indicate we are linking with an allocator that supports hot/cold operator // new interfaces. @@ -365,8 +381,7 @@ protected: /// Save lists of calls with MemProf metadata in each function, for faster /// iteration. - std::vector<std::pair<FuncTy *, std::vector<CallInfo>>> - FuncToCallsWithMetadata; + MapVector<FuncTy *, std::vector<CallInfo>> FuncToCallsWithMetadata; /// Map from callsite node to the enclosing caller function. std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc; @@ -411,9 +426,25 @@ private: return static_cast<const DerivedCCG *>(this)->getStackId(IdOrIndex); } - /// Returns true if the given call targets the given function. - bool calleeMatchesFunc(CallTy Call, const FuncTy *Func) { - return static_cast<DerivedCCG *>(this)->calleeMatchesFunc(Call, Func); + /// Returns true if the given call targets the callee of the given edge, or if + /// we were able to identify the call chain through intermediate tail calls. + /// In the latter case new context nodes are added to the graph for the + /// identified tail calls, and their synthesized nodes are added to + /// TailCallToContextNodeMap. The EdgeIter is updated in either case to the + /// next element after the input position (either incremented or updated after + /// removing the old edge). + bool + calleesMatch(CallTy Call, EdgeIter &EI, + MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap); + + /// Returns true if the given call targets the given function, or if we were + /// able to identify the call chain through intermediate tail calls (in which + /// case FoundCalleeChain will be populated). 
+ bool calleeMatchesFunc( + CallTy Call, const FuncTy *Func, const FuncTy *CallerFunc, + std::vector<std::pair<CallTy, FuncTy *>> &FoundCalleeChain) { + return static_cast<DerivedCCG *>(this)->calleeMatchesFunc( + Call, Func, CallerFunc, FoundCalleeChain); } /// Get a list of nodes corresponding to the stack ids in the given @@ -553,7 +584,13 @@ private: Instruction *>; uint64_t getStackId(uint64_t IdOrIndex) const; - bool calleeMatchesFunc(Instruction *Call, const Function *Func); + bool calleeMatchesFunc( + Instruction *Call, const Function *Func, const Function *CallerFunc, + std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain); + bool findProfiledCalleeThroughTailCalls( + const Function *ProfiledCallee, Value *CurCallee, unsigned Depth, + std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain, + bool &FoundMultipleCalleeChains); uint64_t getLastStackId(Instruction *Call); std::vector<uint64_t> getStackIdsWithContextNodesForCall(Instruction *Call); void updateAllocationCall(CallInfo &Call, AllocationType AllocType); @@ -606,12 +643,31 @@ public: function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> isPrevailing); + ~IndexCallsiteContextGraph() { + // Now that we are done with the graph it is safe to add the new + // CallsiteInfo structs to the function summary vectors. The graph nodes + // point into locations within these vectors, so we don't want to add them + // any earlier. + for (auto &I : FunctionCalleesToSynthesizedCallsiteInfos) { + auto *FS = I.first; + for (auto &Callsite : I.second) + FS->addCallsite(*Callsite.second); + } + } + private: friend CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary, IndexCall>; uint64_t getStackId(uint64_t IdOrIndex) const; - bool calleeMatchesFunc(IndexCall &Call, const FunctionSummary *Func); + bool calleeMatchesFunc( + IndexCall &Call, const FunctionSummary *Func, + const FunctionSummary *CallerFunc, + std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain); + bool findProfiledCalleeThroughTailCalls( + ValueInfo ProfiledCallee, ValueInfo CurCallee, unsigned Depth, + std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain, + bool &FoundMultipleCalleeChains); uint64_t getLastStackId(IndexCall &Call); std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &Call); void updateAllocationCall(CallInfo &Call, AllocationType AllocType); @@ -630,6 +686,16 @@ private: std::map<const FunctionSummary *, ValueInfo> FSToVIMap; const ModuleSummaryIndex &Index; + function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> + isPrevailing; + + // Saves/owns the callsite info structures synthesized for missing tail call + // frames that we discover while building the graph. + // It maps from the summary of the function making the tail call, to a map + // of callee ValueInfo to corresponding synthesized callsite info. 
+ std::unordered_map<FunctionSummary *, + std::map<ValueInfo, std::unique_ptr<CallsiteInfo>>> + FunctionCalleesToSynthesizedCallsiteInfos; }; } // namespace @@ -1493,7 +1559,7 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph( } } if (!CallsWithMetadata.empty()) - FuncToCallsWithMetadata.push_back({&F, CallsWithMetadata}); + FuncToCallsWithMetadata[&F] = CallsWithMetadata; } if (DumpCCG) { @@ -1518,7 +1584,7 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph( ModuleSummaryIndex &Index, function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> isPrevailing) - : Index(Index) { + : Index(Index), isPrevailing(isPrevailing) { for (auto &I : Index) { auto VI = Index.getValueInfo(I); for (auto &S : VI.getSummaryList()) { @@ -1572,7 +1638,7 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph( CallsWithMetadata.push_back({&SN}); if (!CallsWithMetadata.empty()) - FuncToCallsWithMetadata.push_back({FS, CallsWithMetadata}); + FuncToCallsWithMetadata[FS] = CallsWithMetadata; if (!FS->allocs().empty() || !FS->callsites().empty()) FSToVIMap[FS] = VI; @@ -1604,6 +1670,11 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, // this transformation for regular LTO, and for ThinLTO we can simulate that // effect in the summary and perform the actual speculative devirtualization // while cloning in the ThinLTO backend. + + // Keep track of the new nodes synthesized for discovered tail calls missing + // from the profiled contexts. + MapVector<CallInfo, ContextNode *> TailCallToContextNodeMap; + for (auto Entry = NonAllocationCallToContextNodeMap.begin(); Entry != NonAllocationCallToContextNodeMap.end();) { auto *Node = Entry->second; @@ -1611,13 +1682,17 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, // Check all node callees and see if in the same function. bool Removed = false; auto Call = Node->Call.call(); - for (auto &Edge : Node->CalleeEdges) { - if (!Edge->Callee->hasCall()) + for (auto EI = Node->CalleeEdges.begin(); EI != Node->CalleeEdges.end();) { + auto Edge = *EI; + if (!Edge->Callee->hasCall()) { + ++EI; continue; + } assert(NodeToCallingFunc.count(Edge->Callee)); // Check if the called function matches that of the callee node. - if (calleeMatchesFunc(Call, NodeToCallingFunc[Edge->Callee])) + if (calleesMatch(Call, EI, TailCallToContextNodeMap)) continue; + RemovedEdgesWithMismatchedCallees++; // Work around by setting Node to have a null call, so it gets // skipped during cloning. Otherwise assignFunctions will assert // because its data structures are not designed to handle this case. @@ -1629,6 +1704,11 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, if (!Removed) Entry++; } + + // Add the new nodes after the above loop so that the iteration is not + // invalidated. 
+ for (auto &[Call, Node] : TailCallToContextNodeMap) + NonAllocationCallToContextNodeMap[Call] = Node; } uint64_t ModuleCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const { @@ -1642,8 +1722,173 @@ uint64_t IndexCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const { return Index.getStackIdAtIndex(IdOrIndex); } -bool ModuleCallsiteContextGraph::calleeMatchesFunc(Instruction *Call, - const Function *Func) { +template <typename DerivedCCG, typename FuncTy, typename CallTy> +bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch( + CallTy Call, EdgeIter &EI, + MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap) { + auto Edge = *EI; + const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee]; + const FuncTy *CallerFunc = NodeToCallingFunc[Edge->Caller]; + // Will be populated in order of callee to caller if we find a chain of tail + // calls between the profiled caller and callee. + std::vector<std::pair<CallTy, FuncTy *>> FoundCalleeChain; + if (!calleeMatchesFunc(Call, ProfiledCalleeFunc, CallerFunc, + FoundCalleeChain)) { + ++EI; + return false; + } + + // The usual case where the profiled callee matches that of the IR/summary. + if (FoundCalleeChain.empty()) { + ++EI; + return true; + } + + auto AddEdge = [Edge, &EI](ContextNode *Caller, ContextNode *Callee) { + auto *CurEdge = Callee->findEdgeFromCaller(Caller); + // If there is already an edge between these nodes, simply update it and + // return. + if (CurEdge) { + CurEdge->ContextIds.insert(Edge->ContextIds.begin(), + Edge->ContextIds.end()); + CurEdge->AllocTypes |= Edge->AllocTypes; + return; + } + // Otherwise, create a new edge and insert it into the caller and callee + // lists. + auto NewEdge = std::make_shared<ContextEdge>( + Callee, Caller, Edge->AllocTypes, Edge->ContextIds); + Callee->CallerEdges.push_back(NewEdge); + if (Caller == Edge->Caller) { + // If we are inserting the new edge into the current edge's caller, insert + // the new edge before the current iterator position, and then increment + // back to the current edge. + EI = Caller->CalleeEdges.insert(EI, NewEdge); + ++EI; + assert(*EI == Edge && + "Iterator position not restored after insert and increment"); + } else + Caller->CalleeEdges.push_back(NewEdge); + }; + + // Create new nodes for each found callee and connect in between the profiled + // caller and callee. + auto *CurCalleeNode = Edge->Callee; + for (auto &[NewCall, Func] : FoundCalleeChain) { + ContextNode *NewNode = nullptr; + // First check if we have already synthesized a node for this tail call. + if (TailCallToContextNodeMap.count(NewCall)) { + NewNode = TailCallToContextNodeMap[NewCall]; + NewNode->ContextIds.insert(Edge->ContextIds.begin(), + Edge->ContextIds.end()); + NewNode->AllocTypes |= Edge->AllocTypes; + } else { + FuncToCallsWithMetadata[Func].push_back({NewCall}); + // Create Node and record node info. + NodeOwner.push_back( + std::make_unique<ContextNode>(/*IsAllocation=*/false, NewCall)); + NewNode = NodeOwner.back().get(); + NodeToCallingFunc[NewNode] = Func; + TailCallToContextNodeMap[NewCall] = NewNode; + NewNode->ContextIds = Edge->ContextIds; + NewNode->AllocTypes = Edge->AllocTypes; + } + + // Hook up node to its callee node + AddEdge(NewNode, CurCalleeNode); + + CurCalleeNode = NewNode; + } + + // Hook up edge's original caller to new callee node. 
+ AddEdge(Edge->Caller, CurCalleeNode); + + // Remove old edge + Edge->Callee->eraseCallerEdge(Edge.get()); + EI = Edge->Caller->CalleeEdges.erase(EI); + + return true; +} + +bool ModuleCallsiteContextGraph::findProfiledCalleeThroughTailCalls( + const Function *ProfiledCallee, Value *CurCallee, unsigned Depth, + std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain, + bool &FoundMultipleCalleeChains) { + // Stop recursive search if we have already explored the maximum specified + // depth. + if (Depth > TailCallSearchDepth) + return false; + + auto SaveCallsiteInfo = [&](Instruction *Callsite, Function *F) { + FoundCalleeChain.push_back({Callsite, F}); + }; + + auto *CalleeFunc = dyn_cast<Function>(CurCallee); + if (!CalleeFunc) { + auto *Alias = dyn_cast<GlobalAlias>(CurCallee); + assert(Alias); + CalleeFunc = dyn_cast<Function>(Alias->getAliasee()); + assert(CalleeFunc); + } + + // Look for tail calls in this function, and check if they either call the + // profiled callee directly, or indirectly (via a recursive search). + // Only succeed if there is a single unique tail call chain found between the + // profiled caller and callee, otherwise we could perform incorrect cloning. + bool FoundSingleCalleeChain = false; + for (auto &BB : *CalleeFunc) { + for (auto &I : BB) { + auto *CB = dyn_cast<CallBase>(&I); + if (!CB || !CB->isTailCall()) + continue; + auto *CalledValue = CB->getCalledOperand(); + auto *CalledFunction = CB->getCalledFunction(); + if (CalledValue && !CalledFunction) { + CalledValue = CalledValue->stripPointerCasts(); + // Stripping pointer casts can reveal a called function. + CalledFunction = dyn_cast<Function>(CalledValue); + } + // Check if this is an alias to a function. If so, get the + // called aliasee for the checks below. + if (auto *GA = dyn_cast<GlobalAlias>(CalledValue)) { + assert(!CalledFunction && + "Expected null called function in callsite for alias"); + CalledFunction = dyn_cast<Function>(GA->getAliaseeObject()); + } + if (!CalledFunction) + continue; + if (CalledFunction == ProfiledCallee) { + if (FoundSingleCalleeChain) { + FoundMultipleCalleeChains = true; + return false; + } + FoundSingleCalleeChain = true; + FoundProfiledCalleeCount++; + FoundProfiledCalleeDepth += Depth; + if (Depth > FoundProfiledCalleeMaxDepth) + FoundProfiledCalleeMaxDepth = Depth; + SaveCallsiteInfo(&I, CalleeFunc); + } else if (findProfiledCalleeThroughTailCalls( + ProfiledCallee, CalledFunction, Depth + 1, + FoundCalleeChain, FoundMultipleCalleeChains)) { + if (FoundMultipleCalleeChains) + return false; + if (FoundSingleCalleeChain) { + FoundMultipleCalleeChains = true; + return false; + } + FoundSingleCalleeChain = true; + SaveCallsiteInfo(&I, CalleeFunc); + } + } + } + + return FoundSingleCalleeChain; +} + +bool ModuleCallsiteContextGraph::calleeMatchesFunc( + Instruction *Call, const Function *Func, const Function *CallerFunc, + std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain) { auto *CB = dyn_cast<CallBase>(Call); if (!CB->getCalledOperand()) return false; @@ -1652,11 +1897,117 @@ bool ModuleCallsiteContextGraph::calleeMatchesFunc(Instruction *Call, if (CalleeFunc == Func) return true; auto *Alias = dyn_cast<GlobalAlias>(CalleeVal); - return Alias && Alias->getAliasee() == Func; + if (Alias && Alias->getAliasee() == Func) + return true; + + // Recursively search for the profiled callee through tail calls starting with + // the actual Callee. 
The discovered tail call chain is saved in + // FoundCalleeChain, and we will fixup the graph to include these callsites + // after returning. + // FIXME: We will currently redo the same recursive walk if we find the same + // mismatched callee from another callsite. We can improve this with more + // bookkeeping of the created chain of new nodes for each mismatch. + unsigned Depth = 1; + bool FoundMultipleCalleeChains = false; + if (!findProfiledCalleeThroughTailCalls(Func, CalleeVal, Depth, + FoundCalleeChain, + FoundMultipleCalleeChains)) { + LLVM_DEBUG(dbgs() << "Not found through unique tail call chain: " + << Func->getName() << " from " << CallerFunc->getName() + << " that actually called " << CalleeVal->getName() + << (FoundMultipleCalleeChains + ? " (found multiple possible chains)" + : "") + << "\n"); + if (FoundMultipleCalleeChains) + FoundProfiledCalleeNonUniquelyCount++; + return false; + } + + return true; } -bool IndexCallsiteContextGraph::calleeMatchesFunc(IndexCall &Call, - const FunctionSummary *Func) { +bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls( + ValueInfo ProfiledCallee, ValueInfo CurCallee, unsigned Depth, + std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain, + bool &FoundMultipleCalleeChains) { + // Stop recursive search if we have already explored the maximum specified + // depth. + if (Depth > TailCallSearchDepth) + return false; + + auto CreateAndSaveCallsiteInfo = [&](ValueInfo Callee, FunctionSummary *FS) { + // Make a CallsiteInfo for each discovered callee, if one hasn't already + // been synthesized. + if (!FunctionCalleesToSynthesizedCallsiteInfos.count(FS) || + !FunctionCalleesToSynthesizedCallsiteInfos[FS].count(Callee)) + // StackIds is empty (we don't have debug info available in the index for + // these callsites) + FunctionCalleesToSynthesizedCallsiteInfos[FS][Callee] = + std::make_unique<CallsiteInfo>(Callee, SmallVector<unsigned>()); + CallsiteInfo *NewCallsiteInfo = + FunctionCalleesToSynthesizedCallsiteInfos[FS][Callee].get(); + FoundCalleeChain.push_back({NewCallsiteInfo, FS}); + }; + + // Look for tail calls in this function, and check if they either call the + // profiled callee directly, or indirectly (via a recursive search). + // Only succeed if there is a single unique tail call chain found between the + // profiled caller and callee, otherwise we could perform incorrect cloning. + bool FoundSingleCalleeChain = false; + for (auto &S : CurCallee.getSummaryList()) { + if (!GlobalValue::isLocalLinkage(S->linkage()) && + !isPrevailing(CurCallee.getGUID(), S.get())) + continue; + auto *FS = dyn_cast<FunctionSummary>(S->getBaseObject()); + if (!FS) + continue; + auto FSVI = CurCallee; + auto *AS = dyn_cast<AliasSummary>(S.get()); + if (AS) + FSVI = AS->getAliaseeVI(); + for (auto &CallEdge : FS->calls()) { + if (!CallEdge.second.hasTailCall()) + continue; + if (CallEdge.first == ProfiledCallee) { + if (FoundSingleCalleeChain) { + FoundMultipleCalleeChains = true; + return false; + } + FoundSingleCalleeChain = true; + FoundProfiledCalleeCount++; + FoundProfiledCalleeDepth += Depth; + if (Depth > FoundProfiledCalleeMaxDepth) + FoundProfiledCalleeMaxDepth = Depth; + CreateAndSaveCallsiteInfo(CallEdge.first, FS); + // Add FS to FSToVIMap in case it isn't already there. 
+ assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI); + FSToVIMap[FS] = FSVI; + } else if (findProfiledCalleeThroughTailCalls( + ProfiledCallee, CallEdge.first, Depth + 1, + FoundCalleeChain, FoundMultipleCalleeChains)) { + if (FoundMultipleCalleeChains) + return false; + if (FoundSingleCalleeChain) { + FoundMultipleCalleeChains = true; + return false; + } + FoundSingleCalleeChain = true; + CreateAndSaveCallsiteInfo(CallEdge.first, FS); + // Add FS to FSToVIMap in case it isn't already there. + assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI); + FSToVIMap[FS] = FSVI; + } + } + } + + return FoundSingleCalleeChain; +} + +bool IndexCallsiteContextGraph::calleeMatchesFunc( + IndexCall &Call, const FunctionSummary *Func, + const FunctionSummary *CallerFunc, + std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain) { ValueInfo Callee = dyn_cast_if_present<CallsiteInfo *>(Call.getBase())->Callee; // If there is no summary list then this is a call to an externally defined @@ -1666,11 +2017,38 @@ bool IndexCallsiteContextGraph::calleeMatchesFunc(IndexCall &Call, ? nullptr : dyn_cast<AliasSummary>(Callee.getSummaryList()[0].get()); assert(FSToVIMap.count(Func)); - return Callee == FSToVIMap[Func] || - // If callee is an alias, check the aliasee, since only function - // summary base objects will contain the stack node summaries and thus - // get a context node. - (Alias && Alias->getAliaseeVI() == FSToVIMap[Func]); + auto FuncVI = FSToVIMap[Func]; + if (Callee == FuncVI || + // If callee is an alias, check the aliasee, since only function + // summary base objects will contain the stack node summaries and thus + // get a context node. + (Alias && Alias->getAliaseeVI() == FuncVI)) + return true; + + // Recursively search for the profiled callee through tail calls starting with + // the actual Callee. The discovered tail call chain is saved in + // FoundCalleeChain, and we will fixup the graph to include these callsites + // after returning. + // FIXME: We will currently redo the same recursive walk if we find the same + // mismatched callee from another callsite. We can improve this with more + // bookkeeping of the created chain of new nodes for each mismatch. + unsigned Depth = 1; + bool FoundMultipleCalleeChains = false; + if (!findProfiledCalleeThroughTailCalls( + FuncVI, Callee, Depth, FoundCalleeChain, FoundMultipleCalleeChains)) { + LLVM_DEBUG(dbgs() << "Not found through unique tail call chain: " << FuncVI + << " from " << FSToVIMap[CallerFunc] + << " that actually called " << Callee + << (FoundMultipleCalleeChains + ? " (found multiple possible chains)" + : "") + << "\n"); + if (FoundMultipleCalleeChains) + FoundProfiledCalleeNonUniquelyCount++; + return false; + } + + return true; } static std::string getAllocTypeString(uint8_t AllocTypes) { @@ -2533,6 +2911,9 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() { // that were previously assigned to call PreviousAssignedFuncClone, // to record that they now call NewFuncClone. for (auto CE : Clone->CallerEdges) { + // Skip any that have been removed on an earlier iteration. + if (!CE) + continue; // Ignore any caller that does not have a recorded callsite Call. if (!CE->Caller->hasCall()) continue; @@ -2945,6 +3326,42 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { NumClonesCreated = NumClones; }; + auto CloneCallsite = [&](const CallsiteInfo &StackNode, CallBase *CB, + Function *CalledFunction) { + // Perform cloning if not yet done. 
+ CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size()); + + // Should have skipped indirect calls via mayHaveMemprofSummary. + assert(CalledFunction); + assert(!IsMemProfClone(*CalledFunction)); + + // Update the calls per the summary info. + // Save orig name since it gets updated in the first iteration + // below. + auto CalleeOrigName = CalledFunction->getName(); + for (unsigned J = 0; J < StackNode.Clones.size(); J++) { + // Do nothing if this version calls the original version of its + // callee. + if (!StackNode.Clones[J]) + continue; + auto NewF = M.getOrInsertFunction( + getMemProfFuncName(CalleeOrigName, StackNode.Clones[J]), + CalledFunction->getFunctionType()); + CallBase *CBClone; + // Copy 0 is the original function. + if (!J) + CBClone = CB; + else + CBClone = cast<CallBase>((*VMaps[J - 1])[CB]); + CBClone->setCalledFunction(NewF); + ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CBClone) + << ore::NV("Call", CBClone) << " in clone " + << ore::NV("Caller", CBClone->getFunction()) + << " assigned to call function clone " + << ore::NV("Callee", NewF.getCallee())); + } + }; + // Locate the summary for F. ValueInfo TheFnVI = findValueInfoForFunc(F, M, ImportSummary); // If not found, this could be an imported local (see comment in @@ -2974,6 +3391,23 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { auto SI = FS->callsites().begin(); auto AI = FS->allocs().begin(); + // To handle callsite infos synthesized for tail calls which have missing + // frames in the profiled context, map callee VI to the synthesized callsite + // info. + DenseMap<ValueInfo, CallsiteInfo> MapTailCallCalleeVIToCallsite; + // Iterate the callsites for this function in reverse, since we place all + // those synthesized for tail calls at the end. + for (auto CallsiteIt = FS->callsites().rbegin(); + CallsiteIt != FS->callsites().rend(); CallsiteIt++) { + auto &Callsite = *CallsiteIt; + // Stop as soon as we see a non-synthesized callsite info (see comment + // above loop). All the entries added for discovered tail calls have empty + // stack ids. + if (!Callsite.StackIdIndices.empty()) + break; + MapTailCallCalleeVIToCallsite.insert({Callsite.Callee, Callsite}); + } + // Assume for now that the instructions are in the exact same order // as when the summary was created, but confirm this is correct by // matching the stack ids. @@ -3126,37 +3560,16 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { } #endif - // Perform cloning if not yet done. - CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size()); - - // Should have skipped indirect calls via mayHaveMemprofSummary. - assert(CalledFunction); - assert(!IsMemProfClone(*CalledFunction)); - - // Update the calls per the summary info. - // Save orig name since it gets updated in the first iteration - // below. - auto CalleeOrigName = CalledFunction->getName(); - for (unsigned J = 0; J < StackNode.Clones.size(); J++) { - // Do nothing if this version calls the original version of its - // callee. - if (!StackNode.Clones[J]) - continue; - auto NewF = M.getOrInsertFunction( - getMemProfFuncName(CalleeOrigName, StackNode.Clones[J]), - CalledFunction->getFunctionType()); - CallBase *CBClone; - // Copy 0 is the original function. 
- if (!J) - CBClone = CB; - else - CBClone = cast<CallBase>((*VMaps[J - 1])[CB]); - CBClone->setCalledFunction(NewF); - ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CBClone) - << ore::NV("Call", CBClone) << " in clone " - << ore::NV("Caller", CBClone->getFunction()) - << " assigned to call function clone " - << ore::NV("Callee", NewF.getCallee())); + CloneCallsite(StackNode, CB, CalledFunction); + } else if (CB->isTailCall()) { + // Locate the synthesized callsite info for the callee VI, if any was + // created, and use that for cloning. + ValueInfo CalleeVI = + findValueInfoForFunc(*CalledFunction, M, ImportSummary); + if (CalleeVI && MapTailCallCalleeVIToCallsite.count(CalleeVI)) { + auto Callsite = MapTailCallCalleeVIToCallsite.find(CalleeVI); + assert(Callsite != MapTailCallCalleeVIToCallsite.end()); + CloneCallsite(Callsite->second, CB, CalledFunction); } } // Memprof and callsite metadata on memory allocations no longer needed. diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 96b612254ca5..c7e6f32c5406 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1723,6 +1723,30 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) { I, Builder.CreateIntrinsic(Intrinsic::ctpop, {I.getType()}, {Builder.CreateOr(A, B)})); + // Fold the log2_ceil idiom: + // zext(ctpop(A) >u/!= 1) + (ctlz(A, true) ^ (BW - 1)) + // --> + // BW - ctlz(A - 1, false) + const APInt *XorC; + if (match(&I, + m_c_Add( + m_ZExt(m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(m_Value(A)), + m_One())), + m_OneUse(m_ZExtOrSelf(m_OneUse(m_Xor( + m_OneUse(m_TruncOrSelf(m_OneUse( + m_Intrinsic<Intrinsic::ctlz>(m_Deferred(A), m_One())))), + m_APInt(XorC))))))) && + (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_NE) && + *XorC == A->getType()->getScalarSizeInBits() - 1) { + Value *Sub = Builder.CreateAdd(A, Constant::getAllOnesValue(A->getType())); + Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {A->getType()}, + {Sub, Builder.getFalse()}); + Value *Ret = Builder.CreateSub( + ConstantInt::get(A->getType(), A->getType()->getScalarSizeInBits()), + Ctlz, "", /*HasNUW*/ true, /*HasNSW*/ true); + return replaceInstUsesWith(I, Builder.CreateZExtOrTrunc(Ret, I.getType())); + } + if (Instruction *Res = foldSquareSumInt(I)) return Res; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index c03f50d75814..0620752e3213 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -46,44 +46,6 @@ static Value *getFCmpValue(unsigned Code, Value *LHS, Value *RHS, return Builder.CreateFCmp(NewPred, LHS, RHS); } -/// Transform BITWISE_OP(BSWAP(A),BSWAP(B)) or -/// BITWISE_OP(BSWAP(A), Constant) to BSWAP(BITWISE_OP(A, B)) -/// \param I Binary operator to transform. -/// \return Pointer to node that must replace the original binary operator, or -/// null pointer if no transformation was made. 
-static Value *SimplifyBSwap(BinaryOperator &I, - InstCombiner::BuilderTy &Builder) { - assert(I.isBitwiseLogicOp() && "Unexpected opcode for bswap simplifying"); - - Value *OldLHS = I.getOperand(0); - Value *OldRHS = I.getOperand(1); - - Value *NewLHS; - if (!match(OldLHS, m_BSwap(m_Value(NewLHS)))) - return nullptr; - - Value *NewRHS; - const APInt *C; - - if (match(OldRHS, m_BSwap(m_Value(NewRHS)))) { - // OP( BSWAP(x), BSWAP(y) ) -> BSWAP( OP(x, y) ) - if (!OldLHS->hasOneUse() && !OldRHS->hasOneUse()) - return nullptr; - // NewRHS initialized by the matcher. - } else if (match(OldRHS, m_APInt(C))) { - // OP( BSWAP(x), CONSTANT ) -> BSWAP( OP(x, BSWAP(CONSTANT) ) ) - if (!OldLHS->hasOneUse()) - return nullptr; - NewRHS = ConstantInt::get(I.getType(), C->byteSwap()); - } else - return nullptr; - - Value *BinOp = Builder.CreateBinOp(I.getOpcode(), NewLHS, NewRHS); - Function *F = Intrinsic::getDeclaration(I.getModule(), Intrinsic::bswap, - I.getType()); - return Builder.CreateCall(F, BinOp); -} - /// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise /// (V < Lo || V >= Hi). This method expects that Lo < Hi. IsSigned indicates /// whether to treat V, Lo, and Hi as signed or not. @@ -2159,6 +2121,64 @@ Instruction *InstCombinerImpl::foldBinOpOfDisplacedShifts(BinaryOperator &I) { return BinaryOperator::Create(ShiftOp, NewC, ShAmt); } +// Fold and/or/xor with two equal intrinsic IDs: +// bitwise(fshl (A, B, ShAmt), fshl(C, D, ShAmt)) +// -> fshl(bitwise(A, C), bitwise(B, D), ShAmt) +// bitwise(fshr (A, B, ShAmt), fshr(C, D, ShAmt)) +// -> fshr(bitwise(A, C), bitwise(B, D), ShAmt) +// bitwise(bswap(A), bswap(B)) -> bswap(bitwise(A, B)) +// bitwise(bswap(A), C) -> bswap(bitwise(A, bswap(C))) +// bitwise(bitreverse(A), bitreverse(B)) -> bitreverse(bitwise(A, B)) +// bitwise(bitreverse(A), C) -> bitreverse(bitwise(A, bitreverse(C))) +static Instruction * +foldBitwiseLogicWithIntrinsics(BinaryOperator &I, + InstCombiner::BuilderTy &Builder) { + assert(I.isBitwiseLogicOp() && "Should and/or/xor"); + if (!I.getOperand(0)->hasOneUse()) + return nullptr; + IntrinsicInst *X = dyn_cast<IntrinsicInst>(I.getOperand(0)); + if (!X) + return nullptr; + + IntrinsicInst *Y = dyn_cast<IntrinsicInst>(I.getOperand(1)); + if (Y && (!Y->hasOneUse() || X->getIntrinsicID() != Y->getIntrinsicID())) + return nullptr; + + Intrinsic::ID IID = X->getIntrinsicID(); + const APInt *RHSC; + // Try to match constant RHS. + if (!Y && (!(IID == Intrinsic::bswap || IID == Intrinsic::bitreverse) || + !match(I.getOperand(1), m_APInt(RHSC)))) + return nullptr; + + switch (IID) { + case Intrinsic::fshl: + case Intrinsic::fshr: { + if (X->getOperand(2) != Y->getOperand(2)) + return nullptr; + Value *NewOp0 = + Builder.CreateBinOp(I.getOpcode(), X->getOperand(0), Y->getOperand(0)); + Value *NewOp1 = + Builder.CreateBinOp(I.getOpcode(), X->getOperand(1), Y->getOperand(1)); + Function *F = Intrinsic::getDeclaration(I.getModule(), IID, I.getType()); + return CallInst::Create(F, {NewOp0, NewOp1, X->getOperand(2)}); + } + case Intrinsic::bswap: + case Intrinsic::bitreverse: { + Value *NewOp0 = Builder.CreateBinOp( + I.getOpcode(), X->getOperand(0), + Y ? Y->getOperand(0) + : ConstantInt::get(I.getType(), IID == Intrinsic::bswap + ? RHSC->byteSwap() + : RHSC->reverseBits())); + Function *F = Intrinsic::getDeclaration(I.getModule(), IID, I.getType()); + return CallInst::Create(F, {NewOp0}); + } + default: + return nullptr; + } +} + // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches // here. 
We should standardize that construct where it is needed or choose some // other way to ensure that commutated variants of patterns are not missed. @@ -2194,9 +2214,6 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) { if (Value *V = foldUsingDistributiveLaws(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyBSwap(I, Builder)) - return replaceInstUsesWith(I, V); - if (Instruction *R = foldBinOpShiftWithShift(I)) return R; @@ -2688,6 +2705,9 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) { if (Instruction *Res = foldBinOpOfDisplacedShifts(I)) return Res; + if (Instruction *Res = foldBitwiseLogicWithIntrinsics(I, Builder)) + return Res; + return nullptr; } @@ -3347,9 +3367,6 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { if (Value *V = foldUsingDistributiveLaws(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyBSwap(I, Builder)) - return replaceInstUsesWith(I, V); - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); Type *Ty = I.getType(); if (Ty->isIntOrIntVectorTy(1)) { @@ -3884,6 +3901,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, *C1 | *C2)); } + if (Instruction *Res = foldBitwiseLogicWithIntrinsics(I, Builder)) + return Res; + return nullptr; } @@ -4507,9 +4527,6 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) { if (SimplifyDemandedInstructionBits(I)) return &I; - if (Value *V = SimplifyBSwap(I, Builder)) - return replaceInstUsesWith(I, V); - if (Instruction *R = foldNot(I)) return R; @@ -4799,5 +4816,8 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) { if (Instruction *Res = foldBinOpOfDisplacedShifts(I)) return Res; + if (Instruction *Res = foldBitwiseLogicWithIntrinsics(I, Builder)) + return Res; + return nullptr; } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 40b48699f758..64fbd5543a9e 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1884,6 +1884,10 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { return crossLogicOpFold; } + // Try to fold into bitreverse if bswap is the root of the expression tree. + if (Instruction *BitOp = matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ false, + /*MatchBitReversals*/ true)) + return BitOp; break; } case Intrinsic::masked_load: diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index ab55f235920a..21bfc91148bf 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1704,11 +1704,11 @@ Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI, if (CmpRHS != CmpLHS && isa<Constant>(CmpRHS) && !isa<Constant>(CmpLHS)) { if (CmpLHS == TrueVal && Pred == ICmpInst::ICMP_EQ) { // Transform (X == C) ? X : Y -> (X == C) ? C : Y - SI.setOperand(1, CmpRHS); + replaceOperand(SI, 1, CmpRHS); Changed = true; } else if (CmpLHS == FalseVal && Pred == ICmpInst::ICMP_NE) { // Transform (X != C) ? Y : X -> (X != C) ? 
Y : C - SI.setOperand(2, CmpRHS); + replaceOperand(SI, 2, CmpRHS); Changed = true; } } diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index e3deafa49bd9..5e7e08eaa997 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -216,7 +216,7 @@ static cl::opt<bool> ClInstrumentWrites( cl::Hidden, cl::init(true)); static cl::opt<bool> - ClUseStackSafety("asan-use-stack-safety", cl::Hidden, cl::init(false), + ClUseStackSafety("asan-use-stack-safety", cl::Hidden, cl::init(true), cl::Hidden, cl::desc("Use Stack Safety analysis results"), cl::Optional); diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 6b95c7028d93..c20fc942eaf0 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -617,9 +617,7 @@ void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { std::vector<uint8_t> Indexes; JamCRC JC; for (auto &BB : F) { - const Instruction *TI = BB.getTerminator(); - for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { - BasicBlock *Succ = TI->getSuccessor(I); + for (BasicBlock *Succ : successors(&BB)) { auto BI = findBBInfo(Succ); if (BI == nullptr) continue; @@ -658,10 +656,10 @@ void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { << " CRC = " << JC.getCRC() << ", Selects = " << SIVisitor.getNumOfSelectInsts() << ", Edges = " << MST.numEdges() << ", ICSites = " - << ValueSites[IPVK_IndirectCallTarget].size()); - LLVM_DEBUG(dbgs() << ", Memops = " << ValueSites[IPVK_MemOPSize].size() - << ", High32 CRC = " << JCH.getCRC()); - LLVM_DEBUG(dbgs() << ", Hash = " << FunctionHash << "\n";); + << ValueSites[IPVK_IndirectCallTarget].size() + << ", Memops = " << ValueSites[IPVK_MemOPSize].size() + << ", High32 CRC = " << JCH.getCRC() + << ", Hash = " << FunctionHash << "\n";); if (PGOTraceFuncHash != "-" && F.getName().contains(PGOTraceFuncHash)) dbgs() << "Funcname=" << F.getName() << ", Hash=" << FunctionHash diff --git a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp index eef94636578d..533cefaf1061 100644 --- a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp +++ b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp @@ -207,6 +207,12 @@ struct FlattenInfo { match(MatchedMul, m_c_Mul(m_Trunc(m_Specific(OuterInductionPHI)), m_Value(MatchedItCount))); + // Matches the pattern ptr+i*M+j, with the two additions being done via GEP. + bool IsGEP = match(U, m_GEP(m_GEP(m_Value(), m_Value(MatchedMul)), + m_Specific(InnerInductionPHI))) && + match(MatchedMul, m_c_Mul(m_Specific(OuterInductionPHI), + m_Value(MatchedItCount))); + if (!MatchedItCount) return false; @@ -224,7 +230,7 @@ struct FlattenInfo { // Look through extends if the IV has been widened. Don't look through // extends if we already looked through a trunc. 
- if (Widened && IsAdd && + if (Widened && (IsAdd || IsGEP) && (isa<SExtInst>(MatchedItCount) || isa<ZExtInst>(MatchedItCount))) { assert(MatchedItCount->getType() == InnerInductionPHI->getType() && "Unexpected type mismatch in types after widening"); @@ -236,7 +242,7 @@ struct FlattenInfo { LLVM_DEBUG(dbgs() << "Looking for inner trip count: "; InnerTripCount->dump()); - if ((IsAdd || IsAddTrunc) && MatchedItCount == InnerTripCount) { + if ((IsAdd || IsAddTrunc || IsGEP) && MatchedItCount == InnerTripCount) { LLVM_DEBUG(dbgs() << "Found. This sse is optimisable\n"); ValidOuterPHIUses.insert(MatchedMul); LinearIVUses.insert(U); @@ -646,33 +652,40 @@ static OverflowResult checkOverflow(FlattenInfo &FI, DominatorTree *DT, if (OR != OverflowResult::MayOverflow) return OR; - for (Value *V : FI.LinearIVUses) { - for (Value *U : V->users()) { - if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) { - for (Value *GEPUser : U->users()) { - auto *GEPUserInst = cast<Instruction>(GEPUser); - if (!isa<LoadInst>(GEPUserInst) && - !(isa<StoreInst>(GEPUserInst) && - GEP == GEPUserInst->getOperand(1))) - continue; - if (!isGuaranteedToExecuteForEveryIteration(GEPUserInst, - FI.InnerLoop)) - continue; - // The IV is used as the operand of a GEP which dominates the loop - // latch, and the IV is at least as wide as the address space of the - // GEP. In this case, the GEP would wrap around the address space - // before the IV increment wraps, which would be UB. - if (GEP->isInBounds() && - V->getType()->getIntegerBitWidth() >= - DL.getPointerTypeSizeInBits(GEP->getType())) { - LLVM_DEBUG( - dbgs() << "use of linear IV would be UB if overflow occurred: "; - GEP->dump()); - return OverflowResult::NeverOverflows; - } - } + auto CheckGEP = [&](GetElementPtrInst *GEP, Value *GEPOperand) { + for (Value *GEPUser : GEP->users()) { + auto *GEPUserInst = cast<Instruction>(GEPUser); + if (!isa<LoadInst>(GEPUserInst) && + !(isa<StoreInst>(GEPUserInst) && GEP == GEPUserInst->getOperand(1))) + continue; + if (!isGuaranteedToExecuteForEveryIteration(GEPUserInst, FI.InnerLoop)) + continue; + // The IV is used as the operand of a GEP which dominates the loop + // latch, and the IV is at least as wide as the address space of the + // GEP. In this case, the GEP would wrap around the address space + // before the IV increment wraps, which would be UB. + if (GEP->isInBounds() && + GEPOperand->getType()->getIntegerBitWidth() >= + DL.getPointerTypeSizeInBits(GEP->getType())) { + LLVM_DEBUG( + dbgs() << "use of linear IV would be UB if overflow occurred: "; + GEP->dump()); + return true; } } + return false; + }; + + // Check if any IV user is, or is used by, a GEP that would cause UB if the + // multiply overflows. + for (Value *V : FI.LinearIVUses) { + if (auto *GEP = dyn_cast<GetElementPtrInst>(V)) + if (GEP->getNumIndices() == 1 && CheckGEP(GEP, GEP->getOperand(1))) + return OverflowResult::NeverOverflows; + for (Value *U : V->users()) + if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) + if (CheckGEP(GEP, V)) + return OverflowResult::NeverOverflows; } return OverflowResult::MayOverflow; @@ -778,6 +791,18 @@ static bool DoFlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI, OuterValue = Builder.CreateTrunc(FI.OuterInductionPHI, V->getType(), "flatten.trunciv"); + if (auto *GEP = dyn_cast<GetElementPtrInst>(V)) { + // Replace the GEP with one that uses OuterValue as the offset. 
+ auto *InnerGEP = cast<GetElementPtrInst>(GEP->getOperand(0)); + Value *Base = InnerGEP->getOperand(0); + // When the base of the GEP doesn't dominate the outer induction phi then + // we need to insert the new GEP where the old GEP was. + if (!DT->dominates(Base, &*Builder.GetInsertPoint())) + Builder.SetInsertPoint(cast<Instruction>(V)); + OuterValue = Builder.CreateGEP(GEP->getSourceElementType(), Base, + OuterValue, "flatten." + V->getName()); + } + LLVM_DEBUG(dbgs() << "Replacing: "; V->dump(); dbgs() << "with: "; OuterValue->dump()); V->replaceAllUsesWith(OuterValue); diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index 3f02441b74ba..b98f823ab00b 100644 --- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -1975,19 +1975,10 @@ insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs, assert(AllocaMap.count(OriginalValue)); Value *Alloca = AllocaMap[OriginalValue]; - // Emit store into the related alloca - // All gc_relocates are i8 addrspace(1)* typed, and it must be bitcasted to - // the correct type according to alloca. + // Emit store into the related alloca. assert(Relocate->getNextNode() && "Should always have one since it's not a terminator"); - IRBuilder<> Builder(Relocate->getNextNode()); - Value *CastedRelocatedValue = - Builder.CreateBitCast(Relocate, - cast<AllocaInst>(Alloca)->getAllocatedType(), - suffixed_name_or(Relocate, ".casted", "")); - - new StoreInst(CastedRelocatedValue, Alloca, - cast<Instruction>(CastedRelocatedValue)->getNextNode()); + new StoreInst(Relocate, Alloca, Relocate->getNextNode()); #ifndef NDEBUG VisitedLiveValues.insert(OriginalValue); @@ -2620,13 +2611,9 @@ static bool inlineGetBaseAndOffset(Function &F, Value *Base = findBasePointer(Callsite->getOperand(0), DVCache, KnownBases); assert(!DVCache.count(Callsite)); - auto *BaseBC = IRBuilder<>(Callsite).CreateBitCast( - Base, Callsite->getType(), suffixed_name_or(Base, ".cast", "")); - if (BaseBC != Base) - DVCache[BaseBC] = Base; - Callsite->replaceAllUsesWith(BaseBC); - if (!BaseBC->hasName()) - BaseBC->takeName(Callsite); + Callsite->replaceAllUsesWith(Base); + if (!Base->hasName()) + Base->takeName(Callsite); Callsite->eraseFromParent(); break; } diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index 225dd454068c..d2fed11445e4 100644 --- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -1093,67 +1093,25 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { // => add the offset // // %gep2 ; clone of %gep - // %new.gep = gep %gep2, <offset / sizeof(*%gep)> + // %new.gep = gep i8, %gep2, %offset // %gep ; will be removed // ... %gep ... // // => replace all uses of %gep with %new.gep and remove %gep // // %gep2 ; clone of %gep - // %new.gep = gep %gep2, <offset / sizeof(*%gep)> - // ... %new.gep ... - // - // If AccumulativeByteOffset is not a multiple of sizeof(*%gep), we emit an - // uglygep (http://llvm.org/docs/GetElementPtr.html#what-s-an-uglygep): - // bitcast %gep2 to i8*, add the offset, and bitcast the result back to the - // type of %gep. - // - // %gep2 ; clone of %gep - // %0 = bitcast %gep2 to i8* - // %uglygep = gep %0, <offset> - // %new.gep = bitcast %uglygep to <type of %gep> + // %new.gep = gep i8, %gep2, %offset // ... %new.gep ... 
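// Illustrative sketch (hypothetical IR, not from the patch or the source
// file): with this simplification, a GEP whose index carries a constant
// offset, e.g.
//   %add = add i64 %i, 3
//   %gep = getelementptr i32, ptr %p, i64 %add
// is always rewritten into a clone that keeps only the variable part plus a
// single byte-based GEP carrying the extracted offset (3 * sizeof(i32) = 12):
//   %gep2    = getelementptr i32, ptr %p, i64 %i
//   %new.gep = getelementptr i8, ptr %gep2, i64 12
// independent of whether the byte offset is a multiple of the element size,
// which is why the old "uglygep" bitcast path below is deleted.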
Instruction *NewGEP = GEP->clone(); NewGEP->insertBefore(GEP); - // Per ANSI C standard, signed / unsigned = unsigned and signed % unsigned = - // unsigned.. Therefore, we cast ElementTypeSizeOfGEP to signed because it is - // used with unsigned integers later. - int64_t ElementTypeSizeOfGEP = static_cast<int64_t>( - DL->getTypeAllocSize(GEP->getResultElementType())); Type *PtrIdxTy = DL->getIndexType(GEP->getType()); - if (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0) { - // Very likely. As long as %gep is naturally aligned, the byte offset we - // extracted should be a multiple of sizeof(*%gep). - int64_t Index = AccumulativeByteOffset / ElementTypeSizeOfGEP; - NewGEP = GetElementPtrInst::Create(GEP->getResultElementType(), NewGEP, - ConstantInt::get(PtrIdxTy, Index, true), - GEP->getName(), GEP); - NewGEP->copyMetadata(*GEP); - // Inherit the inbounds attribute of the original GEP. - cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds); - } else { - // Unlikely but possible. For example, - // #pragma pack(1) - // struct S { - // int a[3]; - // int64 b[8]; - // }; - // #pragma pack() - // - // Suppose the gep before extraction is &s[i + 1].b[j + 3]. After - // extraction, it becomes &s[i].b[j] and AccumulativeByteOffset is - // sizeof(S) + 3 * sizeof(int64) = 100, which is not a multiple of - // sizeof(int64). - // - // Emit an uglygep in this case. - IRBuilder<> Builder(GEP); - NewGEP = cast<Instruction>(Builder.CreateGEP( - Builder.getInt8Ty(), NewGEP, - {ConstantInt::get(PtrIdxTy, AccumulativeByteOffset, true)}, "uglygep", - GEPWasInBounds)); - NewGEP->copyMetadata(*GEP); - } + IRBuilder<> Builder(GEP); + NewGEP = cast<Instruction>(Builder.CreateGEP( + Builder.getInt8Ty(), NewGEP, + {ConstantInt::get(PtrIdxTy, AccumulativeByteOffset, true)}, + GEP->getName(), GEPWasInBounds)); + NewGEP->copyMetadata(*GEP); GEP->replaceAllUsesWith(NewGEP); GEP->eraseFromParent(); diff --git a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp index ca1f3a0c0ae3..2cce6eb22341 100644 --- a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp @@ -233,13 +233,9 @@ private: void factorArrayIndex(Value *ArrayIdx, const SCEV *Base, uint64_t ElementSize, GetElementPtrInst *GEP); - // Emit code that computes the "bump" from Basis to C. If the candidate is a - // GEP and the bump is not divisible by the element size of the GEP, this - // function sets the BumpWithUglyGEP flag to notify its caller to bump the - // basis using an ugly GEP. + // Emit code that computes the "bump" from Basis to C. 
static Value *emitBump(const Candidate &Basis, const Candidate &C, - IRBuilder<> &Builder, const DataLayout *DL, - bool &BumpWithUglyGEP); + IRBuilder<> &Builder, const DataLayout *DL); const DataLayout *DL = nullptr; DominatorTree *DT = nullptr; @@ -581,26 +577,11 @@ static void unifyBitWidth(APInt &A, APInt &B) { Value *StraightLineStrengthReduce::emitBump(const Candidate &Basis, const Candidate &C, IRBuilder<> &Builder, - const DataLayout *DL, - bool &BumpWithUglyGEP) { + const DataLayout *DL) { APInt Idx = C.Index->getValue(), BasisIdx = Basis.Index->getValue(); unifyBitWidth(Idx, BasisIdx); APInt IndexOffset = Idx - BasisIdx; - BumpWithUglyGEP = false; - if (Basis.CandidateKind == Candidate::GEP) { - APInt ElementSize( - IndexOffset.getBitWidth(), - DL->getTypeAllocSize( - cast<GetElementPtrInst>(Basis.Ins)->getResultElementType())); - APInt Q, R; - APInt::sdivrem(IndexOffset, ElementSize, Q, R); - if (R == 0) - IndexOffset = Q; - else - BumpWithUglyGEP = true; - } - // Compute Bump = C - Basis = (i' - i) * S. // Common case 1: if (i' - i) is 1, Bump = S. if (IndexOffset == 1) @@ -645,8 +626,7 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis( return; IRBuilder<> Builder(C.Ins); - bool BumpWithUglyGEP; - Value *Bump = emitBump(Basis, C, Builder, DL, BumpWithUglyGEP); + Value *Bump = emitBump(Basis, C, Builder, DL); Value *Reduced = nullptr; // equivalent to but weaker than C.Ins switch (C.CandidateKind) { case Candidate::Add: @@ -673,28 +653,13 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis( } break; } - case Candidate::GEP: - { - Type *OffsetTy = DL->getIndexType(C.Ins->getType()); + case Candidate::GEP: { bool InBounds = cast<GetElementPtrInst>(C.Ins)->isInBounds(); - if (BumpWithUglyGEP) { - // C = (char *)Basis + Bump - unsigned AS = Basis.Ins->getType()->getPointerAddressSpace(); - Type *CharTy = PointerType::get(Basis.Ins->getContext(), AS); - Reduced = Builder.CreateBitCast(Basis.Ins, CharTy); - Reduced = - Builder.CreateGEP(Builder.getInt8Ty(), Reduced, Bump, "", InBounds); - Reduced = Builder.CreateBitCast(Reduced, C.Ins->getType()); - } else { - // C = gep Basis, Bump - // Canonicalize bump to pointer size. 
- Bump = Builder.CreateSExtOrTrunc(Bump, OffsetTy); - Reduced = Builder.CreateGEP( - cast<GetElementPtrInst>(Basis.Ins)->getResultElementType(), Basis.Ins, - Bump, "", InBounds); - } - break; - } + // C = (char *)Basis + Bump + Reduced = + Builder.CreateGEP(Builder.getInt8Ty(), Basis.Ins, Bump, "", InBounds); + break; + } default: llvm_unreachable("C.CandidateKind is invalid"); }; diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index c76cc9db16d7..b9cad764aaef 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -3905,7 +3905,8 @@ bool llvm::recognizeBSwapOrBitReverseIdiom( SmallVectorImpl<Instruction *> &InsertedInsts) { if (!match(I, m_Or(m_Value(), m_Value())) && !match(I, m_FShl(m_Value(), m_Value(), m_Value())) && - !match(I, m_FShr(m_Value(), m_Value(), m_Value()))) + !match(I, m_FShr(m_Value(), m_Value(), m_Value())) && + !match(I, m_BSwap(m_Value()))) return false; if (!MatchBSwaps && !MatchBitReversals) return false; diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 61d891d65346..7515e539e7fb 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -6919,18 +6919,17 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, auto *Ty = cast<IntegerType>(SI->getCondition()->getType()); Builder.SetInsertPoint(SI); - auto *ShiftC = ConstantInt::get(Ty, Shift); - auto *Sub = Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base)); - auto *LShr = Builder.CreateLShr(Sub, ShiftC); - auto *Shl = Builder.CreateShl(Sub, Ty->getBitWidth() - Shift); - auto *Rot = Builder.CreateOr(LShr, Shl); + Value *Sub = + Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base)); + Value *Rot = Builder.CreateIntrinsic( + Ty, Intrinsic::fshl, + {Sub, Sub, ConstantInt::get(Ty, Ty->getBitWidth() - Shift)}); SI->replaceUsesOfWith(SI->getCondition(), Rot); for (auto Case : SI->cases()) { auto *Orig = Case.getCaseValue(); auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base); - Case.setValue( - cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(ShiftC->getValue())))); + Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift)))); } return true; } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 51ce88480c08..9743fa0e7402 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5004,9 +5004,8 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor( VectorizationFactor Candidate(i, C.first, ScalarCost.ScalarCost); #ifndef NDEBUG - unsigned AssumedMinimumVscale = 1; - if (std::optional<unsigned> VScale = getVScaleForTuning(OrigLoop, TTI)) - AssumedMinimumVscale = *VScale; + unsigned AssumedMinimumVscale = + getVScaleForTuning(OrigLoop, TTI).value_or(1); unsigned Width = Candidate.Width.isScalable() ? Candidate.Width.getKnownMinValue() * AssumedMinimumVscale @@ -8031,6 +8030,7 @@ void VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlan &Plan) { VPValue *EdgeMask = createEdgeMask(Predecessor, BB, Plan); if (!EdgeMask) { // Mask of predecessor is all-one so mask of block is too. BlockMaskCache[BB] = EdgeMask; + return; } if (!BlockMask) { // BlockMask has its initialized nullptr value. 
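The SeparateConstOffsetFromGEP and StraightLineStrengthReduce hunks above both stop dividing the accumulated byte offset by the GEP element size (and drop the i8 "uglygep" fallback), emitting a single i8 GEP with the raw byte offset instead. A minimal standalone sketch of why the two addressing forms agree whenever the old typed path was applicable; the base address, element size and offsets below are illustrative, not taken from the patch:

#include <cassert>
#include <cstdint>

// Old typed path: when Offset divided evenly by ElemSize, the pass emitted
// "gep <ElemTy>, Base, Offset / ElemSize", which addresses
// Base + (Offset / ElemSize) * ElemSize bytes.
static uintptr_t typedGepAddr(uintptr_t Base, int64_t Offset,
                              int64_t ElemSize) {
  assert(Offset % ElemSize == 0 && "typed form only used for an even split");
  return Base + (Offset / ElemSize) * ElemSize;
}

// New path: always "gep i8, Base, Offset", i.e. the raw byte offset.
static uintptr_t byteGepAddr(uintptr_t Base, int64_t Offset) {
  return Base + Offset;
}

int main() {
  const uintptr_t Base = 0x1000; // illustrative base address
  const int64_t ElemSize = 8;    // e.g. an i64 element
  for (int64_t Offset : {0, 8, 64, 96}) // multiples of ElemSize
    assert(typedGepAddr(Base, Offset, ElemSize) == byteGepAddr(Base, Offset));
  // Offsets such as 100 (the packed-struct case in the removed comment) are
  // not multiples of 8; only the byte form handles them without a fallback.
  return 0;
}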
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 8e22b54f002d..055fbb00871f 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6894,6 +6894,31 @@ protected: }; } // namespace +/// Returns the cost of the shuffle instructions with the given \p Kind, vector +/// type \p Tp and optional \p Mask. Adds SLP-specifc cost estimation for insert +/// subvector pattern. +static InstructionCost +getShuffleCost(const TargetTransformInfo &TTI, TTI::ShuffleKind Kind, + VectorType *Tp, ArrayRef<int> Mask = std::nullopt, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, + int Index = 0, VectorType *SubTp = nullptr, + ArrayRef<const Value *> Args = std::nullopt) { + if (Kind != TTI::SK_PermuteTwoSrc) + return TTI.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args); + int NumSrcElts = Tp->getElementCount().getKnownMinValue(); + int NumSubElts; + if (Mask.size() > 2 && ShuffleVectorInst::isInsertSubvectorMask( + Mask, NumSrcElts, NumSubElts, Index)) { + if (Index + NumSubElts > NumSrcElts && + Index + NumSrcElts <= static_cast<int>(Mask.size())) + return TTI.getShuffleCost( + TTI::SK_InsertSubvector, + FixedVectorType::get(Tp->getElementType(), Mask.size()), std::nullopt, + TTI::TCK_RecipThroughput, Index, Tp); + } + return TTI.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args); +} + /// Merges shuffle masks and emits final shuffle instruction, if required. It /// supports shuffling of 2 input vectors. It implements lazy shuffles emission, /// when the actual shuffle instruction is generated only if this is actually @@ -7141,15 +7166,15 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { std::optional<TTI::ShuffleKind> RegShuffleKind = CheckPerRegistersShuffle(SubMask); if (!RegShuffleKind) { - Cost += TTI.getShuffleCost( - *ShuffleKinds[Part], + Cost += ::getShuffleCost( + TTI, *ShuffleKinds[Part], FixedVectorType::get(VL.front()->getType(), NumElts), MaskSlice); continue; } if (*RegShuffleKind != TTI::SK_PermuteSingleSrc || !ShuffleVectorInst::isIdentityMask(SubMask, EltsPerVector)) { - Cost += TTI.getShuffleCost( - *RegShuffleKind, + Cost += ::getShuffleCost( + TTI, *RegShuffleKind, FixedVectorType::get(VL.front()->getType(), EltsPerVector), SubMask); } @@ -7222,8 +7247,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { cast<VectorType>(V1->getType())->getElementCount().getKnownMinValue(); if (isEmptyOrIdentity(Mask, VF)) return TTI::TCC_Free; - return TTI.getShuffleCost(TTI::SK_PermuteTwoSrc, - cast<VectorType>(V1->getType()), Mask); + return ::getShuffleCost(TTI, TTI::SK_PermuteTwoSrc, + cast<VectorType>(V1->getType()), Mask); } InstructionCost createShuffleVector(Value *V1, ArrayRef<int> Mask) const { // Empty mask or identity mask are free. @@ -8101,7 +8126,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals, for (unsigned I = OffsetEnd + 1 - Offset; I < VecSz; ++I) Mask[I] = ((I >= InMask.size()) || InMask.test(I)) ? 
PoisonMaskElem : I; - Cost += TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, InsertVecTy, Mask); + Cost += + ::getShuffleCost(*TTI, TTI::SK_PermuteTwoSrc, InsertVecTy, Mask); } } return Cost; @@ -8428,8 +8454,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals, return I->getOpcode() == E->getAltOpcode(); }, Mask); - VecCost += TTIRef.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, - FinalVecTy, Mask); + VecCost += ::getShuffleCost(TTIRef, TargetTransformInfo::SK_PermuteTwoSrc, + FinalVecTy, Mask); // Patterns like [fadd,fsub] can be combined into a single instruction // in x86. Reordering them into [fsub,fadd] blocks this pattern. So we // need to take into account their order when looking for the most used @@ -9133,7 +9159,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) { auto *FTy = FixedVectorType::get(TEs.back()->Scalars.front()->getType(), VF); InstructionCost C = - TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, FTy, Mask); + ::getShuffleCost(*TTI, TTI::SK_PermuteTwoSrc, FTy, Mask); LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C << " for final shuffle of vector node and external " "insertelement users.\n"; @@ -11991,8 +12017,12 @@ Value *BoUpSLP::vectorizeTree( IRBuilder<>::InsertPointGuard Guard(Builder); if (auto *IVec = dyn_cast<Instruction>(Vec)) Builder.SetInsertPoint(IVec->getNextNonDebugInstruction()); - Vec = Builder.CreateIntCast(Vec, VU->getType(), - BWIt->second.second); + Vec = Builder.CreateIntCast( + Vec, + FixedVectorType::get( + cast<VectorType>(VU->getType())->getElementType(), + cast<FixedVectorType>(Vec->getType())->getNumElements()), + BWIt->second.second); VectorCasts.try_emplace(Scalar, Vec); } else { Vec = VecIt->second; @@ -13070,10 +13100,14 @@ bool BoUpSLP::collectValuesToDemote( if (isa<Constant>(V)) return true; - // If the value is not a vectorized instruction in the expression with only - // one use, it cannot be demoted. + // If the value is not a vectorized instruction in the expression and not used + // by the insertelement instruction and not used in multiple vector nodes, it + // cannot be demoted. auto *I = dyn_cast<Instruction>(V); - if (!I || !I->hasOneUse() || !getTreeEntry(I) || !Visited.insert(I).second) + if (!I || !getTreeEntry(I) || MultiNodeScalars.contains(I) || + !Visited.insert(I).second || all_of(I->users(), [&](User *U) { + return isa<InsertElementInst>(U) && !getTreeEntry(U); + })) return false; unsigned Start = 0; @@ -13144,11 +13178,6 @@ bool BoUpSLP::collectValuesToDemote( } void BoUpSLP::computeMinimumValueSizes() { - // If there are no external uses, the expression tree must be rooted by a - // store. We can't demote in-memory values, so there is nothing to do here. - if (ExternalUses.empty()) - return; - // We only attempt to truncate integer expressions. 
auto &TreeRoot = VectorizableTree[0]->Scalars; auto *TreeRootIT = dyn_cast<IntegerType>(TreeRoot[0]->getType()); diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp index e481f7e38e6a..f88e25ea1d16 100644 --- a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp +++ b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp @@ -1368,7 +1368,7 @@ std::string TreePredicateFn::getCodeToRunOnSDNode() const { if (immCodeUsesAPFloat()) Result += "cast<ConstantFPSDNode>(Node)->getValueAPF();\n"; else if (immCodeUsesAPInt()) - Result += "cast<ConstantSDNode>(Node)->getAPIntValue();\n"; + Result += "Node->getAsAPIntVal();\n"; else Result += "cast<ConstantSDNode>(Node)->getSExtValue();\n"; return Result + ImmCode; diff --git a/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp b/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp index 6fd5698e7372..a3e2facf948e 100644 --- a/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp +++ b/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp @@ -60,10 +60,8 @@ class MatcherTableEmitter { // all the patterns with "identical" predicates. StringMap<TinyPtrVector<TreePattern *>> NodePredicatesByCodeToRun; - StringMap<unsigned> PatternPredicateMap; std::vector<std::string> PatternPredicates; - DenseMap<const ComplexPattern*, unsigned> ComplexPatternMap; std::vector<const ComplexPattern*> ComplexPatterns; @@ -84,8 +82,50 @@ class MatcherTableEmitter { } public: - MatcherTableEmitter(const CodeGenDAGPatterns &cgp) - : CGP(cgp), OpcodeCounts(Matcher::HighestKind + 1, 0) {} + MatcherTableEmitter(const Matcher *TheMatcher, const CodeGenDAGPatterns &cgp) + : CGP(cgp), OpcodeCounts(Matcher::HighestKind + 1, 0) { + // Record the usage of ComplexPattern. + DenseMap<const ComplexPattern *, unsigned> ComplexPatternUsage; + // Record the usage of PatternPredicate. + std::map<StringRef, unsigned> PatternPredicateUsage; + + // Iterate the whole MatcherTable once and do some statistics. + std::function<void(const Matcher *)> Statistic = [&](const Matcher *N) { + while (N) { + if (auto *SM = dyn_cast<ScopeMatcher>(N)) + for (unsigned I = 0; I < SM->getNumChildren(); I++) + Statistic(SM->getChild(I)); + else if (auto *SOM = dyn_cast<SwitchOpcodeMatcher>(N)) + for (unsigned I = 0; I < SOM->getNumCases(); I++) + Statistic(SOM->getCaseMatcher(I)); + else if (auto *STM = dyn_cast<SwitchTypeMatcher>(N)) + for (unsigned I = 0; I < STM->getNumCases(); I++) + Statistic(STM->getCaseMatcher(I)); + else if (auto *CPM = dyn_cast<CheckComplexPatMatcher>(N)) + ++ComplexPatternUsage[&CPM->getPattern()]; + else if (auto *CPPM = dyn_cast<CheckPatternPredicateMatcher>(N)) + ++PatternPredicateUsage[CPPM->getPredicate()]; + N = N->getNext(); + } + }; + Statistic(TheMatcher); + + // Sort ComplexPatterns by usage. + std::vector<std::pair<const ComplexPattern *, unsigned>> ComplexPatternList( + ComplexPatternUsage.begin(), ComplexPatternUsage.end()); + sort(ComplexPatternList, + [](const auto &A, const auto &B) { return A.second > B.second; }); + for (const auto &ComplexPattern : ComplexPatternList) + ComplexPatterns.push_back(ComplexPattern.first); + + // Sort PatternPredicates by usage. 
+ std::vector<std::pair<std::string, unsigned>> PatternPredicateList( + PatternPredicateUsage.begin(), PatternPredicateUsage.end()); + sort(PatternPredicateList, + [](const auto &A, const auto &B) { return A.second > B.second; }); + for (const auto &PatternPredicate : PatternPredicateList) + PatternPredicates.push_back(PatternPredicate.first); + } unsigned EmitMatcherList(const Matcher *N, const unsigned Indent, unsigned StartIdx, raw_ostream &OS); @@ -138,20 +178,10 @@ private: } unsigned getPatternPredicate(StringRef PredName) { - unsigned &Entry = PatternPredicateMap[PredName]; - if (Entry == 0) { - PatternPredicates.push_back(PredName.str()); - Entry = PatternPredicates.size(); - } - return Entry-1; + return llvm::find(PatternPredicates, PredName) - PatternPredicates.begin(); } unsigned getComplexPat(const ComplexPattern &P) { - unsigned &Entry = ComplexPatternMap[&P]; - if (Entry == 0) { - ComplexPatterns.push_back(&P); - Entry = ComplexPatterns.size(); - } - return Entry-1; + return llvm::find(ComplexPatterns, &P) - ComplexPatterns.begin(); } unsigned getNodeXFormID(Record *Rec) { @@ -486,13 +516,15 @@ EmitMatcher(const Matcher *N, const unsigned Indent, unsigned CurrentIdx, StringRef Pred = cast<CheckPatternPredicateMatcher>(N)->getPredicate(); unsigned PredNo = getPatternPredicate(Pred); if (PredNo > 255) - OS << "OPC_CheckPatternPredicate2, TARGET_VAL(" << PredNo << "),"; + OS << "OPC_CheckPatternPredicateTwoByte, TARGET_VAL(" << PredNo << "),"; + else if (PredNo < 8) + OS << "OPC_CheckPatternPredicate" << PredNo << ','; else OS << "OPC_CheckPatternPredicate, " << PredNo << ','; if (!OmitComments) OS << " // " << Pred; OS << '\n'; - return 2 + (PredNo > 255); + return 2 + (PredNo > 255) - (PredNo < 8); } case Matcher::CheckPredicate: { TreePredicateFn Pred = cast<CheckPredicateMatcher>(N)->getPredicate(); @@ -652,8 +684,13 @@ EmitMatcher(const Matcher *N, const unsigned Indent, unsigned CurrentIdx, case Matcher::CheckComplexPat: { const CheckComplexPatMatcher *CCPM = cast<CheckComplexPatMatcher>(N); const ComplexPattern &Pattern = CCPM->getPattern(); - OS << "OPC_CheckComplexPat, /*CP*/" << getComplexPat(Pattern) << ", /*#*/" - << CCPM->getMatchNumber() << ','; + unsigned PatternNo = getComplexPat(Pattern); + if (PatternNo < 8) + OS << "OPC_CheckComplexPat" << PatternNo << ", /*#*/" + << CCPM->getMatchNumber() << ','; + else + OS << "OPC_CheckComplexPat, /*CP*/" << PatternNo << ", /*#*/" + << CCPM->getMatchNumber() << ','; if (!OmitComments) { OS << " // " << Pattern.getSelectFunc(); @@ -665,7 +702,7 @@ EmitMatcher(const Matcher *N, const unsigned Indent, unsigned CurrentIdx, OS << " + chain result"; } OS << '\n'; - return 3; + return PatternNo < 8 ? 2 : 3; } case Matcher::CheckAndImm: { @@ -1267,7 +1304,7 @@ void llvm::EmitMatcherTable(Matcher *TheMatcher, OS << "#endif\n\n"; BeginEmitFunction(OS, "void", "SelectCode(SDNode *N)", false/*AddOverride*/); - MatcherTableEmitter MatcherEmitter(CGP); + MatcherTableEmitter MatcherEmitter(TheMatcher, CGP); // First we size all the children of the three kinds of matchers that have // them. This is done by sharing the code in EmitMatcher(). 
but we don't diff --git a/llvm/utils/TableGen/ExegesisEmitter.cpp b/llvm/utils/TableGen/ExegesisEmitter.cpp index 736f1220be14..d48c7f3a480f 100644 --- a/llvm/utils/TableGen/ExegesisEmitter.cpp +++ b/llvm/utils/TableGen/ExegesisEmitter.cpp @@ -81,6 +81,11 @@ collectPfmCounters(const RecordKeeper &Records) { "duplicate ResourceName " + ResourceName); AddPfmCounterName(IssueCounter); } + + for (const Record *ValidationCounter : + Def->getValueAsListOfDefs("ValidationCounters")) + AddPfmCounterName(ValidationCounter); + AddPfmCounterName(Def->getValueAsDef("CycleCounter")); AddPfmCounterName(Def->getValueAsDef("UopsCounter")); } @@ -100,6 +105,17 @@ ExegesisEmitter::ExegesisEmitter(RecordKeeper &RK) Target = std::string(Targets[0]->getName()); } +struct ValidationCounterInfo { + int64_t EventNumber; + StringRef EventName; + unsigned PfmCounterID; +}; + +bool EventNumberLess(const ValidationCounterInfo &LHS, + const ValidationCounterInfo &RHS) { + return LHS.EventNumber < RHS.EventNumber; +} + void ExegesisEmitter::emitPfmCountersInfo(const Record &Def, unsigned &IssueCountersTableOffset, raw_ostream &OS) const { @@ -109,6 +125,31 @@ void ExegesisEmitter::emitPfmCountersInfo(const Record &Def, Def.getValueAsDef("UopsCounter")->getValueAsString("Counter"); const size_t NumIssueCounters = Def.getValueAsListOfDefs("IssueCounters").size(); + const size_t NumValidationCounters = + Def.getValueAsListOfDefs("ValidationCounters").size(); + + // Emit Validation Counters Array + if (NumValidationCounters != 0) { + std::vector<ValidationCounterInfo> ValidationCounters; + ValidationCounters.reserve(NumValidationCounters); + for (const Record *ValidationCounter : + Def.getValueAsListOfDefs("ValidationCounters")) { + ValidationCounters.push_back( + {ValidationCounter->getValueAsDef("EventType") + ->getValueAsInt("EventNumber"), + ValidationCounter->getValueAsDef("EventType")->getName(), + getPfmCounterId(ValidationCounter->getValueAsString("Counter"))}); + } + std::sort(ValidationCounters.begin(), ValidationCounters.end(), + EventNumberLess); + OS << "\nstatic const std::pair<ValidationEvent, const char*> " << Target + << Def.getName() << "ValidationCounters[] = {\n"; + for (const ValidationCounterInfo &VCI : ValidationCounters) { + OS << " { " << VCI.EventName << ", " << Target << "PfmCounterNames[" + << VCI.PfmCounterID << "]},\n"; + } + OS << "};\n"; + } OS << "\nstatic const PfmCountersInfo " << Target << Def.getName() << " = {\n"; @@ -129,10 +170,17 @@ void ExegesisEmitter::emitPfmCountersInfo(const Record &Def, // Issue Counters if (NumIssueCounters == 0) - OS << " nullptr, // No issue counters.\n 0\n"; + OS << " nullptr, 0, // No issue counters\n"; else OS << " " << Target << "PfmIssueCounters + " << IssueCountersTableOffset - << ", " << NumIssueCounters << " // Issue counters.\n"; + << ", " << NumIssueCounters << ", // Issue counters.\n"; + + // Validation Counters + if (NumValidationCounters == 0) + OS << " nullptr, 0 // No validation counters.\n"; + else + OS << " " << Target << Def.getName() << "ValidationCounters, " + << NumValidationCounters << " // Validation counters.\n"; OS << "};\n"; IssueCountersTableOffset += NumIssueCounters; diff --git a/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp b/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp index 348b3b3e0898..c092772386ec 100644 --- a/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp +++ b/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp @@ -2318,7 +2318,7 @@ bool CombineRuleBuilder::emitInstructionApplyPattern( M.actions_begin(), 
getLLTCodeGenOrTempType(Ty, M), TempRegID);
 }
- DstMI.addRenderer<TempRegRenderer>(TempRegID);
+ DstMI.addRenderer<TempRegRenderer>(TempRegID, /*IsDef=*/true);
 }
 // Render MIFlags
diff --git a/openmp/runtime/src/z_Linux_util.cpp b/openmp/runtime/src/z_Linux_util.cpp
index f01fa647c4d4..513ec6517d00 100644
--- a/openmp/runtime/src/z_Linux_util.cpp
+++ b/openmp/runtime/src/z_Linux_util.cpp
@@ -422,7 +422,7 @@ void __kmp_terminate_thread(int gtid) {
 static kmp_int32 __kmp_set_stack_info(int gtid, kmp_info_t *th) {
   int stack_data;
 #if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
-    KMP_OS_HURD || KMP_OS_SOLARIS
+    KMP_OS_HURD || KMP_OS_SOLARIS || KMP_OS_AIX
   pthread_attr_t attr;
   int status;
   size_t size = 0;
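The DAGISelMatcherEmitter change above pre-scans the matcher table, counts how often each ComplexPattern and pattern predicate is checked, and sorts both tables by descending usage so the most frequent entries land at indices below 8, where the new compact OPC_CheckComplexPat0..7 / OPC_CheckPatternPredicate0..7 forms save a byte per check. A minimal standalone sketch of the same frequency-sorting idea; the predicate strings and counts below are made up for illustration:

#include <algorithm>
#include <cassert>
#include <map>
#include <string>
#include <utility>
#include <vector>

int main() {
  // Hypothetical usage counts, as gathered by walking the matcher table.
  std::map<std::string, unsigned> Usage = {
      {"Subtarget->hasAVX()", 900},
      {"Subtarget->is64Bit()", 500},
      {"Subtarget->hasSSE2()", 40}};

  // Sort predicates by descending usage; the vector position becomes the
  // predicate number emitted into the table.
  std::vector<std::pair<std::string, unsigned>> List(Usage.begin(),
                                                     Usage.end());
  std::sort(List.begin(), List.end(),
            [](const auto &A, const auto &B) { return A.second > B.second; });

  std::vector<std::string> PatternPredicates;
  for (const auto &P : List)
    PatternPredicates.push_back(P.first);

  // Lookup mirrors the new getPatternPredicate(): the index is simply the
  // position in the pre-sorted vector, so the hottest predicate gets index 0
  // and therefore the one-byte compact opcode.
  auto Index = [&](const std::string &Name) {
    return std::find(PatternPredicates.begin(), PatternPredicates.end(),
                     Name) -
           PatternPredicates.begin();
  };
  assert(Index("Subtarget->hasAVX()") == 0);
  return 0;
}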