diff options

author    | Dimitry Andric <dim@FreeBSD.org> | 2024-02-07 14:37:28 +0000
committer | Dimitry Andric <dim@FreeBSD.org> | 2024-02-07 14:37:28 +0000
commit    | 4fdf604ba667503ae582304cebdd3df426778a6b (patch)
tree      | aae65a5d1b0a1b1acd9389fc36a0baf5fc1918c7
parent    | 2d835ae8657273e3aa8b9ef3201fb8df5563af9d (diff)

Vendor import of llvm-project branch release/18.x llvmorg-18.1.0-rc2-0-gc6c86965d967.
(ref: vendor/llvm-project/llvmorg-18.1.0-rc2-0-gc6c86965d967)

347 files changed, 4546 insertions, 1341 deletions
diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h
index ea425791fc97..6384cf9420b8 100644
--- a/clang/include/clang/AST/Type.h
+++ b/clang/include/clang/AST/Type.h
@@ -3495,6 +3495,9 @@ enum class VectorKind {
   /// is RISC-V RVV fixed-length data vector
   RVVFixedLengthData,
+
+  /// is RISC-V RVV fixed-length mask vector
+  RVVFixedLengthMask,
 };

 /// Represents a GCC generic vector type. This type is created using
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index 7e633f8e2635..e02a1201e2ad 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -2424,7 +2424,10 @@ only be a power of 2 between 64 and 65536.
 For types where LMUL!=1, ``__riscv_v_fixed_vlen`` needs to be scaled by the
 LMUL of the type before passing to the attribute.

-``vbool*_t`` types are not supported at this time.
+For ``vbool*_t`` types, ``__riscv_v_fixed_vlen`` needs to be divided by the
+number from the type name. For example, ``vbool8_t`` needs to use
+``__riscv_v_fixed_vlen`` / 8. If the resulting value is not a multiple of 8,
+the type is not supported for that value of ``__riscv_v_fixed_vlen``.
 }];
 }
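A minimal sketch of the rule the new AttrDocs.td text describes (illustrative, not part of this commit), assuming a build where `__riscv_v_fixed_vlen` expands to 256 (e.g. `-mrvv-vector-bits=256`): a fixed-length `vbool8_t` then occupies 256 / 8 = 32 bits, which is a multiple of 8 and therefore supported.

```c++
// Illustrative sketch only; assumes clang -march=rv64gcv -mrvv-vector-bits=256,
// where __riscv_v_fixed_vlen expands to 256.
#include <riscv_vector.h>

// 256 / 8 == 32 bits, a multiple of 8, so this vbool8_t mapping is valid.
typedef vbool8_t fixed_bool8_t
    __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 8)));
```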
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index d208342d9c51..74dfd1d214e8 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -436,5 +436,67 @@ TARGET_BUILTIN(__builtin_amdgcn_global_load_tr_i32, "ii*1", "nc", "gfx12-insts,w
 TARGET_BUILTIN(__builtin_amdgcn_global_load_tr_v4i16, "V4sV4s*1", "nc", "gfx12-insts,wavefrontsize64")
 TARGET_BUILTIN(__builtin_amdgcn_global_load_tr_v4f16, "V4hV4h*1", "nc", "gfx12-insts,wavefrontsize64")

+//===----------------------------------------------------------------------===//
+// WMMA builtins.
+// Postfix w32 indicates the builtin requires wavefront size of 32.
+// Postfix w64 indicates the builtin requires wavefront size of 64.
+//
+// Some of these are very similar to their GFX11 counterparts, but they don't
+// require replication of the A,B matrices, so they use fewer vector elements.
+// Therefore, we add an "_gfx12" suffix to distinguish them from the existing
+// builtins.
+//===----------------------------------------------------------------------===//
+TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12, "V8fV8hV8hV8f", "nc", "gfx12-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12, "V8fV8sV8sV8f", "nc", "gfx12-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12, "V8hV8hV8hV8h", "nc", "gfx12-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12, "V8sV8sV8sV8s", "nc", "gfx12-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12, "V8iIbV2iIbV2iV8iIb", "nc", "gfx12-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12, "V8iIbiIbiV8iIb", "nc", "gfx12-insts,wavefrontsize32")
+// These are gfx12-only, but for consistency with the other WMMA variants we're
+// keeping the "_gfx12" suffix.
+TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12, "V8fV2iV2iV8f", "nc", "gfx12-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12, "V8fV2iV2iV8f", "nc", "gfx12-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12, "V8fV2iV2iV8f", "nc", "gfx12-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12, "V8fV2iV2iV8f", "nc", "gfx12-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12, "V8iIbV2iIbV2iV8iIb", "nc", "gfx12-insts,wavefrontsize32")
+
+TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12, "V4fV4hV4hV4f", "nc", "gfx12-insts,wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12, "V4fV4sV4sV4f", "nc", "gfx12-insts,wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12, "V4hV4hV4hV4h", "nc", "gfx12-insts,wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12, "V4sV4sV4sV4s", "nc", "gfx12-insts,wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12, "V4iIbiIbiV4iIb", "nc", "gfx12-insts,wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12, "V4iIbiIbiV4iIb", "nc", "gfx12-insts,wavefrontsize64")
+// These are gfx12-only, but for consistency with the other WMMA variants we're
+// keeping the "_gfx12" suffix.
+TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12, "V4fiiV4f", "nc", "gfx12-insts,wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12, "V4fiiV4f", "nc", "gfx12-insts,wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12, "V4fiiV4f", "nc", "gfx12-insts,wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12, "V4fiiV4f", "nc", "gfx12-insts,wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12, "V4iIbiIbiV4iIb", "nc", "gfx12-insts,wavefrontsize64")
+
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32, "V8fV8hV16hV8fs", "nc", "gfx12-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32, "V8fV8sV16sV8fs", "nc", "gfx12-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32, "V8hV8hV16hV8hs", "nc", "gfx12-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32, "V8sV8sV16sV8ss", "nc", "gfx12-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32, "V8iIbV2iIbV4iV8isIb", "nc", "gfx12-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32, "V8iIbiIbV2iV8isIb", "nc", "gfx12-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32, "V8iIbV2iIbV4iV8isIb", "nc", "gfx12-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32, "V8fV2iV4iV8fs", "nc", "gfx12-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32, "V8fV2iV4iV8fs", "nc", "gfx12-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32, "V8fV2iV4iV8fs", "nc", "gfx12-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32, "V8fV2iV4iV8fs", "nc", "gfx12-insts,wavefrontsize32")
+
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64, "V4fV4hV8hV4fs", "nc", "gfx12-insts,wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64, "V4fV4sV8sV4fs", "nc", "gfx12-insts,wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64, "V4hV4hV8hV4hs", "nc", "gfx12-insts,wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64, "V4sV4sV8sV4ss", "nc", "gfx12-insts,wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64, "V4iIbiIbV2iV4isIb", "nc", "gfx12-insts,wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64, "V4iIbiIbiV4isIb", "nc", "gfx12-insts,wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64, "V4iIbiIbV2iV4isIb", "nc", "gfx12-insts,wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64, "V4fiV2iV4fs", "nc", "gfx12-insts,wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64, "V4fiV2iV4fs", "nc", "gfx12-insts,wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64, "V4fiV2iV4fs", "nc", "gfx12-insts,wavefrontsize64")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64, "V4fiV2iV4fs", "nc", "gfx12-insts,wavefrontsize64")
+
 #undef BUILTIN
 #undef TARGET_BUILTIN
diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def
index 2f2e45d5cf63..7c0bfe328496 100644
--- a/clang/include/clang/Basic/CodeGenOptions.def
+++ b/clang/include/clang/Basic/CodeGenOptions.def
@@ -369,6 +369,9 @@ ENUM_CODEGENOPT(VecLib, llvm::driver::VectorLibrary, 3, llvm::driver::VectorLibr
 /// The default TLS model to use.
 ENUM_CODEGENOPT(DefaultTLSModel, TLSModel, 2, GeneralDynamicTLSModel)

+/// Whether to enable TLSDESC. AArch64 enables TLSDESC regardless of this value.
+CODEGENOPT(EnableTLSDESC, 1, 0)
+
 /// Bit size of immediate TLS offsets (0 == use the default).
 VALUE_CODEGENOPT(TLSSize, 8, 0)

diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td
index b1bada65cb6b..08bb1d81ba29 100644
--- a/clang/include/clang/Basic/DiagnosticCommonKinds.td
+++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td
@@ -73,7 +73,7 @@ def warn_pragma_debug_unexpected_argument : Warning<
 def warn_fp_nan_inf_when_disabled : Warning<
   "use of %select{infinity|NaN}0%select{| via a macro}1 is undefined behavior "
   "due to the currently enabled floating-point options">,
-  InGroup<DiagGroup<"nan-infinity-disabled">>;
+  InGroup<DiagGroup<"nan-infinity-disabled", [], NanInfDisabledDocs>>;
 } // Parse && Sema

diff --git a/clang/include/clang/Basic/DiagnosticDocs.td b/clang/include/clang/Basic/DiagnosticDocs.td
index e9862422b499..8c024b5cad74 100644
--- a/clang/include/clang/Basic/DiagnosticDocs.td
+++ b/clang/include/clang/Basic/DiagnosticDocs.td
@@ -87,3 +87,12 @@ program by treating all string literals as having type ``const char *`` instead
 of ``char *``. This can cause unexpected behaviors with type-sensitive
 constructs like ``_Generic``.
 }];
+
+defvar NanInfDisabledDocs = [{
+This warning is enabled when source code using the macros ``INFINITY`` or ``NAN``
+is compiled with floating-point options preventing these two values. This can
+lead to undefined behavior. Check the order of command line arguments that modify
+this behavior, such as ``-ffast-math``, ``-fhonor-infinities``, and
+``-fhonor-nans`` (etc), as well as ``#pragma`` directives if this diagnostic is
+generated unexpectedly.
+}];
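As a hedged illustration of when this newly documented warning fires (not part of the commit): any option set that stops honoring infinities or NaNs, such as `-ffast-math`, makes the macro expansions below trigger `-Wnan-infinity-disabled`.

```c++
// Illustrative sketch only; compile with e.g. -ffast-math, which disables
// honoring of infinities and NaNs (see -fhonor-infinities / -fhonor-nans).
#include <cmath>

double pos_inf = INFINITY; // warning: use of infinity is undefined behavior
                           // due to the currently enabled floating-point options
double not_a_number = NAN; // same warning, NaN form
```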
"mdefault-build-attributes">, Group<m_Group>; def mno_default_build_attributes : Joined<["-"], "mno-default-build-attributes">, Group<m_Group>; @@ -7066,6 +7076,9 @@ def fexperimental_assignment_tracking_EQ : Joined<["-"], "fexperimental-assignme Values<"disabled,enabled,forced">, NormalizedValues<["Disabled","Enabled","Forced"]>, MarshallingInfoEnum<CodeGenOpts<"AssignmentTrackingMode">, "Enabled">; +def enable_tlsdesc : Flag<["-"], "enable-tlsdesc">, + MarshallingInfoFlag<CodeGenOpts<"EnableTLSDESC">>; + } // let Visibility = [CC1Option] //===----------------------------------------------------------------------===// diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h index bc9eecd42f9e..efcb4e1d87ea 100644 --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -4157,14 +4157,9 @@ struct FormatStyle { /// Different ways to put a space before opening parentheses. enum SpaceBeforeParensStyle : int8_t { - /// Never put a space before opening parentheses. - /// \code - /// void f() { - /// if(true) { - /// f(); - /// } - /// } - /// \endcode + /// This is **deprecated** and replaced by ``Custom`` below, with all + /// ``SpaceBeforeParensOptions`` but ``AfterPlacementOperator`` set to + /// ``false``. SBPO_Never, /// Put a space before opening parentheses only after control statement /// keywords (``for/if/while...``). @@ -4273,28 +4268,14 @@ struct FormatStyle { /// object.operator++ (10); object.operator++(10); /// \endcode bool AfterOverloadedOperator; - /// Styles for adding spacing between ``new/delete`` operators and opening - /// parentheses. - enum AfterPlacementOperatorStyle : int8_t { - /// Remove space after ``new/delete`` operators and before ``(``. - /// \code - /// new(buf) T; - /// delete(buf) T; - /// \endcode - APO_Never, - /// Always add space after ``new/delete`` operators and before ``(``. - /// \code - /// new (buf) T; - /// delete (buf) T; - /// \endcode - APO_Always, - /// Leave placement ``new/delete`` expressions as they are. - APO_Leave, - }; - /// Defines in which cases to put a space between ``new/delete`` operators - /// and opening parentheses. - /// \version 18 - AfterPlacementOperatorStyle AfterPlacementOperator; + /// If ``true``, put a space between operator ``new``/``delete`` and opening + /// parenthesis. + /// \code + /// true: false: + /// new (buf) T; vs. new(buf) T; + /// delete (buf) T; delete(buf) T; + /// \endcode + bool AfterPlacementOperator; /// If ``true``, put space between requires keyword in a requires clause and /// opening parentheses, if there is one. 
/// \code @@ -4327,7 +4308,7 @@ struct FormatStyle { : AfterControlStatements(false), AfterForeachMacros(false), AfterFunctionDeclarationName(false), AfterFunctionDefinitionName(false), AfterIfMacros(false), - AfterOverloadedOperator(false), AfterPlacementOperator(APO_Leave), + AfterOverloadedOperator(false), AfterPlacementOperator(true), AfterRequiresInClause(false), AfterRequiresInExpression(false), BeforeNonEmptyParentheses(false) {} diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 2d9c53cdf5bd..b0a8ec0fec5e 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -2828,7 +2828,8 @@ public: return AnnotationInfos.find(II)->second; } - void emitMacroExpansionWarnings(const Token &Identifier) const { + void emitMacroExpansionWarnings(const Token &Identifier, + bool IsIfnDef = false) const { IdentifierInfo *Info = Identifier.getIdentifierInfo(); if (Info->isDeprecatedMacro()) emitMacroDeprecationWarning(Identifier); @@ -2837,12 +2838,12 @@ public: !SourceMgr.isInMainFile(Identifier.getLocation())) emitRestrictExpansionWarning(Identifier); - if (Info->getName() == "INFINITY") - if (getLangOpts().NoHonorInfs) + if (!IsIfnDef) { + if (Info->getName() == "INFINITY" && getLangOpts().NoHonorInfs) emitRestrictInfNaNWarning(Identifier, 0); - if (Info->getName() == "NAN") - if (getLangOpts().NoHonorNaNs) + if (Info->getName() == "NAN" && getLangOpts().NoHonorNaNs) emitRestrictInfNaNWarning(Identifier, 1); + } } static void processPathForFileMacro(SmallVectorImpl<char> &Path, diff --git a/clang/include/clang/Sema/Lookup.h b/clang/include/clang/Sema/Lookup.h index 9c93bf1e6fb4..2f2f2607a937 100644 --- a/clang/include/clang/Sema/Lookup.h +++ b/clang/include/clang/Sema/Lookup.h @@ -754,7 +754,8 @@ public: private: void diagnoseAccess() { - if (isClassLookup() && getSema().getLangOpts().AccessControl) + if (!isAmbiguous() && isClassLookup() && + getSema().getLangOpts().AccessControl) getSema().CheckLookupAccess(*this); } diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index dd1451bbf2d2..cd28226c295b 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -2452,6 +2452,12 @@ private: uint32_t CurrentBitsIndex = ~0; }; +inline bool shouldSkipCheckingODR(const Decl *D) { + return D->getOwningModule() && + D->getASTContext().getLangOpts().SkipODRCheckInGMF && + D->getOwningModule()->isExplicitGlobalModule(); +} + } // namespace clang #endif // LLVM_CLANG_SERIALIZATION_ASTREADER_H diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 5eb7aa366456..ab16ca10395f 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -1945,7 +1945,8 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const { else if (VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) // Adjust the alignment for fixed-length SVE predicates. Align = 16; - else if (VT->getVectorKind() == VectorKind::RVVFixedLengthData) + else if (VT->getVectorKind() == VectorKind::RVVFixedLengthData || + VT->getVectorKind() == VectorKind::RVVFixedLengthMask) // Adjust the alignment for fixed-length RVV vectors. 
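A hedged sketch of the situation `-fskip-odr-check-in-gmf` and `shouldSkipCheckingODR` target (illustrative; the header and module names are invented): declarations that enter a module through the explicit global module fragment satisfy `isExplicitGlobalModule()`, and with the flag enabled the ASTReader skips ODR checking for them.

```c++
// Illustrative sketch only; "n.h" and the module name are hypothetical.
module;            // begins the explicit global module fragment
#include "n.h"     // decls included here have an owning module for which
                   // isExplicitGlobalModule() is true, so with
                   // -fskip-odr-check-in-gmf their ODR checks are skipped
export module m;
export int g();
```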
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 5eb7aa366456..ab16ca10395f 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -1945,7 +1945,8 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const {
     else if (VT->getVectorKind() == VectorKind::SveFixedLengthPredicate)
       // Adjust the alignment for fixed-length SVE predicates.
       Align = 16;
-    else if (VT->getVectorKind() == VectorKind::RVVFixedLengthData)
+    else if (VT->getVectorKind() == VectorKind::RVVFixedLengthData ||
+             VT->getVectorKind() == VectorKind::RVVFixedLengthMask)
       // Adjust the alignment for fixed-length RVV vectors.
       Align = std::min<unsigned>(64, Width);
     break;
@@ -9416,7 +9417,9 @@ bool ASTContext::areCompatibleVectorTypes(QualType FirstVec,
       Second->getVectorKind() != VectorKind::SveFixedLengthData &&
       Second->getVectorKind() != VectorKind::SveFixedLengthPredicate &&
       First->getVectorKind() != VectorKind::RVVFixedLengthData &&
-      Second->getVectorKind() != VectorKind::RVVFixedLengthData)
+      Second->getVectorKind() != VectorKind::RVVFixedLengthData &&
+      First->getVectorKind() != VectorKind::RVVFixedLengthMask &&
+      Second->getVectorKind() != VectorKind::RVVFixedLengthMask)
     return true;

   return false;
@@ -9522,8 +9525,11 @@ static uint64_t getRVVTypeSize(ASTContext &Context, const BuiltinType *Ty) {

   ASTContext::BuiltinVectorTypeInfo Info = Context.getBuiltinVectorTypeInfo(Ty);

-  uint64_t EltSize = Context.getTypeSize(Info.ElementType);
-  uint64_t MinElts = Info.EC.getKnownMinValue();
+  unsigned EltSize = Context.getTypeSize(Info.ElementType);
+  if (Info.ElementType == Context.BoolTy)
+    EltSize = 1;
+
+  unsigned MinElts = Info.EC.getKnownMinValue();
   return VScale->first * MinElts * EltSize;
 }

@@ -9537,6 +9543,12 @@ bool ASTContext::areCompatibleRVVTypes(QualType FirstType,
   auto IsValidCast = [this](QualType FirstType, QualType SecondType) {
     if (const auto *BT = FirstType->getAs<BuiltinType>()) {
       if (const auto *VT = SecondType->getAs<VectorType>()) {
+        if (VT->getVectorKind() == VectorKind::RVVFixedLengthMask) {
+          BuiltinVectorTypeInfo Info = getBuiltinVectorTypeInfo(BT);
+          return FirstType->isRVVVLSBuiltinType() &&
+                 Info.ElementType == BoolTy &&
+                 getTypeSize(SecondType) == getRVVTypeSize(*this, BT);
+        }
         if (VT->getVectorKind() == VectorKind::RVVFixedLengthData ||
             VT->getVectorKind() == VectorKind::Generic)
           return FirstType->isRVVVLSBuiltinType() &&
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index f1d07d022b25..edf9b5e2d52b 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -7951,7 +7951,8 @@ public:
     // Overloaded operator calls to member functions are represented as normal
     // calls with '*this' as the first argument.
     const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD);
-    if (MD && MD->isImplicitObjectMemberFunction()) {
+    if (MD &&
+        (MD->isImplicitObjectMemberFunction() || (OCE && MD->isStatic()))) {
       // FIXME: When selecting an implicit conversion for an overloaded
       // operator delete, we sometimes try to evaluate calls to conversion
       // operators without a 'this' parameter!
@@ -7960,7 +7961,11 @@ public:
       if (!EvaluateObjectArgument(Info, Args[0], ThisVal))
         return false;
-      This = &ThisVal;
+
+      // If we are calling a static operator, the 'this' argument needs to be
+      // ignored after being evaluated.
+      if (MD->isInstance())
+        This = &ThisVal;

       // If this is syntactically a simple assignment using a trivial
       // assignment operator, start the lifetimes of union members as needed,
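A hedged sketch of the C++23 case this `ExprConstant.cpp` change handles (illustrative, not from the commit): a call through a static `operator()` still evaluates the object argument, but no longer binds it as `this`.

```c++
// Illustrative sketch only: C++23 static call operator in a constant expression.
struct Adder {
  static constexpr int operator()(int a, int b) { return a + b; }
};

// The CXXOperatorCallExpr evaluates Adder{} as the object argument and then
// ignores it, because the operator is static.
static_assert(Adder{}(2, 3) == 5);
```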
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index 40b1e086ddd0..688141b30441 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -3994,7 +3994,8 @@ void CXXNameMangler::mangleAArch64FixedSveVectorType(
 }

 void CXXNameMangler::mangleRISCVFixedRVVVectorType(const VectorType *T) {
-  assert(T->getVectorKind() == VectorKind::RVVFixedLengthData &&
+  assert((T->getVectorKind() == VectorKind::RVVFixedLengthData ||
+          T->getVectorKind() == VectorKind::RVVFixedLengthMask) &&
          "expected fixed-length RVV vector!");

   QualType EltType = T->getElementType();
@@ -4009,7 +4010,10 @@ void CXXNameMangler::mangleRISCVFixedRVVVectorType(const VectorType *T) {
     TypeNameOS << "int8";
     break;
   case BuiltinType::UChar:
-    TypeNameOS << "uint8";
+    if (T->getVectorKind() == VectorKind::RVVFixedLengthData)
+      TypeNameOS << "uint8";
+    else
+      TypeNameOS << "bool";
     break;
   case BuiltinType::Short:
     TypeNameOS << "int16";
@@ -4048,12 +4052,16 @@ void CXXNameMangler::mangleRISCVFixedRVVVectorType(const VectorType *T) {
   auto VScale = getASTContext().getTargetInfo().getVScaleRange(
       getASTContext().getLangOpts());
   unsigned VLen = VScale->first * llvm::RISCV::RVVBitsPerBlock;
-  TypeNameOS << 'm';
-  if (VecSizeInBits >= VLen)
-    TypeNameOS << (VecSizeInBits / VLen);
-  else
-    TypeNameOS << 'f' << (VLen / VecSizeInBits);
+  if (T->getVectorKind() == VectorKind::RVVFixedLengthData) {
+    TypeNameOS << 'm';
+    if (VecSizeInBits >= VLen)
+      TypeNameOS << (VecSizeInBits / VLen);
+    else
+      TypeNameOS << 'f' << (VLen / VecSizeInBits);
+  } else {
+    TypeNameOS << (VLen / VecSizeInBits);
+  }
   TypeNameOS << "_t";

   Out << "9__RVV_VLSI" << 'u' << TypeNameStr.size() << TypeNameStr << "Lj"
@@ -4093,7 +4101,8 @@ void CXXNameMangler::mangleType(const VectorType *T) {
       T->getVectorKind() == VectorKind::SveFixedLengthPredicate) {
     mangleAArch64FixedSveVectorType(T);
     return;
-  } else if (T->getVectorKind() == VectorKind::RVVFixedLengthData) {
+  } else if (T->getVectorKind() == VectorKind::RVVFixedLengthData ||
+             T->getVectorKind() == VectorKind::RVVFixedLengthMask) {
     mangleRISCVFixedRVVVectorType(T);
     return;
   }
diff --git a/clang/lib/AST/JSONNodeDumper.cpp b/clang/lib/AST/JSONNodeDumper.cpp
index 3daba13d0fce..3c11b75d7472 100644
--- a/clang/lib/AST/JSONNodeDumper.cpp
+++ b/clang/lib/AST/JSONNodeDumper.cpp
@@ -703,6 +703,9 @@ void JSONNodeDumper::VisitVectorType(const VectorType *VT) {
   case VectorKind::RVVFixedLengthData:
     JOS.attribute("vectorKind", "fixed-length rvv data vector");
     break;
+  case VectorKind::RVVFixedLengthMask:
+    JOS.attribute("vectorKind", "fixed-length rvv mask vector");
+    break;
   }
 }
diff --git a/clang/lib/AST/ODRHash.cpp b/clang/lib/AST/ODRHash.cpp
index 5b98646a1e8d..2dbc259138a8 100644
--- a/clang/lib/AST/ODRHash.cpp
+++ b/clang/lib/AST/ODRHash.cpp
@@ -745,55 +745,8 @@ void ODRHash::AddEnumDecl(const EnumDecl *Enum) {
   if (Enum->isScoped())
     AddBoolean(Enum->isScopedUsingClassTag());

-  if (Enum->getIntegerTypeSourceInfo()) {
-    // FIMXE: This allows two enums with different spellings to have the same
-    // hash.
-    //
-    // // mod1.cppm
-    // module;
-    // extern "C" {
-    //   typedef unsigned __int64 size_t;
-    // }
-    // namespace std {
-    //   using :: size_t;
-    // }
-    //
-    // extern "C++" {
-    //   namespace std {
-    //     enum class align_val_t : std::size_t {};
-    //   }
-    // }
-    //
-    // export module mod1;
-    // export using std::align_val_t;
-    //
-    // // mod2.cppm
-    // module;
-    // extern "C" {
-    //   typedef unsigned __int64 size_t;
-    // }
-    //
-    // extern "C++" {
-    //   namespace std {
-    //     enum class align_val_t : size_t {};
-    //   }
-    // }
-    //
-    // export module mod2;
-    // import mod1;
-    // export using std::align_val_t;
-    //
-    // The above example should be disallowed since it violates
-    // [basic.def.odr]p14:
-    //
-    //   Each such definition shall consist of the same sequence of tokens
-    //
-    // The definitions of `std::align_val_t` in two module units have different
-    // spellings but we failed to give an error here.
-    //
-    // See https://github.com/llvm/llvm-project/issues/76638 for details.
+  if (Enum->getIntegerTypeSourceInfo())
     AddQualType(Enum->getIntegerType().getCanonicalType());
-  }

   // Filter out sub-Decls which will not be processed in order to get an
   // accurate count of Decl's.
diff --git a/clang/lib/AST/TemplateBase.cpp b/clang/lib/AST/TemplateBase.cpp
index 2bdbeb08ef20..3310d7dc24c5 100644
--- a/clang/lib/AST/TemplateBase.cpp
+++ b/clang/lib/AST/TemplateBase.cpp
@@ -450,7 +450,8 @@ bool TemplateArgument::structurallyEquals(const TemplateArgument &Other) const {
            getAsIntegral() == Other.getAsIntegral();

   case StructuralValue: {
-    if (getStructuralValueType() != Other.getStructuralValueType())
+    if (getStructuralValueType().getCanonicalType() !=
+        Other.getStructuralValueType().getCanonicalType())
       return false;

     llvm::FoldingSetNodeID A, B;
diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp
index 48c6729a6738..ecf5de0be543 100644
--- a/clang/lib/AST/TextNodeDumper.cpp
+++ b/clang/lib/AST/TextNodeDumper.cpp
@@ -1623,6 +1623,9 @@ void TextNodeDumper::VisitVectorType(const VectorType *T) {
   case VectorKind::RVVFixedLengthData:
     OS << " fixed-length rvv data vector";
     break;
+  case VectorKind::RVVFixedLengthMask:
+    OS << " fixed-length rvv mask vector";
+    break;
   }
   OS << " " << T->getNumElements();
 }
diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp
index 3db5ae182f32..d4103025591e 100644
--- a/clang/lib/AST/Type.cpp
+++ b/clang/lib/AST/Type.cpp
@@ -2479,6 +2479,9 @@ bool Type::isRVVVLSBuiltinType() const {
                         IsFP, IsBF)                                           \
   case BuiltinType::Id:                                                       \
     return NF == 1;
+#define RVV_PREDICATE_TYPE(Name, Id, SingletonId, NumEls)                     \
+  case BuiltinType::Id:                                                       \
+    return true;
 #include "clang/Basic/RISCVVTypes.def"
     default:
       return false;
@@ -2491,7 +2494,17 @@ QualType Type::getRVVEltType(const ASTContext &Ctx) const {
   assert(isRVVVLSBuiltinType() && "unsupported type!");

   const BuiltinType *BTy = castAs<BuiltinType>();
-  return Ctx.getBuiltinVectorTypeInfo(BTy).ElementType;
+
+  switch (BTy->getKind()) {
+#define RVV_PREDICATE_TYPE(Name, Id, SingletonId, NumEls)                     \
+  case BuiltinType::Id:                                                       \
+    return Ctx.UnsignedCharTy;
+  default:
+    return Ctx.getBuiltinVectorTypeInfo(BTy).ElementType;
+#include "clang/Basic/RISCVVTypes.def"
+  }
+
+  llvm_unreachable("Unhandled type");
 }

 bool QualType::isPODType(const ASTContext &Context) const {
diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp
index 80b42c8f84a0..e9b6e810b02e 100644
--- a/clang/lib/AST/TypePrinter.cpp
+++ b/clang/lib/AST/TypePrinter.cpp
@@ -694,6 +694,7 @@ void TypePrinter::printVectorBefore(const VectorType *T,
                                     raw_ostream &OS) {
     printBefore(T->getElementType(), OS);
     break;
   case VectorKind::RVVFixedLengthData:
+  case VectorKind::RVVFixedLengthMask:
     // FIXME: We prefer to print the size directly here, but have no way
     // to get the size of the type.
     OS << "__attribute__((__riscv_rvv_vector_bits__(";
@@ -773,6 +774,7 @@ void TypePrinter::printDependentVectorBefore(
     printBefore(T->getElementType(), OS);
     break;
   case VectorKind::RVVFixedLengthData:
+  case VectorKind::RVVFixedLengthMask:
     // FIXME: We prefer to print the size directly here, but have no way
     // to get the size of the type.
     OS << "__attribute__((__riscv_rvv_vector_bits__(";
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index d47181bfca4f..336b7a5e3d72 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -387,6 +387,11 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,

   Builder.defineMacro("__ARM_ALIGN_MAX_STACK_PWR", "4");

+  // These macros are set when Clang can parse declarations with these
+  // attributes.
+  Builder.defineMacro("__ARM_STATE_ZA", "1");
+  Builder.defineMacro("__ARM_STATE_ZT0", "1");
+
   // 0xe implies support for half, single and double precision operations.
   if (FPU & FPUMode)
     Builder.defineMacro("__ARM_FP", "0xE");
@@ -431,6 +436,17 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
   if (HasSVE2 && HasSVE2SM4)
     Builder.defineMacro("__ARM_FEATURE_SVE2_SM4", "1");

+  if (HasSME) {
+    Builder.defineMacro("__ARM_FEATURE_SME");
+    Builder.defineMacro("__ARM_FEATURE_LOCALLY_STREAMING", "1");
+  }
+
+  if (HasSME2) {
+    Builder.defineMacro("__ARM_FEATURE_SME");
+    Builder.defineMacro("__ARM_FEATURE_SME2");
+    Builder.defineMacro("__ARM_FEATURE_LOCALLY_STREAMING", "1");
+  }
+
   if (HasCRC)
     Builder.defineMacro("__ARM_FEATURE_CRC32", "1");

@@ -686,6 +702,7 @@ bool AArch64TargetInfo::hasFeature(StringRef Feature) const {
       .Case("sve2-sha3", FPU & SveMode && HasSVE2SHA3)
       .Case("sve2-sm4", FPU & SveMode && HasSVE2SM4)
       .Case("sme", HasSME)
+      .Case("sme2", HasSME2)
       .Case("sme-f64f64", HasSMEF64F64)
       .Case("sme-i16i64", HasSMEI16I64)
       .Case("sme-fa64", HasSMEFA64)
@@ -806,6 +823,12 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       HasBFloat16 = true;
       HasFullFP16 = true;
     }
+    if (Feature == "+sme2") {
+      HasSME = true;
+      HasSME2 = true;
+      HasBFloat16 = true;
+      HasFullFP16 = true;
+    }
     if (Feature == "+sme-f64f64") {
       HasSME = true;
       HasSMEF64F64 = true;
diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h
index f0e0782e7abe..9699222b0bf7 100644
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -68,6 +68,7 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
   bool HasCCDP = false;
   bool HasFRInt3264 = false;
   bool HasSME = false;
+  bool HasSME2 = false;
   bool HasSMEF64F64 = false;
   bool HasSMEI16I64 = false;
   bool HasSB = false;
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index ec203f6f28bc..7877e20d77f7 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -401,6 +401,7 @@ static bool initTargetOptions(DiagnosticsEngine &Diags,
   Options.UniqueBasicBlockSectionNames =
       CodeGenOpts.UniqueBasicBlockSectionNames;
   Options.TLSSize = CodeGenOpts.TLSSize;
+  Options.EnableTLSDESC = CodeGenOpts.EnableTLSDESC;
   Options.EmulatedTLS = CodeGenOpts.EmulatedTLS;
   Options.DebuggerTuning = CodeGenOpts.getDebuggerTuning();
   Options.EmitStackSizeSection = CodeGenOpts.StackSizeSection;
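For reference, a hedged sketch of how the macros defined in the `AArch64.cpp` change above might be probed (illustrative, not from the commit): `__ARM_FEATURE_SME2` implies `__ARM_FEATURE_SME`, while the `__ARM_STATE_*` macros only advertise that the corresponding attributes can be parsed.

```c++
// Illustrative sketch only.
#if defined(__ARM_FEATURE_SME2)
  // SME2 path: __ARM_FEATURE_SME is also defined here.
#elif defined(__ARM_FEATURE_SME)
  // SME-only path.
#endif

#if defined(__ARM_STATE_ZA) && defined(__ARM_STATE_ZT0)
  // The compiler can parse declarations annotated with ZA/ZT0 state
  // attributes (e.g. __arm_new("za")).
#endif
```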
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 7ef764b8e1ac..a4f26a6f0eb1 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18279,65 +18279,216 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
   case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
   case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
   case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
-  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64: {
+  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
+  case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
+  case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
+  case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
+  case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
+  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
+  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
+  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
+  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
+  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
+  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
+  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
+  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
+  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
+  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
+  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
+  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
+  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
+  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
+  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
+  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
+  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
+  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
+  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
+  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
+  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
+  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
+  case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
+  case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
+  case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
+  case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
+  case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
+  case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
+  case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
+  case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
+  case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
+  case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
+  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
+  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
+  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
+  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
+  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
+  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
+  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
+  case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64: {
     // These operations perform a matrix multiplication and accumulation of
     // the form:
     //             D = A * B + C
-    // The return type always matches the type of matrix C.
-    unsigned ArgForMatchingRetType;
+    // We need to specify one type for matrices AB and one for matrices CD.
+    // Sparse matrix operations can have different types for A and B as well as
+    // an additional type for sparsity index.
+    // Destination type should be put before types used for source operands.
+    SmallVector<unsigned, 2> ArgsForMatchingMatrixTypes;
+    // On GFX12, the intrinsics with 16-bit accumulator use a packed layout.
+    // There is no need for the variable opsel argument, so always set it to
+    // "false".
+    bool AppendFalseForOpselArg = false;
     unsigned BuiltinWMMAOp;

     switch (BuiltinID) {
     case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
     case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
-      ArgForMatchingRetType = 2;
+    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
+    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
+      ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
       BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
       break;
     case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
     case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
-      ArgForMatchingRetType = 2;
+    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
+    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
+      ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
       BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
       break;
+    case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
+    case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
+      AppendFalseForOpselArg = true;
+      LLVM_FALLTHROUGH;
     case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
     case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
-      ArgForMatchingRetType = 2;
+      ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
       BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
       break;
+    case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
+    case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
+      AppendFalseForOpselArg = true;
+      LLVM_FALLTHROUGH;
     case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
     case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
-      ArgForMatchingRetType = 2;
+      ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
       BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
       break;
     case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
     case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
-      ArgForMatchingRetType = 2;
+      ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
       BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
       break;
     case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
     case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
-      ArgForMatchingRetType = 2;
+      ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
       BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
       break;
     case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
     case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
-      ArgForMatchingRetType = 4;
+    case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
+    case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
+      ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
       BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
       break;
     case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
     case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
-      ArgForMatchingRetType = 4;
+    case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
+    case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
+      ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
       BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
       break;
+    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
+    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
+      ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
+      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8;
+      break;
+    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
+    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
+      ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
+      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8;
+      break;
+    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
+    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
+      ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
+      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8;
+      break;
+    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
+    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
+      ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
+      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8;
+      break;
+    case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
+    case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
+      ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
+      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4;
+      break;
+    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
+    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
+      ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
+      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_f16;
+      break;
+    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
+    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
+      ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
+      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16;
+      break;
+    case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
+    case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
+      ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
+      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x32_f16;
+      break;
+    case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
+    case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
+      ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
+      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16;
+      break;
+    case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
+    case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
+      ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
+      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8;
+      break;
+    case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
+    case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
+      ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
+      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4;
+      break;
+    case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
+    case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
+      ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
+      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4;
+      break;
+    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
+    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
+      ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
+      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8;
+      break;
+    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
+    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
+      ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
+      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8;
+      break;
+    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
+    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
+      ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
+      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8;
+      break;
+    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
+    case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
+      ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
+      BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;
+      break;
     }

     SmallVector<Value *, 6> Args;
     for (int i = 0, e = E->getNumArgs(); i != e; ++i)
       Args.push_back(EmitScalarExpr(E->getArg(i)));
+    if (AppendFalseForOpselArg)
+      Args.push_back(Builder.getFalse());

-    Function *F = CGM.getIntrinsic(BuiltinWMMAOp,
-                                   {Args[ArgForMatchingRetType]->getType()});
+    SmallVector<llvm::Type *, 6> ArgTypes;
+    for (auto ArgIdx : ArgsForMatchingMatrixTypes)
+      ArgTypes.push_back(Args[ArgIdx]->getType());
+
+    Function *F = CGM.getIntrinsic(BuiltinWMMAOp, ArgTypes);
     return Builder.CreateCall(F, Args);
   }
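A hedged companion sketch for the sparse (SWMMAC) variants wired up above (illustrative; typedefs are placeholders): per the `"V8fV8hV16hV8fs"` signature, B is twice as wide as A, and the trailing scalar is the sparsity index mentioned in the CGBuiltin comment.

```c++
// Illustrative sketch only (types follow the "V8fV8hV16hV8fs" signature).
typedef float    v8f  __attribute__((ext_vector_type(8)));
typedef _Float16 v8h  __attribute__((ext_vector_type(8)));
typedef _Float16 v16h __attribute__((ext_vector_type(16)));

v8f sparse_tile_mma(v8h a, v16h b, v8f acc, short idx) {
  // Sparse D = A * B + C; the last argument is the sparsity index.
  return __builtin_amdgcn_swmmac_f32_16x16x32_f16_w32(a, b, acc, idx);
}
```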
- const auto *E = cast<ConstantExpr>(S->getCond()); - const bool isTrue = E->getResultAsAPSInt().getExtValue(); + const bool isTrue = + S->getCond() + ->EvaluateKnownConstInt(CVM.getCodeGenModule().getContext()) + .getBoolValue(); extendRegion(S); diff --git a/clang/lib/CodeGen/Targets/RISCV.cpp b/clang/lib/CodeGen/Targets/RISCV.cpp index 0851d1993d0c..02c86ad2e58c 100644 --- a/clang/lib/CodeGen/Targets/RISCV.cpp +++ b/clang/lib/CodeGen/Targets/RISCV.cpp @@ -321,20 +321,28 @@ ABIArgInfo RISCVABIInfo::coerceVLSVector(QualType Ty) const { assert(Ty->isVectorType() && "expected vector type!"); const auto *VT = Ty->castAs<VectorType>(); - assert(VT->getVectorKind() == VectorKind::RVVFixedLengthData && - "Unexpected vector kind"); - assert(VT->getElementType()->isBuiltinType() && "expected builtin type!"); auto VScale = getContext().getTargetInfo().getVScaleRange(getContext().getLangOpts()); + + unsigned NumElts = VT->getNumElements(); + llvm::Type *EltType; + if (VT->getVectorKind() == VectorKind::RVVFixedLengthMask) { + NumElts *= 8; + EltType = llvm::Type::getInt1Ty(getVMContext()); + } else { + assert(VT->getVectorKind() == VectorKind::RVVFixedLengthData && + "Unexpected vector kind"); + EltType = CGT.ConvertType(VT->getElementType()); + } + // The MinNumElts is simplified from equation: // NumElts / VScale = // (EltSize * NumElts / (VScale * RVVBitsPerBlock)) // * (RVVBitsPerBlock / EltSize) llvm::ScalableVectorType *ResType = - llvm::ScalableVectorType::get(CGT.ConvertType(VT->getElementType()), - VT->getNumElements() / VScale->first); + llvm::ScalableVectorType::get(EltType, NumElts / VScale->first); return ABIArgInfo::getDirect(ResType); } @@ -437,7 +445,8 @@ ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, } if (const VectorType *VT = Ty->getAs<VectorType>()) - if (VT->getVectorKind() == VectorKind::RVVFixedLengthData) + if (VT->getVectorKind() == VectorKind::RVVFixedLengthData || + VT->getVectorKind() == VectorKind::RVVFixedLengthMask) return coerceVLSVector(Ty); // Aggregates which are <= 2*XLen will be passed in registers if possible, diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 7109faa1072d..93cddf742d52 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4764,9 +4764,9 @@ Action *Driver::ConstructPhaseAction( case phases::Backend: { if (isUsingLTO() && TargetDeviceOffloadKind == Action::OFK_None) { types::ID Output; - if (Args.hasArg(options::OPT_ffat_lto_objects)) - Output = Args.hasArg(options::OPT_emit_llvm) ? types::TY_LTO_IR - : types::TY_PP_Asm; + if (Args.hasArg(options::OPT_ffat_lto_objects) && + !Args.hasArg(options::OPT_emit_llvm)) + Output = types::TY_PP_Asm; else if (Args.hasArg(options::OPT_S)) Output = types::TY_LTO_IR; else diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 5dc614e11aab..aa344b3465ab 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -3942,6 +3942,10 @@ static bool RenderModulesOptions(Compilation &C, const Driver &D, Args.ClaimAllArgs(options::OPT_fmodules_disable_diagnostic_validation); } + // FIXME: We provisionally don't check ODR violations for decls in the global + // module fragment. + CmdArgs.push_back("-fskip-odr-check-in-gmf"); + // Claim `-fmodule-output` and `-fmodule-output=` to avoid unused warnings. 
Args.ClaimAllArgs(options::OPT_fmodule_output); Args.ClaimAllArgs(options::OPT_fmodule_output_EQ); @@ -5779,6 +5783,14 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, // NVPTX/AMDGPU does not care about the code model and will accept // whatever works for the host. Ok = true; + } else if (Triple.isSPARC64()) { + if (CM == "medlow") + CM = "small"; + else if (CM == "medmid") + CM = "medium"; + else if (CM == "medany") + CM = "large"; + Ok = CM == "small" || CM == "medium" || CM == "large"; } if (Ok) { CmdArgs.push_back(Args.MakeArgString("-mcmodel=" + CM)); @@ -5822,6 +5834,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Args.AddLastArg(CmdArgs, options::OPT_mtls_size_EQ); } + if (isTLSDESCEnabled(TC, Args)) + CmdArgs.push_back("-enable-tlsdesc"); + // Add the target cpu std::string CPU = getCPUName(D, Args, Triple, /*FromAs*/ false); if (!CPU.empty()) { diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index fadaf3e60c66..2b916f000336 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -729,6 +729,33 @@ bool tools::isUseSeparateSections(const llvm::Triple &Triple) { return Triple.isPS(); } +bool tools::isTLSDESCEnabled(const ToolChain &TC, + const llvm::opt::ArgList &Args) { + const llvm::Triple &Triple = TC.getEffectiveTriple(); + Arg *A = Args.getLastArg(options::OPT_mtls_dialect_EQ); + if (!A) + return Triple.hasDefaultTLSDESC(); + StringRef V = A->getValue(); + bool SupportedArgument = false, EnableTLSDESC = false; + bool Unsupported = !Triple.isOSBinFormatELF(); + if (Triple.isRISCV()) { + SupportedArgument = V == "desc" || V == "trad"; + EnableTLSDESC = V == "desc"; + } else if (Triple.isX86()) { + SupportedArgument = V == "gnu"; + } else { + Unsupported = true; + } + if (Unsupported) { + TC.getDriver().Diag(diag::err_drv_unsupported_opt_for_target) + << A->getSpelling() << Triple.getTriple(); + } else if (!SupportedArgument) { + TC.getDriver().Diag(diag::err_drv_unsupported_option_argument_for_target) + << A->getSpelling() << V << Triple.getTriple(); + } + return EnableTLSDESC; +} + void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args, ArgStringList &CmdArgs, const InputInfo &Output, const InputInfo &Input, bool IsThinLTO) { @@ -783,6 +810,28 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args, "-generate-arange-section")); } + // Pass vector library arguments to LTO. + Arg *ArgVecLib = Args.getLastArg(options::OPT_fveclib); + if (ArgVecLib && ArgVecLib->getNumValues() == 1) { + // Map the vector library names from clang front-end to opt front-end. The + // values are taken from the TargetLibraryInfo class command line options. + std::optional<StringRef> OptVal = + llvm::StringSwitch<std::optional<StringRef>>(ArgVecLib->getValue()) + .Case("Accelerate", "Accelerate") + .Case("LIBMVEC", "LIBMVEC-X86") + .Case("MASSV", "MASSV") + .Case("SVML", "SVML") + .Case("SLEEF", "sleefgnuabi") + .Case("Darwin_libsystem_m", "Darwin_libsystem_m") + .Case("ArmPL", "ArmPL") + .Case("none", "none") + .Default(std::nullopt); + + if (OptVal) + CmdArgs.push_back(Args.MakeArgString( + Twine(PluginOptPrefix) + "-vector-library=" + OptVal.value())); + } + // Try to pass driver level flags relevant to LTO code generation down to // the plugin. 
@@ -988,6 +1037,9 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args, CmdArgs.push_back( Args.MakeArgString(Twine(PluginOptPrefix) + "-emulated-tls")); } + if (isTLSDESCEnabled(ToolChain, Args)) + CmdArgs.push_back( + Args.MakeArgString(Twine(PluginOptPrefix) + "-enable-tlsdesc")); if (Args.hasFlag(options::OPT_fstack_size_section, options::OPT_fno_stack_size_section, false)) diff --git a/clang/lib/Driver/ToolChains/CommonArgs.h b/clang/lib/Driver/ToolChains/CommonArgs.h index 25d68345a9f9..807867f13a5c 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.h +++ b/clang/lib/Driver/ToolChains/CommonArgs.h @@ -144,6 +144,9 @@ llvm::StringRef getLTOParallelism(const llvm::opt::ArgList &Args, bool areOptimizationsEnabled(const llvm::opt::ArgList &Args); bool isUseSeparateSections(const llvm::Triple &Triple); +// Parse -mtls-dialect=. Return true if the target supports both general-dynamic +// and TLSDESC, and TLSDESC is requested. +bool isTLSDESCEnabled(const ToolChain &TC, const llvm::opt::ArgList &Args); /// \p EnvVar is split by system delimiter for environment variables. /// If \p ArgName is "-I", "-L", or an empty string, each entry from \p EnvVar diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index ff326dc78478..10fe35c79a4f 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -504,22 +504,6 @@ struct ScalarEnumerationTraits<FormatStyle::QualifierAlignmentStyle> { } }; -template <> -struct MappingTraits< - FormatStyle::SpaceBeforeParensCustom::AfterPlacementOperatorStyle> { - static void - mapping(IO &IO, - FormatStyle::SpaceBeforeParensCustom::AfterPlacementOperatorStyle - &Value) { - IO.enumCase(Value, "Always", - FormatStyle::SpaceBeforeParensCustom::APO_Always); - IO.enumCase(Value, "Never", - FormatStyle::SpaceBeforeParensCustom::APO_Never); - IO.enumCase(Value, "Leave", - FormatStyle::SpaceBeforeParensCustom::APO_Leave); - } -}; - template <> struct MappingTraits<FormatStyle::RawStringFormat> { static void mapping(IO &IO, FormatStyle::RawStringFormat &Format) { IO.mapOptional("Language", Format.Language); @@ -1388,12 +1372,9 @@ static void expandPresetsSpaceBeforeParens(FormatStyle &Expanded) { return; // Reset all flags Expanded.SpaceBeforeParensOptions = {}; + Expanded.SpaceBeforeParensOptions.AfterPlacementOperator = true; switch (Expanded.SpaceBeforeParens) { - case FormatStyle::SBPO_Never: - Expanded.SpaceBeforeParensOptions.AfterPlacementOperator = - FormatStyle::SpaceBeforeParensCustom::APO_Never; - break; case FormatStyle::SBPO_ControlStatements: Expanded.SpaceBeforeParensOptions.AfterControlStatements = true; Expanded.SpaceBeforeParensOptions.AfterForeachMacros = true; @@ -1405,8 +1386,6 @@ static void expandPresetsSpaceBeforeParens(FormatStyle &Expanded) { case FormatStyle::SBPO_NonEmptyParentheses: Expanded.SpaceBeforeParensOptions.BeforeNonEmptyParentheses = true; break; - case FormatStyle::SBPO_Always: - break; default: break; } diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 25fcceb87864..d0c4273cfc7e 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -2488,6 +2488,8 @@ private: (Tok.Next->Next->is(tok::numeric_constant) || Line.InPPDirective)) { return false; } + if (Line.InPPDirective && Tok.Next->is(tok::minus)) + return false; // Search for unexpected tokens. 
for (FormatToken *Prev = Tok.Previous; Prev != Tok.MatchingParen; Prev = Prev->Previous) { @@ -4272,14 +4274,7 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, Left.isOneOf(tok::kw_new, tok::kw_delete) && Right.isNot(TT_OverloadedOperatorLParen) && !(Line.MightBeFunctionDecl && Left.is(TT_FunctionDeclarationName))) { - if (Style.SpaceBeforeParensOptions.AfterPlacementOperator == - FormatStyle::SpaceBeforeParensCustom::APO_Always || - (Style.SpaceBeforeParensOptions.AfterPlacementOperator == - FormatStyle::SpaceBeforeParensCustom::APO_Leave && - Right.hasWhitespaceBefore())) { - return true; - } - return false; + return Style.SpaceBeforeParensOptions.AfterPlacementOperator; } if (Line.Type == LT_ObjCDecl) return true; diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index 9f82a6d073e3..a980f4bcbae1 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -3288,7 +3288,7 @@ void Preprocessor::HandleIfdefDirective(Token &Result, return; } - emitMacroExpansionWarnings(MacroNameTok); + emitMacroExpansionWarnings(MacroNameTok, /*IsIfnDef=*/true); // Check to see if this is the last token on the #if[n]def line. CheckEndOfDirective(isIfndef ? "ifndef" : "ifdef"); diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp index 1feb0eb18d71..8f25c67ec9df 100644 --- a/clang/lib/Lex/PPExpressions.cpp +++ b/clang/lib/Lex/PPExpressions.cpp @@ -133,7 +133,9 @@ static bool EvaluateDefined(PPValue &Result, Token &PeekTok, DefinedTracker &DT, Result.Val.setIsUnsigned(false); // Result is signed intmax_t. DT.IncludedUndefinedIds = !Macro; - PP.emitMacroExpansionWarnings(PeekTok); + PP.emitMacroExpansionWarnings( + PeekTok, + (II->getName() == "INFINITY" || II->getName() == "NAN") ? true : false); // If there is a macro, mark it used. if (Result.Val != 0 && ValueLive) diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 7833d5a2ea20..09b7e1c62fbd 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -7545,47 +7545,43 @@ void Sema::checkCall(NamedDecl *FDecl, const FunctionProtoType *Proto, } } - // If the callee uses AArch64 SME ZA state but the caller doesn't define - // any, then this is an error. - FunctionType::ArmStateValue ArmZAState = + FunctionType::ArmStateValue CalleeArmZAState = FunctionType::getArmZAState(ExtInfo.AArch64SMEAttributes); - if (ArmZAState != FunctionType::ARM_None) { + FunctionType::ArmStateValue CalleeArmZT0State = + FunctionType::getArmZT0State(ExtInfo.AArch64SMEAttributes); + if (CalleeArmZAState != FunctionType::ARM_None || + CalleeArmZT0State != FunctionType::ARM_None) { bool CallerHasZAState = false; + bool CallerHasZT0State = false; if (const auto *CallerFD = dyn_cast<FunctionDecl>(CurContext)) { auto *Attr = CallerFD->getAttr<ArmNewAttr>(); if (Attr && Attr->isNewZA()) CallerHasZAState = true; - else if (const auto *FPT = - CallerFD->getType()->getAs<FunctionProtoType>()) - CallerHasZAState = FunctionType::getArmZAState( - FPT->getExtProtoInfo().AArch64SMEAttributes) != - FunctionType::ARM_None; - } - - if (!CallerHasZAState) - Diag(Loc, diag::err_sme_za_call_no_za_state); - } - - // If the callee uses AArch64 SME ZT0 state but the caller doesn't define - // any, then this is an error. 
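The Format.cpp and TokenAnnotator.cpp hunks above replace the three-state AfterPlacementOperator (Always/Never/Leave) with a plain boolean: a space after placement new/delete is now either required or not, with no "Leave" passthrough, and the presets set it to true. An illustrative rendering of the two settings, assuming SpaceBeforeParens: Custom (Widget and buffer are placeholder names):

    #include <new>
    struct Widget {};
    void demo(void *buffer) {
      Widget *a = new (buffer) Widget();   // AfterPlacementOperator: true (the new default)
      Widget *b = new(buffer) Widget();    // AfterPlacementOperator: false
      (void)a; (void)b;                    // same code, two formatting outcomes
    }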
- FunctionType::ArmStateValue ArmZT0State = - FunctionType::getArmZT0State(ExtInfo.AArch64SMEAttributes); - if (ArmZT0State != FunctionType::ARM_None) { - bool CallerHasZT0State = false; - if (const auto *CallerFD = dyn_cast<FunctionDecl>(CurContext)) { - auto *Attr = CallerFD->getAttr<ArmNewAttr>(); if (Attr && Attr->isNewZT0()) CallerHasZT0State = true; - else if (const auto *FPT = - CallerFD->getType()->getAs<FunctionProtoType>()) - CallerHasZT0State = + if (const auto *FPT = CallerFD->getType()->getAs<FunctionProtoType>()) { + CallerHasZAState |= + FunctionType::getArmZAState( + FPT->getExtProtoInfo().AArch64SMEAttributes) != + FunctionType::ARM_None; + CallerHasZT0State |= FunctionType::getArmZT0State( FPT->getExtProtoInfo().AArch64SMEAttributes) != FunctionType::ARM_None; + } } - if (!CallerHasZT0State) + if (CalleeArmZAState != FunctionType::ARM_None && !CallerHasZAState) + Diag(Loc, diag::err_sme_za_call_no_za_state); + + if (CalleeArmZT0State != FunctionType::ARM_None && !CallerHasZT0State) Diag(Loc, diag::err_sme_zt0_call_no_zt0_state); + + if (CallerHasZAState && CalleeArmZAState == FunctionType::ARM_None && + CalleeArmZT0State != FunctionType::ARM_None) { + Diag(Loc, diag::err_sme_unimplemented_za_save_restore); + Diag(Loc, diag::note_sme_use_preserves_za); + } } } @@ -7643,9 +7639,8 @@ bool Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, unsigned NumArgs = TheCall->getNumArgs(); Expr *ImplicitThis = nullptr; - if (IsMemberOperatorCall && !FDecl->isStatic() && - !FDecl->hasCXXExplicitFunctionObjectParameter()) { - // If this is a call to a non-static member operator, hide the first + if (IsMemberOperatorCall && !FDecl->hasCXXExplicitFunctionObjectParameter()) { + // If this is a call to a member operator, hide the first // argument from checkCall. // FIXME: Our choice of AST representation here is less than ideal. ImplicitThis = Args[0]; diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp index acfc00f41254..88fc846c89e4 100644 --- a/clang/lib/Sema/SemaConcept.cpp +++ b/clang/lib/Sema/SemaConcept.cpp @@ -612,8 +612,12 @@ bool Sema::SetupConstraintScope( // If this is a member function, make sure we get the parameters that // reference the original primary template. - if (const auto *FromMemTempl = - PrimaryTemplate->getInstantiatedFromMemberTemplate()) { + // We walk up the instantiated template chain so that nested lambdas get + // handled properly. 
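The SemaChecking.cpp rework above folds the previously separate ZA and ZT0 checks into one pass: the caller's __arm_new attribute and function type are inspected once, each missing-state case keeps its own diagnostic, and a new error covers calls that would need an as-yet-unimplemented ZA save/restore. A hedged illustration using the ACLE SME keyword attributes, assuming a target with SME2 enabled (function names are placeholders):

    void shares_zt0() __arm_inout("zt0");

    void no_state() {
      shares_zt0();        // error: callee uses ZT0, caller provides no ZT0 state
    }

    void fresh_state() __arm_new("zt0") {
      shares_zt0();        // OK: caller creates new ZT0 state
    }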
+ for (FunctionTemplateDecl *FromMemTempl = + PrimaryTemplate->getInstantiatedFromMemberTemplate(); + FromMemTempl; + FromMemTempl = FromMemTempl->getInstantiatedFromMemberTemplate()) { if (addInstantiatedParametersToScope(FD, FromMemTempl->getTemplatedDecl(), Scope, MLTAL)) return true; diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index f9bf1d14bdc4..a300badc6d02 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -12752,7 +12752,8 @@ namespace { } if (OpaqueValueExpr *OVE = dyn_cast<OpaqueValueExpr>(E)) { - HandleValue(OVE->getSourceExpr()); + if (Expr *SE = OVE->getSourceExpr()) + HandleValue(SE); return; } diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 6413a48f809a..4efcb3590355 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -11142,7 +11142,8 @@ QualType Sema::CheckVectorOperands(ExprResult &LHS, ExprResult &RHS, if (VecType->getVectorKind() == VectorKind::SveFixedLengthData || VecType->getVectorKind() == VectorKind::SveFixedLengthPredicate) return true; - if (VecType->getVectorKind() == VectorKind::RVVFixedLengthData) { + if (VecType->getVectorKind() == VectorKind::RVVFixedLengthData || + VecType->getVectorKind() == VectorKind::RVVFixedLengthMask) { SVEorRVV = 1; return true; } @@ -11173,7 +11174,8 @@ QualType Sema::CheckVectorOperands(ExprResult &LHS, ExprResult &RHS, SecondVecType->getVectorKind() == VectorKind::SveFixedLengthPredicate) return true; - if (SecondVecType->getVectorKind() == VectorKind::RVVFixedLengthData) { + if (SecondVecType->getVectorKind() == VectorKind::RVVFixedLengthData || + SecondVecType->getVectorKind() == VectorKind::RVVFixedLengthMask) { SVEorRVV = 1; return true; } diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index 91e4cb7b68a2..457fa377355a 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -4200,7 +4200,7 @@ static OverloadingResult ResolveConstructorOverload( /// \param IsListInit Is this list-initialization? /// \param IsInitListCopy Is this non-list-initialization resulting from a /// list-initialization from {x} where x is the same -/// aggregate type as the entity? +/// type as the entity? static void TryConstructorInitialization(Sema &S, const InitializedEntity &Entity, const InitializationKind &Kind, @@ -4230,14 +4230,6 @@ static void TryConstructorInitialization(Sema &S, Entity.getKind() != InitializedEntity::EK_LambdaToBlockConversionBlockElement); - bool CopyElisionPossible = false; - auto ElideConstructor = [&] { - // Convert qualifications if necessary. - Sequence.AddQualificationConversionStep(DestType, VK_PRValue); - if (ILE) - Sequence.RewrapReferenceInitList(DestType, ILE); - }; - // C++17 [dcl.init]p17: // - If the initializer expression is a prvalue and the cv-unqualified // version of the source type is the same class as the class of the @@ -4250,17 +4242,11 @@ static void TryConstructorInitialization(Sema &S, if (S.getLangOpts().CPlusPlus17 && !RequireActualConstructor && UnwrappedArgs.size() == 1 && UnwrappedArgs[0]->isPRValue() && S.Context.hasSameUnqualifiedType(UnwrappedArgs[0]->getType(), DestType)) { - if (ILE && !DestType->isAggregateType()) { - // CWG2311: T{ prvalue_of_type_T } is not eligible for copy elision - // Make this an elision if this won't call an initializer-list - // constructor. (Always on an aggregate type or check constructors first.) 
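The SemaConcept.cpp change above turns a single getInstantiatedFromMemberTemplate() lookup into a walk over the whole instantiated-from chain, so parameters referenced by a constraint are found even when the constrained lambda sits several template levels deep. A sketch of the shape that needs this (illustrative code, not taken from the patch):

    template <typename T> struct Outer {
      template <typename U> void run(U u) {
        auto inner = [](auto v)
          requires requires { v + v; }   // checked against run()'s instantiated scope
        { return v + v; };
        inner(u);
      }
    };
    // Outer<int>{}.run(1);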
- assert(!IsInitListCopy && - "IsInitListCopy only possible with aggregate types"); - CopyElisionPossible = true; - } else { - ElideConstructor(); - return; - } + // Convert qualifications if necessary. + Sequence.AddQualificationConversionStep(DestType, VK_PRValue); + if (ILE) + Sequence.RewrapReferenceInitList(DestType, ILE); + return; } const RecordType *DestRecordType = DestType->getAs<RecordType>(); @@ -4305,12 +4291,6 @@ static void TryConstructorInitialization(Sema &S, S, Kind.getLocation(), Args, CandidateSet, DestType, Ctors, Best, CopyInitialization, AllowExplicit, /*OnlyListConstructors=*/true, IsListInit, RequireActualConstructor); - - if (CopyElisionPossible && Result == OR_No_Viable_Function) { - // No initializer list candidate - ElideConstructor(); - return; - } } // C++11 [over.match.list]p1: @@ -4592,9 +4572,9 @@ static void TryListInitialization(Sema &S, return; } - // C++11 [dcl.init.list]p3, per DR1467 and DR2137: - // - If T is an aggregate class and the initializer list has a single element - // of type cv U, where U is T or a class derived from T, the object is + // C++11 [dcl.init.list]p3, per DR1467: + // - If T is a class type and the initializer list has a single element of + // type cv U, where U is T or a class derived from T, the object is // initialized from that element (by copy-initialization for // copy-list-initialization, or by direct-initialization for // direct-list-initialization). @@ -4605,7 +4585,7 @@ static void TryListInitialization(Sema &S, // - Otherwise, if T is an aggregate, [...] (continue below). if (S.getLangOpts().CPlusPlus11 && InitList->getNumInits() == 1 && !IsDesignatedInit) { - if (DestType->isRecordType() && DestType->isAggregateType()) { + if (DestType->isRecordType()) { QualType InitType = InitList->getInit(0)->getType(); if (S.Context.hasSameUnqualifiedType(InitType, DestType) || S.IsDerivedFrom(InitList->getBeginLoc(), InitType, DestType)) { diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 030878899b81..940bcccb9e26 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -1568,37 +1568,19 @@ TryUserDefinedConversion(Sema &S, Expr *From, QualType ToType, // called for those cases. if (CXXConstructorDecl *Constructor = dyn_cast<CXXConstructorDecl>(ICS.UserDefined.ConversionFunction)) { - QualType FromType; - SourceLocation FromLoc; - // C++11 [over.ics.list]p6, per DR2137: - // C++17 [over.ics.list]p6: - // If C is not an initializer-list constructor and the initializer list - // has a single element of type cv U, where U is X or a class derived - // from X, the implicit conversion sequence has Exact Match rank if U is - // X, or Conversion rank if U is derived from X. 
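The SemaInit.cpp hunks above, together with the SemaOverload.cpp hunks that follow, back out the CWG2137/CWG2311 handling and restore the DR1467 wording: a braced list with a single element whose type is the destination class (or a class derived from it) is initialized from that element directly, for any class type rather than only aggregates. Restored behavior in brief (illustrative):

    #include <initializer_list>
    struct A {
      A() = default;
      A(const A &) = default;
      A(std::initializer_list<int>) {}
    };
    A a;
    A b{a};   // copy constructor again, not the initializer_list constructor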
- if (const auto *InitList = dyn_cast<InitListExpr>(From); - InitList && InitList->getNumInits() == 1 && - !S.isInitListConstructor(Constructor)) { - const Expr *SingleInit = InitList->getInit(0); - FromType = SingleInit->getType(); - FromLoc = SingleInit->getBeginLoc(); - } else { - FromType = From->getType(); - FromLoc = From->getBeginLoc(); - } - QualType FromCanon = - S.Context.getCanonicalType(FromType.getUnqualifiedType()); + QualType FromCanon + = S.Context.getCanonicalType(From->getType().getUnqualifiedType()); QualType ToCanon = S.Context.getCanonicalType(ToType).getUnqualifiedType(); if (Constructor->isCopyConstructor() && (FromCanon == ToCanon || - S.IsDerivedFrom(FromLoc, FromCanon, ToCanon))) { + S.IsDerivedFrom(From->getBeginLoc(), FromCanon, ToCanon))) { // Turn this into a "standard" conversion sequence, so that it // gets ranked with standard conversion sequences. DeclAccessPair Found = ICS.UserDefined.FoundConversionFunction; ICS.setStandard(); ICS.Standard.setAsIdentityConversion(); - ICS.Standard.setFromType(FromType); + ICS.Standard.setFromType(From->getType()); ICS.Standard.setAllToTypes(ToType); ICS.Standard.CopyConstructor = Constructor; ICS.Standard.FoundCopyConstructor = Found; @@ -5324,18 +5306,18 @@ TryListConversion(Sema &S, InitListExpr *From, QualType ToType, IsDesignatedInit) return Result; - // Per DR1467 and DR2137: - // If the parameter type is an aggregate class X and the initializer list - // has a single element of type cv U, where U is X or a class derived from - // X, the implicit conversion sequence is the one required to convert the - // element to the parameter type. + // Per DR1467: + // If the parameter type is a class X and the initializer list has a single + // element of type cv U, where U is X or a class derived from X, the + // implicit conversion sequence is the one required to convert the element + // to the parameter type. // // Otherwise, if the parameter type is a character array [... ] // and the initializer list has a single element that is an // appropriately-typed string literal (8.5.2 [dcl.init.string]), the // implicit conversion sequence is the identity conversion. if (From->getNumInits() == 1 && !IsDesignatedInit) { - if (ToType->isRecordType() && ToType->isAggregateType()) { + if (ToType->isRecordType()) { QualType InitType = From->getInit(0)->getType(); if (S.Context.hasSameUnqualifiedType(InitType, ToType) || S.IsDerivedFrom(From->getBeginLoc(), InitType, ToType)) @@ -5682,10 +5664,15 @@ static ImplicitConversionSequence TryObjectArgumentInitialization( assert(FromType->isRecordType()); QualType ClassType = S.Context.getTypeDeclType(ActingContext); - // [class.dtor]p2: A destructor can be invoked for a const, volatile or - // const volatile object. + // C++98 [class.dtor]p2: + // A destructor can be invoked for a const, volatile or const volatile + // object. + // C++98 [over.match.funcs]p4: + // For static member functions, the implicit object parameter is considered + // to match any object (since if the function is selected, the object is + // discarded). Qualifiers Quals = Method->getMethodQualifiers(); - if (isa<CXXDestructorDecl>(Method)) { + if (isa<CXXDestructorDecl>(Method) || Method->isStatic()) { Quals.addConst(); Quals.addVolatile(); } @@ -15079,7 +15066,7 @@ ExprResult Sema::CreateOverloadedArraySubscriptExpr(SourceLocation LLoc, CXXMethodDecl *Method = cast<CXXMethodDecl>(FnDecl); SmallVector<Expr *, 2> MethodArgs; - // Handle 'this' parameter if the selected function is not static. 
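The TryObjectArgumentInitialization change above extends the destructor rule to static member functions, implementing [over.match.funcs]p4: since the object argument of a selected static member is discarded, it should match any cv-qualification. One illustrative consequence for a C++23 static call operator (Fn is a placeholder):

    struct Fn {
      static int operator()(int x) { return 2 * x; }
    };
    const volatile Fn f{};
    int r = f(21);   // now accepted: the object's qualifiers no longer block resolution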
+ // Initialize the object parameter. if (Method->isExplicitObjectMemberFunction()) { ExprResult Res = InitializeExplicitObjectArgument(*this, Args[0], Method); @@ -15087,7 +15074,7 @@ ExprResult Sema::CreateOverloadedArraySubscriptExpr(SourceLocation LLoc, return ExprError(); Args[0] = Res.get(); ArgExpr = Args; - } else if (Method->isInstance()) { + } else { ExprResult Arg0 = PerformImplicitObjectArgumentInitialization( Args[0], /*Qualifier=*/nullptr, Best->FoundDecl, Method); if (Arg0.isInvalid()) @@ -15115,15 +15102,9 @@ ExprResult Sema::CreateOverloadedArraySubscriptExpr(SourceLocation LLoc, ExprValueKind VK = Expr::getValueKindForType(ResultTy); ResultTy = ResultTy.getNonLValueExprType(Context); - CallExpr *TheCall; - if (Method->isInstance()) - TheCall = CXXOperatorCallExpr::Create( - Context, OO_Subscript, FnExpr.get(), MethodArgs, ResultTy, VK, - RLoc, CurFPFeatureOverrides()); - else - TheCall = - CallExpr::Create(Context, FnExpr.get(), MethodArgs, ResultTy, VK, - RLoc, CurFPFeatureOverrides()); + CallExpr *TheCall = CXXOperatorCallExpr::Create( + Context, OO_Subscript, FnExpr.get(), MethodArgs, ResultTy, VK, RLoc, + CurFPFeatureOverrides()); if (CheckCallReturnType(FnDecl->getReturnType(), LLoc, TheCall, FnDecl)) return ExprError(); @@ -15751,15 +15732,13 @@ Sema::BuildCallToObjectOfClassType(Scope *S, Expr *Obj, bool IsError = false; - // Initialize the implicit object parameter if needed. - // Since C++23, this could also be a call to a static call operator - // which we emit as a regular CallExpr. + // Initialize the object parameter. llvm::SmallVector<Expr *, 8> NewArgs; if (Method->isExplicitObjectMemberFunction()) { // FIXME: we should do that during the definition of the lambda when we can. DiagnoseInvalidExplicitObjectParameterInLambda(Method); PrepareExplicitObjectArgument(*this, Method, Obj, Args, NewArgs); - } else if (Method->isInstance()) { + } else { ExprResult ObjRes = PerformImplicitObjectArgumentInitialization( Object.get(), /*Qualifier=*/nullptr, Best->FoundDecl, Method); if (ObjRes.isInvalid()) @@ -15793,14 +15772,9 @@ Sema::BuildCallToObjectOfClassType(Scope *S, Expr *Obj, ExprValueKind VK = Expr::getValueKindForType(ResultTy); ResultTy = ResultTy.getNonLValueExprType(Context); - CallExpr *TheCall; - if (Method->isInstance()) - TheCall = CXXOperatorCallExpr::Create(Context, OO_Call, NewFn.get(), - MethodArgs, ResultTy, VK, RParenLoc, - CurFPFeatureOverrides()); - else - TheCall = CallExpr::Create(Context, NewFn.get(), MethodArgs, ResultTy, VK, - RParenLoc, CurFPFeatureOverrides()); + CallExpr *TheCall = CXXOperatorCallExpr::Create( + Context, OO_Call, NewFn.get(), MethodArgs, ResultTy, VK, RParenLoc, + CurFPFeatureOverrides()); if (CheckCallReturnType(Method->getReturnType(), LParenLoc, TheCall, Method)) return true; diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index 9bfa71dc8bcf..a381d876a54c 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -7412,9 +7412,9 @@ ExprResult Sema::CheckTemplateArgument(NonTypeTemplateParmDecl *Param, if (ArgResult.isInvalid()) return ExprError(); - // Prior to C++20, enforce restrictions on possible template argument - // values. 
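Both CreateOverloadedArraySubscriptExpr and BuildCallToObjectOfClassType above drop their static/instance forks: calls through C++23 static operator[] and operator() are now modeled as CXXOperatorCallExpr just like their non-static counterparts, instead of as plain CallExpr. Illustrative source that now takes the unified path (Grid is a placeholder):

    struct Grid {
      static int operator[](int i) { return i + 1; }   // C++23 static subscript
    };
    int n = Grid{}[41];   // one AST shape regardless of staticness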
- if (!getLangOpts().CPlusPlus20 && Value.isLValue()) { + if (Value.isLValue()) { + APValue::LValueBase Base = Value.getLValueBase(); + auto *VD = const_cast<ValueDecl *>(Base.dyn_cast<const ValueDecl *>()); // For a non-type template-parameter of pointer or reference type, // the value of the constant expression shall not refer to assert(ParamType->isPointerType() || ParamType->isReferenceType() || @@ -7423,8 +7423,6 @@ ExprResult Sema::CheckTemplateArgument(NonTypeTemplateParmDecl *Param, // -- a string literal // -- the result of a typeid expression, or // -- a predefined __func__ variable - APValue::LValueBase Base = Value.getLValueBase(); - auto *VD = const_cast<ValueDecl *>(Base.dyn_cast<const ValueDecl *>()); if (Base && (!VD || isa<LifetimeExtendedTemporaryDecl, UnnamedGlobalConstantDecl>(VD))) { @@ -7432,24 +7430,30 @@ ExprResult Sema::CheckTemplateArgument(NonTypeTemplateParmDecl *Param, << Arg->getSourceRange(); return ExprError(); } - // -- a subobject [until C++20] - if (Value.hasLValuePath() && Value.getLValuePath().size() == 1 && - VD && VD->getType()->isArrayType() && + + if (Value.hasLValuePath() && Value.getLValuePath().size() == 1 && VD && + VD->getType()->isArrayType() && Value.getLValuePath()[0].getAsArrayIndex() == 0 && !Value.isLValueOnePastTheEnd() && ParamType->isPointerType()) { - // Per defect report (no number yet): - // ... other than a pointer to the first element of a complete array - // object. - } else if (!Value.hasLValuePath() || Value.getLValuePath().size() || - Value.isLValueOnePastTheEnd()) { - Diag(StartLoc, diag::err_non_type_template_arg_subobject) - << Value.getAsString(Context, ParamType); - return ExprError(); + SugaredConverted = TemplateArgument(VD, ParamType); + CanonicalConverted = TemplateArgument( + cast<ValueDecl>(VD->getCanonicalDecl()), CanonParamType); + return ArgResult.get(); + } + + // -- a subobject [until C++20] + if (!getLangOpts().CPlusPlus20) { + if (!Value.hasLValuePath() || Value.getLValuePath().size() || + Value.isLValueOnePastTheEnd()) { + Diag(StartLoc, diag::err_non_type_template_arg_subobject) + << Value.getAsString(Context, ParamType); + return ExprError(); + } + assert((VD || !ParamType->isReferenceType()) && + "null reference should not be a constant expression"); + assert((!VD || !ParamType->isNullPtrType()) && + "non-null value of type nullptr_t?"); } - assert((VD || !ParamType->isReferenceType()) && - "null reference should not be a constant expression"); - assert((!VD || !ParamType->isNullPtrType()) && - "non-null value of type nullptr_t?"); } if (Value.isAddrLabelDiff()) diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 9cb6c0a4ef24..92086d7277fd 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -8646,21 +8646,30 @@ static void HandleRISCVRVVVectorBitsTypeAttr(QualType &CurType, ASTContext::BuiltinVectorTypeInfo Info = S.Context.getBuiltinVectorTypeInfo(CurType->castAs<BuiltinType>()); - unsigned EltSize = S.Context.getTypeSize(Info.ElementType); unsigned MinElts = Info.EC.getKnownMinValue(); + VectorKind VecKind = VectorKind::RVVFixedLengthData; + unsigned ExpectedSize = VScale->first * MinElts; + QualType EltType = CurType->getRVVEltType(S.Context); + unsigned EltSize = S.Context.getTypeSize(EltType); + unsigned NumElts; + if (Info.ElementType == S.Context.BoolTy) { + NumElts = VecSize / S.Context.getCharWidth(); + VecKind = VectorKind::RVVFixedLengthMask; + } else { + ExpectedSize *= EltSize; + NumElts = VecSize / EltSize; + } + // The attribute vector size 
must match -mrvv-vector-bits. - unsigned ExpectedSize = VScale->first * MinElts * EltSize; - if (VecSize != ExpectedSize) { + if (ExpectedSize % 8 != 0 || VecSize != ExpectedSize) { S.Diag(Attr.getLoc(), diag::err_attribute_bad_rvv_vector_size) << VecSize << ExpectedSize; Attr.setInvalid(); return; } - VectorKind VecKind = VectorKind::RVVFixedLengthData; - VecSize /= EltSize; - CurType = S.Context.getVectorType(Info.ElementType, VecSize, VecKind); + CurType = S.Context.getVectorType(EltType, NumElts, VecKind); } /// Handle OpenCL Access Qualifier Attribute. diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index fecd94e875f6..028610deb300 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -9743,6 +9743,9 @@ void ASTReader::finishPendingActions() { if (!FD->isLateTemplateParsed() && !NonConstDefn->isLateTemplateParsed() && + // We only perform ODR checks for decls not in the explicit + // global module fragment. + !shouldSkipCheckingODR(FD) && FD->getODRHash() != NonConstDefn->getODRHash()) { if (!isa<CXXMethodDecl>(FD)) { PendingFunctionOdrMergeFailures[FD].push_back(NonConstDefn); diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index a149d8215303..1fadd8039462 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -804,8 +804,10 @@ void ASTDeclReader::VisitEnumDecl(EnumDecl *ED) { ED->setScopedUsingClassTag(EnumDeclBits.getNextBit()); ED->setFixed(EnumDeclBits.getNextBit()); - ED->setHasODRHash(true); - ED->ODRHash = Record.readInt(); + if (!shouldSkipCheckingODR(ED)) { + ED->setHasODRHash(true); + ED->ODRHash = Record.readInt(); + } // If this is a definition subject to the ODR, and we already have a // definition, merge this one into it. @@ -827,7 +829,10 @@ void ASTDeclReader::VisitEnumDecl(EnumDecl *ED) { Reader.MergedDeclContexts.insert(std::make_pair(ED, OldDef)); ED->demoteThisDefinitionToDeclaration(); Reader.mergeDefinitionVisibility(OldDef, ED); - if (OldDef->getODRHash() != ED->getODRHash()) + // We don't want to check the ODR hash value for declarations from global + // module fragment. + if (!shouldSkipCheckingODR(ED) && + OldDef->getODRHash() != ED->getODRHash()) Reader.PendingEnumOdrMergeFailures[OldDef].push_back(ED); } else { OldDef = ED; @@ -866,6 +871,9 @@ ASTDeclReader::VisitRecordDeclImpl(RecordDecl *RD) { void ASTDeclReader::VisitRecordDecl(RecordDecl *RD) { VisitRecordDeclImpl(RD); + // We should only reach here if we're in C/Objective-C. There is no + // global module fragment. + assert(!shouldSkipCheckingODR(RD)); RD->setODRHash(Record.readInt()); // Maintain the invariant of a redeclaration chain containing only @@ -1094,8 +1102,10 @@ void ASTDeclReader::VisitFunctionDecl(FunctionDecl *FD) { if (FD->isExplicitlyDefaulted()) FD->setDefaultLoc(readSourceLocation()); - FD->ODRHash = Record.readInt(); - FD->setHasODRHash(true); + if (!shouldSkipCheckingODR(FD)) { + FD->ODRHash = Record.readInt(); + FD->setHasODRHash(true); + } if (FD->isDefaulted()) { if (unsigned NumLookups = Record.readInt()) { @@ -1971,9 +1981,12 @@ void ASTDeclReader::ReadCXXDefinitionData( #include "clang/AST/CXXRecordDeclDefinitionBits.def" #undef FIELD - // Note: the caller has deserialized the IsLambda bit already. - Data.ODRHash = Record.readInt(); - Data.HasODRHash = true; + // We only perform ODR checks for decls not in GMF. 
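The HandleRISCVRVVVectorBitsTypeAttr rework a few hunks up is what lets riscv_rvv_vector_bits apply to mask (vbool) types: bool-element vectors are classified as RVVFixedLengthMask and sized in whole bytes, with the new divisibility check rejecting sizes that do not land on a byte boundary. A hedged usage sketch, assuming a compile where -mrvv-vector-bits is in effect so __riscv_v_fixed_vlen is defined; for vbool1_t the expected attribute value works out to the full VLEN:

    #include <riscv_vector.h>
    typedef vbool1_t fixed_bool1_t
        __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen)));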
+ if (!shouldSkipCheckingODR(D)) { + // Note: the caller has deserialized the IsLambda bit already. + Data.ODRHash = Record.readInt(); + Data.HasODRHash = true; + } if (Record.readInt()) { Reader.DefinitionSource[D] = @@ -2134,6 +2147,10 @@ void ASTDeclReader::MergeDefinitionData( } } + // We don't want to check ODR for decls in the global module fragment. + if (shouldSkipCheckingODR(MergeDD.Definition)) + return; + if (D->getODRHash() != MergeDD.ODRHash) { DetectedOdrViolation = true; } @@ -3498,11 +3515,14 @@ ASTDeclReader::FindExistingResult ASTDeclReader::findExisting(NamedDecl *D) { // If this declaration is from a merged context, make a note that we need to // check that the canonical definition of that context contains the decl. // + // Note that we don't perform ODR checks for decls from the global module + // fragment. + // // FIXME: We should do something similar if we merge two definitions of the // same template specialization into the same CXXRecordDecl. auto MergedDCIt = Reader.MergedDeclContexts.find(D->getLexicalDeclContext()); if (MergedDCIt != Reader.MergedDeclContexts.end() && - MergedDCIt->second == D->getDeclContext()) + !shouldSkipCheckingODR(D) && MergedDCIt->second == D->getDeclContext()) Reader.PendingOdrMergeChecks.push_back(D); return FindExistingResult(Reader, D, /*Existing=*/nullptr, diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 03bddfe0f504..3b79a9238d1a 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -6022,8 +6022,12 @@ void ASTRecordWriter::AddCXXDefinitionData(const CXXRecordDecl *D) { Record->push_back(DefinitionBits); - // getODRHash will compute the ODRHash if it has not been previously computed. - Record->push_back(D->getODRHash()); + // We only perform ODR checks for decls not in GMF. + if (!shouldSkipCheckingODR(D)) { + // getODRHash will compute the ODRHash if it has not been previously + // computed. + Record->push_back(D->getODRHash()); + } bool ModulesDebugInfo = Writer->Context->getLangOpts().ModulesDebugInfo && !D->isDependentType(); diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp index bb1f51786d28..f224075643e9 100644 --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -493,7 +493,9 @@ void ASTDeclWriter::VisitEnumDecl(EnumDecl *D) { EnumDeclBits.addBit(D->isFixed()); Record.push_back(EnumDeclBits); - Record.push_back(D->getODRHash()); + // We only perform ODR checks for decls not in GMF. + if (!shouldSkipCheckingODR(D)) + Record.push_back(D->getODRHash()); if (MemberSpecializationInfo *MemberInfo = D->getMemberSpecializationInfo()) { Record.AddDeclRef(MemberInfo->getInstantiatedFrom()); @@ -510,7 +512,7 @@ void ASTDeclWriter::VisitEnumDecl(EnumDecl *D) { !D->isTopLevelDeclInObjCContainer() && !CXXRecordDecl::classofKind(D->getKind()) && !D->getIntegerTypeSourceInfo() && !D->getMemberSpecializationInfo() && - !needsAnonymousDeclarationNumber(D) && + !needsAnonymousDeclarationNumber(D) && !shouldSkipCheckingODR(D) && D->getDeclName().getNameKind() == DeclarationName::Identifier) AbbrevToUse = Writer.getDeclEnumAbbrev(); @@ -701,7 +703,9 @@ void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) { if (D->isExplicitlyDefaulted()) Record.AddSourceLocation(D->getDefaultLoc()); - Record.push_back(D->getODRHash()); + // We only perform ODR checks for decls not in GMF. 
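The serialization changes above (ASTReader, ASTReaderDecl, and the matching writer-side hunks below) all key off the same predicate: declarations reachable only through an explicit global module fragment neither write nor compare ODR hashes. A sketch of the code shape that is now exempt (demo and legacy.h are placeholder names):

    module;                // explicit global module fragment
    #include "legacy.h"    // decls here attach to the global module: ODR checks skipped
    export module demo;
    export int use_legacy();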
+ if (!shouldSkipCheckingODR(D)) + Record.push_back(D->getODRHash()); if (D->isDefaulted()) { if (auto *FDI = D->getDefaultedFunctionInfo()) { @@ -1506,7 +1510,8 @@ void ASTDeclWriter::VisitCXXMethodDecl(CXXMethodDecl *D) { D->getFirstDecl() == D->getMostRecentDecl() && !D->isInvalidDecl() && !D->hasAttrs() && !D->isTopLevelDeclInObjCContainer() && D->getDeclName().getNameKind() == DeclarationName::Identifier && - !D->hasExtInfo() && !D->isExplicitlyDefaulted()) { + !shouldSkipCheckingODR(D) && !D->hasExtInfo() && + !D->isExplicitlyDefaulted()) { if (D->getTemplatedKind() == FunctionDecl::TK_NonTemplate || D->getTemplatedKind() == FunctionDecl::TK_FunctionTemplate || D->getTemplatedKind() == FunctionDecl::TK_MemberSpecialization || diff --git a/clang/lib/StaticAnalyzer/Core/Environment.cpp b/clang/lib/StaticAnalyzer/Core/Environment.cpp index 4f989ed59bee..427f51109853 100644 --- a/clang/lib/StaticAnalyzer/Core/Environment.cpp +++ b/clang/lib/StaticAnalyzer/Core/Environment.cpp @@ -40,8 +40,11 @@ static const Expr *ignoreTransparentExprs(const Expr *E) { switch (E->getStmtClass()) { case Stmt::OpaqueValueExprClass: - E = cast<OpaqueValueExpr>(E)->getSourceExpr(); - break; + if (const Expr *SE = cast<OpaqueValueExpr>(E)->getSourceExpr()) { + E = SE; + break; + } + return E; case Stmt::ExprWithCleanupsClass: E = cast<ExprWithCleanups>(E)->getSubExpr(); break; @@ -98,7 +101,6 @@ SVal Environment::getSVal(const EnvironmentEntry &Entry, case Stmt::CXXBindTemporaryExprClass: case Stmt::ExprWithCleanupsClass: case Stmt::GenericSelectionExprClass: - case Stmt::OpaqueValueExprClass: case Stmt::ConstantExprClass: case Stmt::ParenExprClass: case Stmt::SubstNonTypeTemplateParmExprClass: diff --git a/compiler-rt/lib/builtins/i386/chkstk.S b/compiler-rt/lib/builtins/i386/chkstk.S index a84bb0ee3007..cdd9a4c2a575 100644 --- a/compiler-rt/lib/builtins/i386/chkstk.S +++ b/compiler-rt/lib/builtins/i386/chkstk.S @@ -14,7 +14,6 @@ .text .balign 4 DEFINE_COMPILERRT_FUNCTION(_alloca) // _chkstk and _alloca are the same function -DEFINE_COMPILERRT_FUNCTION(_chkstk) push %ecx cmp $0x1000,%eax lea 8(%esp),%ecx // esp before calling this routine -> ecx @@ -35,7 +34,6 @@ DEFINE_COMPILERRT_FUNCTION(_chkstk) push (%eax) // push return address onto the stack sub %esp,%eax // restore the original value in eax ret -END_COMPILERRT_FUNCTION(_chkstk) END_COMPILERRT_FUNCTION(_alloca) #endif // __i386__ diff --git a/compiler-rt/lib/builtins/x86_64/chkstk.S b/compiler-rt/lib/builtins/x86_64/chkstk.S index 494ee261193b..ad7953a116ac 100644 --- a/compiler-rt/lib/builtins/x86_64/chkstk.S +++ b/compiler-rt/lib/builtins/x86_64/chkstk.S @@ -18,7 +18,6 @@ .text .balign 4 DEFINE_COMPILERRT_FUNCTION(___chkstk_ms) -DEFINE_COMPILERRT_FUNCTION(__chkstk) push %rcx push %rax cmp $0x1000,%rax @@ -36,7 +35,6 @@ DEFINE_COMPILERRT_FUNCTION(__chkstk) pop %rax pop %rcx ret -END_COMPILERRT_FUNCTION(__chkstk) END_COMPILERRT_FUNCTION(___chkstk_ms) #endif // __x86_64__ diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformWindows.c b/compiler-rt/lib/profile/InstrProfilingPlatformWindows.c index c976776ae59e..0751b28f81d0 100644 --- a/compiler-rt/lib/profile/InstrProfilingPlatformWindows.c +++ b/compiler-rt/lib/profile/InstrProfilingPlatformWindows.c @@ -77,7 +77,7 @@ ValueProfNode *EndVNode = &VNodesEnd; /* lld-link provides __buildid symbol which ponits to the 16 bytes build id when * using /build-id flag. 
https://lld.llvm.org/windows_support.html#lld-flags */ #define BUILD_ID_LEN 16 -COMPILER_RT_WEAK extern uint8_t __buildid[BUILD_ID_LEN]; +COMPILER_RT_WEAK uint8_t __buildid[BUILD_ID_LEN]; COMPILER_RT_VISIBILITY int __llvm_write_binary_ids(ProfDataWriter *Writer) { if (*__buildid) { if (Writer && diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp index 8438e019591b..f6b157c07c65 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp @@ -34,8 +34,10 @@ static bool FrameIsInternal(const SymbolizedStack *frame) { return true; const char *file = frame->info.file; const char *module = frame->info.module; + // On Gentoo, the path is g++-*, so there's *not* a missing /. if (file && (internal_strstr(file, "/compiler-rt/lib/") || - internal_strstr(file, "/include/c++/"))) + internal_strstr(file, "/include/c++/") || + internal_strstr(file, "/include/g++"))) return true; if (module && (internal_strstr(module, "libclang_rt."))) return true; diff --git a/libcxx/include/__algorithm/copy_move_common.h b/libcxx/include/__algorithm/copy_move_common.h index b350507e32ba..0fc7a5e3cee7 100644 --- a/libcxx/include/__algorithm/copy_move_common.h +++ b/libcxx/include/__algorithm/copy_move_common.h @@ -31,6 +31,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD // Type traits. @@ -132,4 +135,6 @@ __dispatch_copy_or_move(_InIter __first, _Sent __last, _OutIter __out_first) { _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_COPY_MOVE_COMMON_H diff --git a/libcxx/include/__algorithm/equal.h b/libcxx/include/__algorithm/equal.h index f03f010aa51a..3c0e3060e39a 100644 --- a/libcxx/include/__algorithm/equal.h +++ b/libcxx/include/__algorithm/equal.h @@ -30,6 +30,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate> @@ -162,4 +165,6 @@ equal(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_EQUAL_H diff --git a/libcxx/include/__algorithm/equal_range.h b/libcxx/include/__algorithm/equal_range.h index 7ce54965fff0..a94290431971 100644 --- a/libcxx/include/__algorithm/equal_range.h +++ b/libcxx/include/__algorithm/equal_range.h @@ -31,6 +31,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD template <class _AlgPolicy, class _Compare, class _Iter, class _Sent, class _Tp, class _Proj> @@ -77,4 +80,6 @@ equal_range(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __valu _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_EQUAL_RANGE_H diff --git a/libcxx/include/__algorithm/fold.h b/libcxx/include/__algorithm/fold.h index 88e6814d5cf9..1a9d76b50d83 100644 --- a/libcxx/include/__algorithm/fold.h +++ b/libcxx/include/__algorithm/fold.h @@ -32,6 +32,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 23 @@ -122,4 +125,6 @@ inline constexpr auto fold_left = __fold_left(); _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_FOLD_H diff --git a/libcxx/include/__algorithm/in_found_result.h 
b/libcxx/include/__algorithm/in_found_result.h index 88a0255d1698..a67ae387974c 100644 --- a/libcxx/include/__algorithm/in_found_result.h +++ b/libcxx/include/__algorithm/in_found_result.h @@ -18,6 +18,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -46,4 +49,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_IN_FOUND_RESULT_H diff --git a/libcxx/include/__algorithm/in_fun_result.h b/libcxx/include/__algorithm/in_fun_result.h index 6110c1cf86cd..a22069a9a8dd 100644 --- a/libcxx/include/__algorithm/in_fun_result.h +++ b/libcxx/include/__algorithm/in_fun_result.h @@ -18,6 +18,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 @@ -46,4 +49,6 @@ struct in_fun_result { _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_IN_FUN_RESULT_H diff --git a/libcxx/include/__algorithm/in_in_out_result.h b/libcxx/include/__algorithm/in_in_out_result.h index 95ce4f4fd5bd..ba0380b5c681 100644 --- a/libcxx/include/__algorithm/in_in_out_result.h +++ b/libcxx/include/__algorithm/in_in_out_result.h @@ -18,6 +18,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 @@ -51,4 +54,6 @@ struct in_in_out_result { _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_IN_IN_OUT_RESULT_H diff --git a/libcxx/include/__algorithm/in_in_result.h b/libcxx/include/__algorithm/in_in_result.h index d1d62dae7f67..994573fc70fd 100644 --- a/libcxx/include/__algorithm/in_in_result.h +++ b/libcxx/include/__algorithm/in_in_result.h @@ -18,6 +18,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 @@ -48,4 +51,6 @@ struct in_in_result { _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_IN_IN_RESULT_H diff --git a/libcxx/include/__algorithm/in_out_out_result.h b/libcxx/include/__algorithm/in_out_out_result.h index 143642368750..8ceb452841a4 100644 --- a/libcxx/include/__algorithm/in_out_out_result.h +++ b/libcxx/include/__algorithm/in_out_out_result.h @@ -18,6 +18,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 @@ -49,4 +52,6 @@ struct in_out_out_result { _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_IN_OUT_OUT_RESULT_H diff --git a/libcxx/include/__algorithm/includes.h b/libcxx/include/__algorithm/includes.h index 531752e93175..05d45365eb80 100644 --- a/libcxx/include/__algorithm/includes.h +++ b/libcxx/include/__algorithm/includes.h @@ -22,6 +22,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD template <class _Iter1, class _Sent1, class _Iter2, class _Sent2, class _Comp, class _Proj1, class _Proj2> @@ -71,4 +74,6 @@ includes(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __fi _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_INCLUDES_H diff --git a/libcxx/include/__algorithm/next_permutation.h b/libcxx/include/__algorithm/next_permutation.h index d66ea9b97345..011ee028cc2f 100644 --- a/libcxx/include/__algorithm/next_permutation.h +++ b/libcxx/include/__algorithm/next_permutation.h @@ -22,6 
+22,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD template <class _AlgPolicy, class _Compare, class _BidirectionalIterator, class _Sentinel> @@ -67,4 +70,6 @@ next_permutation(_BidirectionalIterator __first, _BidirectionalIterator __last) _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_NEXT_PERMUTATION_H diff --git a/libcxx/include/__algorithm/nth_element.h b/libcxx/include/__algorithm/nth_element.h index 37ddfbdacf04..da748d7255ab 100644 --- a/libcxx/include/__algorithm/nth_element.h +++ b/libcxx/include/__algorithm/nth_element.h @@ -23,6 +23,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD template <class _Compare, class _RandomAccessIterator> @@ -253,4 +256,6 @@ nth_element(_RandomAccessIterator __first, _RandomAccessIterator __nth, _RandomA _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_NTH_ELEMENT_H diff --git a/libcxx/include/__algorithm/partial_sort.h b/libcxx/include/__algorithm/partial_sort.h index 27511a124229..85a8fdc77aa2 100644 --- a/libcxx/include/__algorithm/partial_sort.h +++ b/libcxx/include/__algorithm/partial_sort.h @@ -26,6 +26,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD template <class _AlgPolicy, class _Compare, class _RandomAccessIterator, class _Sentinel> @@ -83,4 +86,6 @@ partial_sort(_RandomAccessIterator __first, _RandomAccessIterator __middle, _Ran _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_PARTIAL_SORT_H diff --git a/libcxx/include/__algorithm/partial_sort_copy.h b/libcxx/include/__algorithm/partial_sort_copy.h index e7d8df4de89f..ef7c9d34d949 100644 --- a/libcxx/include/__algorithm/partial_sort_copy.h +++ b/libcxx/include/__algorithm/partial_sort_copy.h @@ -28,6 +28,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD template <class _AlgPolicy, @@ -98,4 +101,6 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_PARTIAL_SORT_COPY_H diff --git a/libcxx/include/__algorithm/partition.h b/libcxx/include/__algorithm/partition.h index e2ceb07bf195..824e49b9ec21 100644 --- a/libcxx/include/__algorithm/partition.h +++ b/libcxx/include/__algorithm/partition.h @@ -19,6 +19,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD template <class _Predicate, class _AlgPolicy, class _ForwardIterator, class _Sentinel> @@ -82,4 +85,6 @@ partition(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_PARTITION_H diff --git a/libcxx/include/__algorithm/prev_permutation.h b/libcxx/include/__algorithm/prev_permutation.h index 3e4bbb3fbb16..8d15b6806401 100644 --- a/libcxx/include/__algorithm/prev_permutation.h +++ b/libcxx/include/__algorithm/prev_permutation.h @@ -22,6 +22,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD template <class _AlgPolicy, class _Compare, class _BidirectionalIterator, class _Sentinel> @@ -67,4 +70,6 @@ prev_permutation(_BidirectionalIterator __first, _BidirectionalIterator __last) _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // 
_LIBCPP___ALGORITHM_PREV_PERMUTATION_H diff --git a/libcxx/include/__algorithm/pstl_any_all_none_of.h b/libcxx/include/__algorithm/pstl_any_all_none_of.h index d93fdba2224c..4b1e0e61b542 100644 --- a/libcxx/include/__algorithm/pstl_any_all_none_of.h +++ b/libcxx/include/__algorithm/pstl_any_all_none_of.h @@ -23,6 +23,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 _LIBCPP_BEGIN_NAMESPACE_STD @@ -144,4 +147,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_PSTL_ANY_ALL_NONE_OF_H diff --git a/libcxx/include/__algorithm/pstl_backends/cpu_backends/transform_reduce.h b/libcxx/include/__algorithm/pstl_backends/cpu_backends/transform_reduce.h index ab2e3172b8b6..14a0d76741d4 100644 --- a/libcxx/include/__algorithm/pstl_backends/cpu_backends/transform_reduce.h +++ b/libcxx/include/__algorithm/pstl_backends/cpu_backends/transform_reduce.h @@ -25,6 +25,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 _LIBCPP_BEGIN_NAMESPACE_STD @@ -194,4 +197,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_TRANSFORM_REDUCE_H diff --git a/libcxx/include/__algorithm/pstl_copy.h b/libcxx/include/__algorithm/pstl_copy.h index 19f275a0d5d9..1069dcec0e11 100644 --- a/libcxx/include/__algorithm/pstl_copy.h +++ b/libcxx/include/__algorithm/pstl_copy.h @@ -28,6 +28,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 _LIBCPP_BEGIN_NAMESPACE_STD @@ -113,4 +116,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_PSTL_COPY_H diff --git a/libcxx/include/__algorithm/pstl_count.h b/libcxx/include/__algorithm/pstl_count.h index 28806fca0637..2781f6bfd3c9 100644 --- a/libcxx/include/__algorithm/pstl_count.h +++ b/libcxx/include/__algorithm/pstl_count.h @@ -29,6 +29,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 _LIBCPP_BEGIN_NAMESPACE_STD @@ -113,4 +116,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_PSTL_COUNT_H diff --git a/libcxx/include/__algorithm/pstl_equal.h b/libcxx/include/__algorithm/pstl_equal.h index b343d2675980..d235c0f4f419 100644 --- a/libcxx/include/__algorithm/pstl_equal.h +++ b/libcxx/include/__algorithm/pstl_equal.h @@ -21,6 +21,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 _LIBCPP_BEGIN_NAMESPACE_STD @@ -167,4 +170,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_PSTL_EQUAL_H diff --git a/libcxx/include/__algorithm/pstl_fill.h b/libcxx/include/__algorithm/pstl_fill.h index 3057dcc04f1a..488b49a0feec 100644 --- a/libcxx/include/__algorithm/pstl_fill.h +++ b/libcxx/include/__algorithm/pstl_fill.h @@ -26,6 
+26,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 _LIBCPP_BEGIN_NAMESPACE_STD @@ -108,4 +111,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_PSTL_FILL_H diff --git a/libcxx/include/__algorithm/pstl_find.h b/libcxx/include/__algorithm/pstl_find.h index adc05ea1a9e5..5b694db68aea 100644 --- a/libcxx/include/__algorithm/pstl_find.h +++ b/libcxx/include/__algorithm/pstl_find.h @@ -25,6 +25,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 _LIBCPP_BEGIN_NAMESPACE_STD @@ -133,4 +136,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_PSTL_FIND_H diff --git a/libcxx/include/__algorithm/pstl_for_each.h b/libcxx/include/__algorithm/pstl_for_each.h index 819a43d685ab..bb7b5a61a6dc 100644 --- a/libcxx/include/__algorithm/pstl_for_each.h +++ b/libcxx/include/__algorithm/pstl_for_each.h @@ -28,6 +28,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 _LIBCPP_BEGIN_NAMESPACE_STD @@ -100,4 +103,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_PSTL_FOR_EACH_H diff --git a/libcxx/include/__algorithm/pstl_generate.h b/libcxx/include/__algorithm/pstl_generate.h index 56538392d5b5..7133c6f4f4c6 100644 --- a/libcxx/include/__algorithm/pstl_generate.h +++ b/libcxx/include/__algorithm/pstl_generate.h @@ -25,6 +25,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 _LIBCPP_BEGIN_NAMESPACE_STD @@ -106,4 +109,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_PSTL_GENERATE_H diff --git a/libcxx/include/__algorithm/pstl_is_partitioned.h b/libcxx/include/__algorithm/pstl_is_partitioned.h index 39cf6369339d..b65430212207 100644 --- a/libcxx/include/__algorithm/pstl_is_partitioned.h +++ b/libcxx/include/__algorithm/pstl_is_partitioned.h @@ -24,6 +24,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 _LIBCPP_BEGIN_NAMESPACE_STD @@ -69,4 +72,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_PSTL_IS_PARITTIONED diff --git a/libcxx/include/__algorithm/pstl_merge.h b/libcxx/include/__algorithm/pstl_merge.h index ed8014510863..3d262db6bc0c 100644 --- a/libcxx/include/__algorithm/pstl_merge.h +++ b/libcxx/include/__algorithm/pstl_merge.h @@ -22,6 +22,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 _LIBCPP_BEGIN_NAMESPACE_STD @@ -84,4 +87,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_PSTL_MERGE_H diff --git 
a/libcxx/include/__algorithm/pstl_move.h b/libcxx/include/__algorithm/pstl_move.h index 52baab57591e..d8441f1a6c2e 100644 --- a/libcxx/include/__algorithm/pstl_move.h +++ b/libcxx/include/__algorithm/pstl_move.h @@ -27,6 +27,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 _LIBCPP_BEGIN_NAMESPACE_STD @@ -76,4 +79,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_PSTL_MOVE_H diff --git a/libcxx/include/__algorithm/pstl_replace.h b/libcxx/include/__algorithm/pstl_replace.h index 05dee3f6a4f3..b1caf3fd4ac0 100644 --- a/libcxx/include/__algorithm/pstl_replace.h +++ b/libcxx/include/__algorithm/pstl_replace.h @@ -24,6 +24,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 _LIBCPP_BEGIN_NAMESPACE_STD @@ -239,4 +242,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_PSTL_REPLACE_H diff --git a/libcxx/include/__algorithm/pstl_rotate_copy.h b/libcxx/include/__algorithm/pstl_rotate_copy.h index 33dc9a3635f7..346aab1d4a55 100644 --- a/libcxx/include/__algorithm/pstl_rotate_copy.h +++ b/libcxx/include/__algorithm/pstl_rotate_copy.h @@ -19,6 +19,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 _LIBCPP_BEGIN_NAMESPACE_STD @@ -77,4 +80,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_PSTL_ROTATE_COPY_H diff --git a/libcxx/include/__algorithm/pstl_sort.h b/libcxx/include/__algorithm/pstl_sort.h index 3e71e0aa5ae0..a931f768111a 100644 --- a/libcxx/include/__algorithm/pstl_sort.h +++ b/libcxx/include/__algorithm/pstl_sort.h @@ -25,6 +25,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 _LIBCPP_BEGIN_NAMESPACE_STD @@ -74,4 +77,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_PSTL_SORT_H diff --git a/libcxx/include/__algorithm/pstl_stable_sort.h b/libcxx/include/__algorithm/pstl_stable_sort.h index c9d375535fc4..8ea0bb3f9a8d 100644 --- a/libcxx/include/__algorithm/pstl_stable_sort.h +++ b/libcxx/include/__algorithm/pstl_stable_sort.h @@ -23,6 +23,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 _LIBCPP_BEGIN_NAMESPACE_STD @@ -53,4 +56,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_PSTL_STABLE_SORT_H diff --git a/libcxx/include/__algorithm/pstl_transform.h b/libcxx/include/__algorithm/pstl_transform.h index aad59d1f30e6..f95938782fc3 100644 --- a/libcxx/include/__algorithm/pstl_transform.h +++ b/libcxx/include/__algorithm/pstl_transform.h @@ -22,6 +22,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 _LIBCPP_BEGIN_NAMESPACE_STD 
@@ -112,4 +115,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_PSTL_TRANSFORM_H diff --git a/libcxx/include/__algorithm/ranges_all_of.h b/libcxx/include/__algorithm/ranges_all_of.h index 39a2ae4de01e..8976541d590c 100644 --- a/libcxx/include/__algorithm/ranges_all_of.h +++ b/libcxx/include/__algorithm/ranges_all_of.h @@ -22,6 +22,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -66,4 +69,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_ALL_OF_H diff --git a/libcxx/include/__algorithm/ranges_any_of.h b/libcxx/include/__algorithm/ranges_any_of.h index 2ca8531102ea..7c775f5f64de 100644 --- a/libcxx/include/__algorithm/ranges_any_of.h +++ b/libcxx/include/__algorithm/ranges_any_of.h @@ -22,6 +22,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -66,4 +69,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_ANY_OF_H diff --git a/libcxx/include/__algorithm/ranges_binary_search.h b/libcxx/include/__algorithm/ranges_binary_search.h index 22008e0f1bc8..f3b7842d5ccc 100644 --- a/libcxx/include/__algorithm/ranges_binary_search.h +++ b/libcxx/include/__algorithm/ranges_binary_search.h @@ -24,6 +24,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -65,4 +68,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_BINARY_SEARCH_H diff --git a/libcxx/include/__algorithm/ranges_clamp.h b/libcxx/include/__algorithm/ranges_clamp.h index a1185e7278f0..f5ef5fd7f26e 100644 --- a/libcxx/include/__algorithm/ranges_clamp.h +++ b/libcxx/include/__algorithm/ranges_clamp.h @@ -22,6 +22,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -58,4 +61,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_CLAMP_H diff --git a/libcxx/include/__algorithm/ranges_contains.h b/libcxx/include/__algorithm/ranges_contains.h index f92fcec587d8..00d0e5401988 100644 --- a/libcxx/include/__algorithm/ranges_contains.h +++ b/libcxx/include/__algorithm/ranges_contains.h @@ -25,6 +25,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 23 _LIBCPP_BEGIN_NAMESPACE_STD @@ -58,4 +61,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 23 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_CONTAINS_H diff --git a/libcxx/include/__algorithm/ranges_copy.h b/libcxx/include/__algorithm/ranges_copy.h index 1c87f074e7ca..e1d6d32f05f7 100644 --- a/libcxx/include/__algorithm/ranges_copy.h +++ b/libcxx/include/__algorithm/ranges_copy.h @@ -25,6 +25,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -63,4 +66,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_COPY_H diff --git a/libcxx/include/__algorithm/ranges_copy_backward.h 
b/libcxx/include/__algorithm/ranges_copy_backward.h index 865e944d4384..93e326042503 100644 --- a/libcxx/include/__algorithm/ranges_copy_backward.h +++ b/libcxx/include/__algorithm/ranges_copy_backward.h @@ -23,6 +23,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -61,4 +64,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_COPY_BACKWARD_H diff --git a/libcxx/include/__algorithm/ranges_copy_if.h b/libcxx/include/__algorithm/ranges_copy_if.h index b77dbd37fcee..4b41d2154e7f 100644 --- a/libcxx/include/__algorithm/ranges_copy_if.h +++ b/libcxx/include/__algorithm/ranges_copy_if.h @@ -24,6 +24,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -79,4 +82,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_COPY_IF_H diff --git a/libcxx/include/__algorithm/ranges_copy_n.h b/libcxx/include/__algorithm/ranges_copy_n.h index 99e8eee14d0f..4353fa99278c 100644 --- a/libcxx/include/__algorithm/ranges_copy_n.h +++ b/libcxx/include/__algorithm/ranges_copy_n.h @@ -25,6 +25,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 @@ -73,4 +76,6 @@ inline constexpr auto copy_n = __copy_n::__fn{}; _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_COPY_N_H diff --git a/libcxx/include/__algorithm/ranges_count.h b/libcxx/include/__algorithm/ranges_count.h index 4c8f1b2cbea7..a8965c1b961f 100644 --- a/libcxx/include/__algorithm/ranges_count.h +++ b/libcxx/include/__algorithm/ranges_count.h @@ -26,6 +26,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -58,4 +61,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_COUNT_H diff --git a/libcxx/include/__algorithm/ranges_count_if.h b/libcxx/include/__algorithm/ranges_count_if.h index 92f37d049e0c..71b942dd5322 100644 --- a/libcxx/include/__algorithm/ranges_count_if.h +++ b/libcxx/include/__algorithm/ranges_count_if.h @@ -25,6 +25,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -71,4 +74,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_COUNT_IF_H diff --git a/libcxx/include/__algorithm/ranges_ends_with.h b/libcxx/include/__algorithm/ranges_ends_with.h index 2afb74bff0f1..c2a3cae9f3b1 100644 --- a/libcxx/include/__algorithm/ranges_ends_with.h +++ b/libcxx/include/__algorithm/ranges_ends_with.h @@ -28,6 +28,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 23 _LIBCPP_BEGIN_NAMESPACE_STD @@ -193,4 +196,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 23 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_ENDS_WITH_H diff --git a/libcxx/include/__algorithm/ranges_equal.h b/libcxx/include/__algorithm/ranges_equal.h index 4cb1f7df1952..31c7ee261da6 100644 --- a/libcxx/include/__algorithm/ranges_equal.h +++ b/libcxx/include/__algorithm/ranges_equal.h @@ -26,6 +26,9 @@ # pragma GCC system_header #endif 
+_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -101,4 +104,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_EQUAL_H diff --git a/libcxx/include/__algorithm/ranges_equal_range.h b/libcxx/include/__algorithm/ranges_equal_range.h index 1ff8856ca03f..4c1c3834ba9f 100644 --- a/libcxx/include/__algorithm/ranges_equal_range.h +++ b/libcxx/include/__algorithm/ranges_equal_range.h @@ -30,6 +30,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -72,4 +75,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_EQUAL_RANGE_H diff --git a/libcxx/include/__algorithm/ranges_fill.h b/libcxx/include/__algorithm/ranges_fill.h index 88a892f5c278..7a177d85e9f0 100644 --- a/libcxx/include/__algorithm/ranges_fill.h +++ b/libcxx/include/__algorithm/ranges_fill.h @@ -20,6 +20,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -54,4 +57,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_FILL_H diff --git a/libcxx/include/__algorithm/ranges_fill_n.h b/libcxx/include/__algorithm/ranges_fill_n.h index dbd8ec27aef9..a6e988c0089c 100644 --- a/libcxx/include/__algorithm/ranges_fill_n.h +++ b/libcxx/include/__algorithm/ranges_fill_n.h @@ -17,6 +17,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -45,4 +48,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_FILL_N_H diff --git a/libcxx/include/__algorithm/ranges_find.h b/libcxx/include/__algorithm/ranges_find.h index de870e381184..7459fad717a5 100644 --- a/libcxx/include/__algorithm/ranges_find.h +++ b/libcxx/include/__algorithm/ranges_find.h @@ -28,6 +28,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -72,4 +75,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_FIND_H diff --git a/libcxx/include/__algorithm/ranges_find_end.h b/libcxx/include/__algorithm/ranges_find_end.h index 2c57ad424bfd..0bda4f3e1cea 100644 --- a/libcxx/include/__algorithm/ranges_find_end.h +++ b/libcxx/include/__algorithm/ranges_find_end.h @@ -27,6 +27,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -95,4 +98,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_FIND_END_H diff --git a/libcxx/include/__algorithm/ranges_find_first_of.h b/libcxx/include/__algorithm/ranges_find_first_of.h index ec6d52c63250..63a7b8335faa 100644 --- a/libcxx/include/__algorithm/ranges_find_first_of.h +++ b/libcxx/include/__algorithm/ranges_find_first_of.h @@ -24,6 +24,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -98,4 +101,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_FIND_FIRST_OF_H diff --git 
a/libcxx/include/__algorithm/ranges_find_if.h b/libcxx/include/__algorithm/ranges_find_if.h index af54a5007ee2..52ae55ce96c3 100644 --- a/libcxx/include/__algorithm/ranges_find_if.h +++ b/libcxx/include/__algorithm/ranges_find_if.h @@ -24,6 +24,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -67,4 +70,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_FIND_IF_H diff --git a/libcxx/include/__algorithm/ranges_find_if_not.h b/libcxx/include/__algorithm/ranges_find_if_not.h index a18bea43165e..60c6796cbbfc 100644 --- a/libcxx/include/__algorithm/ranges_find_if_not.h +++ b/libcxx/include/__algorithm/ranges_find_if_not.h @@ -26,6 +26,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -61,4 +64,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_FIND_IF_NOT_H diff --git a/libcxx/include/__algorithm/ranges_for_each.h b/libcxx/include/__algorithm/ranges_for_each.h index 7878ed26709f..225dc774c876 100644 --- a/libcxx/include/__algorithm/ranges_for_each.h +++ b/libcxx/include/__algorithm/ranges_for_each.h @@ -24,6 +24,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -73,4 +76,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_FOR_EACH_H diff --git a/libcxx/include/__algorithm/ranges_for_each_n.h b/libcxx/include/__algorithm/ranges_for_each_n.h index 53ccb9a6035a..d1fdca34cc5a 100644 --- a/libcxx/include/__algorithm/ranges_for_each_n.h +++ b/libcxx/include/__algorithm/ranges_for_each_n.h @@ -24,6 +24,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -56,4 +59,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_FOR_EACH_N_H diff --git a/libcxx/include/__algorithm/ranges_generate.h b/libcxx/include/__algorithm/ranges_generate.h index 3ff1e13c4220..e6467198e6ba 100644 --- a/libcxx/include/__algorithm/ranges_generate.h +++ b/libcxx/include/__algorithm/ranges_generate.h @@ -24,6 +24,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -65,4 +68,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_GENERATE_H diff --git a/libcxx/include/__algorithm/ranges_generate_n.h b/libcxx/include/__algorithm/ranges_generate_n.h index c025c621a191..cd5fd7483ab2 100644 --- a/libcxx/include/__algorithm/ranges_generate_n.h +++ b/libcxx/include/__algorithm/ranges_generate_n.h @@ -25,6 +25,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -57,4 +60,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_GENERATE_N_H diff --git a/libcxx/include/__algorithm/ranges_includes.h b/libcxx/include/__algorithm/ranges_includes.h index aa35080c8cfd..0bc4c043bd18 100644 --- a/libcxx/include/__algorithm/ranges_includes.h +++ 
b/libcxx/include/__algorithm/ranges_includes.h @@ -27,6 +27,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -90,4 +93,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_INCLUDES_H diff --git a/libcxx/include/__algorithm/ranges_inplace_merge.h b/libcxx/include/__algorithm/ranges_inplace_merge.h index 86001b003d5c..d94c0ad46567 100644 --- a/libcxx/include/__algorithm/ranges_inplace_merge.h +++ b/libcxx/include/__algorithm/ranges_inplace_merge.h @@ -31,6 +31,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -76,4 +79,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_INPLACE_MERGE_H diff --git a/libcxx/include/__algorithm/ranges_is_heap.h b/libcxx/include/__algorithm/ranges_is_heap.h index f298c347b747..122368c90d92 100644 --- a/libcxx/include/__algorithm/ranges_is_heap.h +++ b/libcxx/include/__algorithm/ranges_is_heap.h @@ -26,6 +26,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -73,4 +76,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_IS_HEAP_H diff --git a/libcxx/include/__algorithm/ranges_is_heap_until.h b/libcxx/include/__algorithm/ranges_is_heap_until.h index 73f13fb50440..b2705d37a6d3 100644 --- a/libcxx/include/__algorithm/ranges_is_heap_until.h +++ b/libcxx/include/__algorithm/ranges_is_heap_until.h @@ -27,6 +27,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -73,4 +76,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_IS_HEAP_UNTIL_H diff --git a/libcxx/include/__algorithm/ranges_is_partitioned.h b/libcxx/include/__algorithm/ranges_is_partitioned.h index 76db870efc70..c6a585c9f510 100644 --- a/libcxx/include/__algorithm/ranges_is_partitioned.h +++ b/libcxx/include/__algorithm/ranges_is_partitioned.h @@ -23,6 +23,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -78,4 +81,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_IS_PARTITIONED_H diff --git a/libcxx/include/__algorithm/ranges_is_permutation.h b/libcxx/include/__algorithm/ranges_is_permutation.h index 2b99839bc66f..e0423d722b5b 100644 --- a/libcxx/include/__algorithm/ranges_is_permutation.h +++ b/libcxx/include/__algorithm/ranges_is_permutation.h @@ -25,6 +25,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -99,4 +102,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_IS_PERMUTATION_H diff --git a/libcxx/include/__algorithm/ranges_is_sorted.h b/libcxx/include/__algorithm/ranges_is_sorted.h index 3eb2c768d66a..d71035d5aa1d 100644 --- a/libcxx/include/__algorithm/ranges_is_sorted.h +++ b/libcxx/include/__algorithm/ranges_is_sorted.h @@ -23,6 +23,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + 
#if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -59,4 +62,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP__ALGORITHM_RANGES_IS_SORTED_H diff --git a/libcxx/include/__algorithm/ranges_is_sorted_until.h b/libcxx/include/__algorithm/ranges_is_sorted_until.h index 19e9875d2757..dcfb6a4e1813 100644 --- a/libcxx/include/__algorithm/ranges_is_sorted_until.h +++ b/libcxx/include/__algorithm/ranges_is_sorted_until.h @@ -24,6 +24,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -74,4 +77,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP__ALGORITHM_RANGES_IS_SORTED_UNTIL_H diff --git a/libcxx/include/__algorithm/ranges_iterator_concept.h b/libcxx/include/__algorithm/ranges_iterator_concept.h index 9a9203040336..2af891d3af00 100644 --- a/libcxx/include/__algorithm/ranges_iterator_concept.h +++ b/libcxx/include/__algorithm/ranges_iterator_concept.h @@ -18,6 +18,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -48,4 +51,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_ITERATOR_CONCEPT_H diff --git a/libcxx/include/__algorithm/ranges_lexicographical_compare.h b/libcxx/include/__algorithm/ranges_lexicographical_compare.h index 5b843dfd7b31..90e96b546516 100644 --- a/libcxx/include/__algorithm/ranges_lexicographical_compare.h +++ b/libcxx/include/__algorithm/ranges_lexicographical_compare.h @@ -23,6 +23,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -98,4 +101,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_LEXICOGRAPHICAL_COMPARE_H diff --git a/libcxx/include/__algorithm/ranges_lower_bound.h b/libcxx/include/__algorithm/ranges_lower_bound.h index 58b3f815b96a..ab1f80e7ab77 100644 --- a/libcxx/include/__algorithm/ranges_lower_bound.h +++ b/libcxx/include/__algorithm/ranges_lower_bound.h @@ -27,6 +27,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -65,4 +68,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_LOWER_BOUND_H diff --git a/libcxx/include/__algorithm/ranges_make_heap.h b/libcxx/include/__algorithm/ranges_make_heap.h index f17eabff43d2..fe9c024fbf8a 100644 --- a/libcxx/include/__algorithm/ranges_make_heap.h +++ b/libcxx/include/__algorithm/ranges_make_heap.h @@ -32,6 +32,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -77,4 +80,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_MAKE_HEAP_H diff --git a/libcxx/include/__algorithm/ranges_max_element.h b/libcxx/include/__algorithm/ranges_max_element.h index 2ba97042f1f6..83adf49b61ad 100644 --- a/libcxx/include/__algorithm/ranges_max_element.h +++ b/libcxx/include/__algorithm/ranges_max_element.h @@ -24,6 +24,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -61,4 +64,6 @@ 
_LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_MAX_ELEMENT_H diff --git a/libcxx/include/__algorithm/ranges_merge.h b/libcxx/include/__algorithm/ranges_merge.h index 7f49154ec922..bdf9a62d90bd 100644 --- a/libcxx/include/__algorithm/ranges_merge.h +++ b/libcxx/include/__algorithm/ranges_merge.h @@ -27,6 +27,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -130,4 +133,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_MERGE_H diff --git a/libcxx/include/__algorithm/ranges_min_element.h b/libcxx/include/__algorithm/ranges_min_element.h index 07826a0e6b81..4b9cb76da578 100644 --- a/libcxx/include/__algorithm/ranges_min_element.h +++ b/libcxx/include/__algorithm/ranges_min_element.h @@ -24,6 +24,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -73,4 +76,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_MIN_ELEMENT_H diff --git a/libcxx/include/__algorithm/ranges_minmax_element.h b/libcxx/include/__algorithm/ranges_minmax_element.h index a52319f6b5d3..5132856ebcd5 100644 --- a/libcxx/include/__algorithm/ranges_minmax_element.h +++ b/libcxx/include/__algorithm/ranges_minmax_element.h @@ -28,6 +28,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -70,4 +73,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_MINMAX_H diff --git a/libcxx/include/__algorithm/ranges_mismatch.h b/libcxx/include/__algorithm/ranges_mismatch.h index db9bfc8e87db..037af3912623 100644 --- a/libcxx/include/__algorithm/ranges_mismatch.h +++ b/libcxx/include/__algorithm/ranges_mismatch.h @@ -25,6 +25,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 @@ -86,4 +89,6 @@ constexpr inline auto mismatch = __mismatch::__fn{}; _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_MISMATCH_H diff --git a/libcxx/include/__algorithm/ranges_move.h b/libcxx/include/__algorithm/ranges_move.h index 8bd2409f891c..be869f36c973 100644 --- a/libcxx/include/__algorithm/ranges_move.h +++ b/libcxx/include/__algorithm/ranges_move.h @@ -23,6 +23,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -66,4 +69,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_MOVE_H diff --git a/libcxx/include/__algorithm/ranges_move_backward.h b/libcxx/include/__algorithm/ranges_move_backward.h index ee390a40e489..6d4071a33b81 100644 --- a/libcxx/include/__algorithm/ranges_move_backward.h +++ b/libcxx/include/__algorithm/ranges_move_backward.h @@ -25,6 +25,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -68,4 +71,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_MOVE_BACKWARD_H diff --git a/libcxx/include/__algorithm/ranges_next_permutation.h 
b/libcxx/include/__algorithm/ranges_next_permutation.h index 9ebab3ea7c13..18535e0a6254 100644 --- a/libcxx/include/__algorithm/ranges_next_permutation.h +++ b/libcxx/include/__algorithm/ranges_next_permutation.h @@ -28,6 +28,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -70,4 +73,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_NEXT_PERMUTATION_H diff --git a/libcxx/include/__algorithm/ranges_none_of.h b/libcxx/include/__algorithm/ranges_none_of.h index b0d363895e00..59bd87997d44 100644 --- a/libcxx/include/__algorithm/ranges_none_of.h +++ b/libcxx/include/__algorithm/ranges_none_of.h @@ -22,6 +22,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -67,4 +70,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_NONE_OF_H diff --git a/libcxx/include/__algorithm/ranges_nth_element.h b/libcxx/include/__algorithm/ranges_nth_element.h index 7abdbd0889e0..90ade9efe10d 100644 --- a/libcxx/include/__algorithm/ranges_nth_element.h +++ b/libcxx/include/__algorithm/ranges_nth_element.h @@ -31,6 +31,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -76,4 +79,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_NTH_ELEMENT_H diff --git a/libcxx/include/__algorithm/ranges_partial_sort.h b/libcxx/include/__algorithm/ranges_partial_sort.h index 9ec8882097d7..c67247d2e0a7 100644 --- a/libcxx/include/__algorithm/ranges_partial_sort.h +++ b/libcxx/include/__algorithm/ranges_partial_sort.h @@ -33,6 +33,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -74,4 +77,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_PARTIAL_SORT_H diff --git a/libcxx/include/__algorithm/ranges_partial_sort_copy.h b/libcxx/include/__algorithm/ranges_partial_sort_copy.h index eba7d9ac4165..b3bdeb78fb6f 100644 --- a/libcxx/include/__algorithm/ranges_partial_sort_copy.h +++ b/libcxx/include/__algorithm/ranges_partial_sort_copy.h @@ -30,6 +30,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -106,4 +109,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_PARTIAL_SORT_COPY_H diff --git a/libcxx/include/__algorithm/ranges_partition.h b/libcxx/include/__algorithm/ranges_partition.h index 89d192b51fd3..a67ac4c96757 100644 --- a/libcxx/include/__algorithm/ranges_partition.h +++ b/libcxx/include/__algorithm/ranges_partition.h @@ -32,6 +32,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -80,4 +83,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_PARTITION_H diff --git a/libcxx/include/__algorithm/ranges_partition_copy.h b/libcxx/include/__algorithm/ranges_partition_copy.h index 6a16b02db3e5..d60c865dd2a8 100644 --- 
a/libcxx/include/__algorithm/ranges_partition_copy.h +++ b/libcxx/include/__algorithm/ranges_partition_copy.h @@ -26,6 +26,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -102,4 +105,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_PARTITION_COPY_H diff --git a/libcxx/include/__algorithm/ranges_partition_point.h b/libcxx/include/__algorithm/ranges_partition_point.h index 6fc20e7d00e9..c5b11b5fed19 100644 --- a/libcxx/include/__algorithm/ranges_partition_point.h +++ b/libcxx/include/__algorithm/ranges_partition_point.h @@ -27,6 +27,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -85,4 +88,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_PARTITION_POINT_H diff --git a/libcxx/include/__algorithm/ranges_pop_heap.h b/libcxx/include/__algorithm/ranges_pop_heap.h index 364cfe94b161..01f92c0f2288 100644 --- a/libcxx/include/__algorithm/ranges_pop_heap.h +++ b/libcxx/include/__algorithm/ranges_pop_heap.h @@ -32,6 +32,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -78,4 +81,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_POP_HEAP_H diff --git a/libcxx/include/__algorithm/ranges_prev_permutation.h b/libcxx/include/__algorithm/ranges_prev_permutation.h index ae7a68cce5fd..225cee9b75ec 100644 --- a/libcxx/include/__algorithm/ranges_prev_permutation.h +++ b/libcxx/include/__algorithm/ranges_prev_permutation.h @@ -28,6 +28,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -70,4 +73,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_PREV_PERMUTATION_H diff --git a/libcxx/include/__algorithm/ranges_push_heap.h b/libcxx/include/__algorithm/ranges_push_heap.h index 1ed9c953f54c..9d187af38c53 100644 --- a/libcxx/include/__algorithm/ranges_push_heap.h +++ b/libcxx/include/__algorithm/ranges_push_heap.h @@ -32,6 +32,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -77,4 +80,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_PUSH_HEAP_H diff --git a/libcxx/include/__algorithm/ranges_remove.h b/libcxx/include/__algorithm/ranges_remove.h index e27c4bdd733d..315bed8fba77 100644 --- a/libcxx/include/__algorithm/ranges_remove.h +++ b/libcxx/include/__algorithm/ranges_remove.h @@ -25,6 +25,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -60,4 +63,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_REMOVE_H diff --git a/libcxx/include/__algorithm/ranges_remove_copy.h b/libcxx/include/__algorithm/ranges_remove_copy.h index 5158a78e4814..84529eceac68 100644 --- a/libcxx/include/__algorithm/ranges_remove_copy.h +++ b/libcxx/include/__algorithm/ranges_remove_copy.h @@ -26,6 +26,9 @@ # pragma GCC system_header #endif 
+_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -73,4 +76,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_REMOVE_COPY_H diff --git a/libcxx/include/__algorithm/ranges_remove_copy_if.h b/libcxx/include/__algorithm/ranges_remove_copy_if.h index c07b4813d7d0..56fe01753312 100644 --- a/libcxx/include/__algorithm/ranges_remove_copy_if.h +++ b/libcxx/include/__algorithm/ranges_remove_copy_if.h @@ -29,6 +29,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -87,4 +90,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_REMOVE_COPY_IF_H diff --git a/libcxx/include/__algorithm/ranges_remove_if.h b/libcxx/include/__algorithm/ranges_remove_if.h index 4b7aa2d2be78..943dbdd73807 100644 --- a/libcxx/include/__algorithm/ranges_remove_if.h +++ b/libcxx/include/__algorithm/ranges_remove_if.h @@ -27,6 +27,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -81,4 +84,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_REMOVE_IF_H diff --git a/libcxx/include/__algorithm/ranges_replace.h b/libcxx/include/__algorithm/ranges_replace.h index b66a41aa8d0d..2b88dc032972 100644 --- a/libcxx/include/__algorithm/ranges_replace.h +++ b/libcxx/include/__algorithm/ranges_replace.h @@ -24,6 +24,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -60,4 +63,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_REPLACE_H diff --git a/libcxx/include/__algorithm/ranges_replace_copy.h b/libcxx/include/__algorithm/ranges_replace_copy.h index a7627024812f..633f993e5c94 100644 --- a/libcxx/include/__algorithm/ranges_replace_copy.h +++ b/libcxx/include/__algorithm/ranges_replace_copy.h @@ -26,6 +26,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -85,4 +88,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_REPLACE_COPY_H diff --git a/libcxx/include/__algorithm/ranges_replace_copy_if.h b/libcxx/include/__algorithm/ranges_replace_copy_if.h index 10ed1fda6c5c..e065c3ac0acc 100644 --- a/libcxx/include/__algorithm/ranges_replace_copy_if.h +++ b/libcxx/include/__algorithm/ranges_replace_copy_if.h @@ -24,6 +24,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -90,4 +93,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_REPLACE_COPY_IF_H diff --git a/libcxx/include/__algorithm/ranges_replace_if.h b/libcxx/include/__algorithm/ranges_replace_if.h index 519fa32029ac..6445f42aea19 100644 --- a/libcxx/include/__algorithm/ranges_replace_if.h +++ b/libcxx/include/__algorithm/ranges_replace_if.h @@ -23,6 +23,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -73,4 +76,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // 
_LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_REPLACE_IF_H diff --git a/libcxx/include/__algorithm/ranges_reverse_copy.h b/libcxx/include/__algorithm/ranges_reverse_copy.h index 35b9edba0bfb..60043787a717 100644 --- a/libcxx/include/__algorithm/ranges_reverse_copy.h +++ b/libcxx/include/__algorithm/ranges_reverse_copy.h @@ -25,6 +25,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -62,4 +65,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_REVERSE_COPY_H diff --git a/libcxx/include/__algorithm/ranges_rotate.h b/libcxx/include/__algorithm/ranges_rotate.h index ebed9bbd5426..8d33a6f0799b 100644 --- a/libcxx/include/__algorithm/ranges_rotate.h +++ b/libcxx/include/__algorithm/ranges_rotate.h @@ -25,6 +25,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -63,4 +66,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_ROTATE_H diff --git a/libcxx/include/__algorithm/ranges_rotate_copy.h b/libcxx/include/__algorithm/ranges_rotate_copy.h index ab76c0944c47..26fe110b5389 100644 --- a/libcxx/include/__algorithm/ranges_rotate_copy.h +++ b/libcxx/include/__algorithm/ranges_rotate_copy.h @@ -22,6 +22,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -60,4 +63,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_ROTATE_COPY_H diff --git a/libcxx/include/__algorithm/ranges_sample.h b/libcxx/include/__algorithm/ranges_sample.h index d347d82205a8..e4f60a7b66be 100644 --- a/libcxx/include/__algorithm/ranges_sample.h +++ b/libcxx/include/__algorithm/ranges_sample.h @@ -27,6 +27,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -66,4 +69,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_SAMPLE_H diff --git a/libcxx/include/__algorithm/ranges_search_n.h b/libcxx/include/__algorithm/ranges_search_n.h index 4e53f30f71f9..4c1d73d8e6c3 100644 --- a/libcxx/include/__algorithm/ranges_search_n.h +++ b/libcxx/include/__algorithm/ranges_search_n.h @@ -31,6 +31,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -108,4 +111,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_SEARCH_N_H diff --git a/libcxx/include/__algorithm/ranges_set_difference.h b/libcxx/include/__algorithm/ranges_set_difference.h index a9453ed336f5..0841fb4ffd0c 100644 --- a/libcxx/include/__algorithm/ranges_set_difference.h +++ b/libcxx/include/__algorithm/ranges_set_difference.h @@ -30,6 +30,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -100,4 +103,7 @@ inline constexpr auto set_difference = __set_difference::__fn{}; _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 + +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_SET_DIFFERENCE_H diff --git 
a/libcxx/include/__algorithm/ranges_set_intersection.h b/libcxx/include/__algorithm/ranges_set_intersection.h index 4cdcbb75051a..9427379745b6 100644 --- a/libcxx/include/__algorithm/ranges_set_intersection.h +++ b/libcxx/include/__algorithm/ranges_set_intersection.h @@ -28,6 +28,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -105,4 +108,7 @@ inline constexpr auto set_intersection = __set_intersection::__fn{}; _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 + +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_SET_INTERSECTION_H diff --git a/libcxx/include/__algorithm/ranges_set_symmetric_difference.h b/libcxx/include/__algorithm/ranges_set_symmetric_difference.h index d8710a1c47b0..995eb0999d94 100644 --- a/libcxx/include/__algorithm/ranges_set_symmetric_difference.h +++ b/libcxx/include/__algorithm/ranges_set_symmetric_difference.h @@ -28,6 +28,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -105,4 +108,7 @@ inline constexpr auto set_symmetric_difference = __set_symmetric_difference::__f _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 + +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_SET_SYMMETRIC_DIFFERENCE_H diff --git a/libcxx/include/__algorithm/ranges_set_union.h b/libcxx/include/__algorithm/ranges_set_union.h index c627166fffed..e870e390cc66 100644 --- a/libcxx/include/__algorithm/ranges_set_union.h +++ b/libcxx/include/__algorithm/ranges_set_union.h @@ -31,6 +31,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -107,4 +110,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_SET_UNION_H diff --git a/libcxx/include/__algorithm/ranges_shuffle.h b/libcxx/include/__algorithm/ranges_shuffle.h index fca420058dec..ab98ea22caab 100644 --- a/libcxx/include/__algorithm/ranges_shuffle.h +++ b/libcxx/include/__algorithm/ranges_shuffle.h @@ -31,6 +31,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -64,4 +67,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_SHUFFLE_H diff --git a/libcxx/include/__algorithm/ranges_sort.h b/libcxx/include/__algorithm/ranges_sort.h index 2ad0e0c233be..0296c146b3ed 100644 --- a/libcxx/include/__algorithm/ranges_sort.h +++ b/libcxx/include/__algorithm/ranges_sort.h @@ -31,6 +31,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -76,4 +79,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_SORT_H diff --git a/libcxx/include/__algorithm/ranges_sort_heap.h b/libcxx/include/__algorithm/ranges_sort_heap.h index 365c7dba6156..bab30df1708c 100644 --- a/libcxx/include/__algorithm/ranges_sort_heap.h +++ b/libcxx/include/__algorithm/ranges_sort_heap.h @@ -32,6 +32,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -77,4 +80,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_SORT_HEAP_H diff 
--git a/libcxx/include/__algorithm/ranges_stable_partition.h b/libcxx/include/__algorithm/ranges_stable_partition.h index 44937fa58990..f34027ff772c 100644 --- a/libcxx/include/__algorithm/ranges_stable_partition.h +++ b/libcxx/include/__algorithm/ranges_stable_partition.h @@ -34,6 +34,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -84,4 +87,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_STABLE_PARTITION_H diff --git a/libcxx/include/__algorithm/ranges_stable_sort.h b/libcxx/include/__algorithm/ranges_stable_sort.h index a4eed3836356..93909e253cc0 100644 --- a/libcxx/include/__algorithm/ranges_stable_sort.h +++ b/libcxx/include/__algorithm/ranges_stable_sort.h @@ -31,6 +31,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -74,4 +77,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_STABLE_SORT_H diff --git a/libcxx/include/__algorithm/ranges_starts_with.h b/libcxx/include/__algorithm/ranges_starts_with.h index 7da78001d814..90e184aa9bcc 100644 --- a/libcxx/include/__algorithm/ranges_starts_with.h +++ b/libcxx/include/__algorithm/ranges_starts_with.h @@ -24,6 +24,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 23 _LIBCPP_BEGIN_NAMESPACE_STD @@ -87,4 +90,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 23 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_STARTS_WITH_H diff --git a/libcxx/include/__algorithm/ranges_swap_ranges.h b/libcxx/include/__algorithm/ranges_swap_ranges.h index 1d0ebc0d5221..b6d9f618395a 100644 --- a/libcxx/include/__algorithm/ranges_swap_ranges.h +++ b/libcxx/include/__algorithm/ranges_swap_ranges.h @@ -24,6 +24,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -62,4 +65,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_SWAP_RANGES_H diff --git a/libcxx/include/__algorithm/ranges_transform.h b/libcxx/include/__algorithm/ranges_transform.h index f66a07ac026e..7850ec4f8465 100644 --- a/libcxx/include/__algorithm/ranges_transform.h +++ b/libcxx/include/__algorithm/ranges_transform.h @@ -26,6 +26,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -169,4 +172,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_TRANSFORM_H diff --git a/libcxx/include/__algorithm/ranges_unique.h b/libcxx/include/__algorithm/ranges_unique.h index b17e01fc5057..7340310eb36a 100644 --- a/libcxx/include/__algorithm/ranges_unique.h +++ b/libcxx/include/__algorithm/ranges_unique.h @@ -32,6 +32,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -74,4 +77,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_UNIQUE_H diff --git a/libcxx/include/__algorithm/ranges_unique_copy.h b/libcxx/include/__algorithm/ranges_unique_copy.h index 7e89f9d97af7..61133885ae80 100644 --- 
a/libcxx/include/__algorithm/ranges_unique_copy.h +++ b/libcxx/include/__algorithm/ranges_unique_copy.h @@ -32,6 +32,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 _LIBCPP_BEGIN_NAMESPACE_STD @@ -112,4 +115,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_RANGES_UNIQUE_COPY_H diff --git a/libcxx/include/__algorithm/remove.h b/libcxx/include/__algorithm/remove.h index 2b9d4ff26ed2..1498852c4361 100644 --- a/libcxx/include/__algorithm/remove.h +++ b/libcxx/include/__algorithm/remove.h @@ -18,6 +18,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD template <class _ForwardIterator, class _Tp> @@ -38,4 +41,6 @@ remove(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) { _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_REMOVE_H diff --git a/libcxx/include/__algorithm/remove_if.h b/libcxx/include/__algorithm/remove_if.h index 6eceddce8d56..c77b78023f52 100644 --- a/libcxx/include/__algorithm/remove_if.h +++ b/libcxx/include/__algorithm/remove_if.h @@ -17,6 +17,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD template <class _ForwardIterator, class _Predicate> @@ -37,4 +40,6 @@ remove_if(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_REMOVE_IF_H diff --git a/libcxx/include/__algorithm/reverse.h b/libcxx/include/__algorithm/reverse.h index 6bd0aa393280..4167c9116d96 100644 --- a/libcxx/include/__algorithm/reverse.h +++ b/libcxx/include/__algorithm/reverse.h @@ -19,6 +19,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD template <class _AlgPolicy, class _BidirectionalIterator> @@ -54,4 +57,6 @@ reverse(_BidirectionalIterator __first, _BidirectionalIterator __last) { _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_REVERSE_H diff --git a/libcxx/include/__algorithm/rotate.h b/libcxx/include/__algorithm/rotate.h index d8162b1a94b2..9a4d07883e32 100644 --- a/libcxx/include/__algorithm/rotate.h +++ b/libcxx/include/__algorithm/rotate.h @@ -23,6 +23,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD template <class _AlgPolicy, class _ForwardIterator> @@ -190,4 +193,6 @@ rotate(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __l _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_ROTATE_H diff --git a/libcxx/include/__algorithm/set_difference.h b/libcxx/include/__algorithm/set_difference.h index a924702ce5f2..f414bcecb50d 100644 --- a/libcxx/include/__algorithm/set_difference.h +++ b/libcxx/include/__algorithm/set_difference.h @@ -25,6 +25,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD template <class _AlgPolicy, class _Comp, class _InIter1, class _Sent1, class _InIter2, class _Sent2, class _OutIter> @@ -71,4 +74,6 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator set_d _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_SET_DIFFERENCE_H diff --git a/libcxx/include/__algorithm/set_intersection.h b/libcxx/include/__algorithm/set_intersection.h index 
f2603fe1365a..73d888d1b038 100644 --- a/libcxx/include/__algorithm/set_intersection.h +++ b/libcxx/include/__algorithm/set_intersection.h @@ -21,6 +21,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD template <class _InIter1, class _InIter2, class _OutIter> @@ -95,4 +98,6 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator set_i _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_SET_INTERSECTION_H diff --git a/libcxx/include/__algorithm/set_symmetric_difference.h b/libcxx/include/__algorithm/set_symmetric_difference.h index 832c3979bfd7..db36665a6136 100644 --- a/libcxx/include/__algorithm/set_symmetric_difference.h +++ b/libcxx/include/__algorithm/set_symmetric_difference.h @@ -22,6 +22,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD template <class _InIter1, class _InIter2, class _OutIter> @@ -101,4 +104,6 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator set_symmetri _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_SET_SYMMETRIC_DIFFERENCE_H diff --git a/libcxx/include/__algorithm/set_union.h b/libcxx/include/__algorithm/set_union.h index cf48adae03be..a79c50fd3cf2 100644 --- a/libcxx/include/__algorithm/set_union.h +++ b/libcxx/include/__algorithm/set_union.h @@ -22,6 +22,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD template <class _InIter1, class _InIter2, class _OutIter> @@ -97,4 +100,6 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator set_union( _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_SET_UNION_H diff --git a/libcxx/include/__algorithm/shift_left.h b/libcxx/include/__algorithm/shift_left.h index 645c58c29119..06cd7c5f8764 100644 --- a/libcxx/include/__algorithm/shift_left.h +++ b/libcxx/include/__algorithm/shift_left.h @@ -17,6 +17,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 @@ -51,4 +54,6 @@ shift_left(_ForwardIterator __first, _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_SHIFT_LEFT_H diff --git a/libcxx/include/__algorithm/shift_right.h b/libcxx/include/__algorithm/shift_right.h index 73ef98bd39de..01853057fc47 100644 --- a/libcxx/include/__algorithm/shift_right.h +++ b/libcxx/include/__algorithm/shift_right.h @@ -20,6 +20,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 @@ -97,4 +100,6 @@ shift_right(_ForwardIterator __first, _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_SHIFT_RIGHT_H diff --git a/libcxx/include/__algorithm/sort.h b/libcxx/include/__algorithm/sort.h index 451133a2d193..8a5e0211cdf4 100644 --- a/libcxx/include/__algorithm/sort.h +++ b/libcxx/include/__algorithm/sort.h @@ -39,6 +39,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD // stable, 2-3 compares, 0-2 swaps @@ -1009,4 +1012,6 @@ sort(_RandomAccessIterator __first, _RandomAccessIterator __last) { _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_SORT_H diff --git a/libcxx/include/__algorithm/sort_heap.h b/libcxx/include/__algorithm/sort_heap.h index 0a6d992d0090..060fc33c3c6e 100644 --- 
a/libcxx/include/__algorithm/sort_heap.h +++ b/libcxx/include/__algorithm/sort_heap.h @@ -24,6 +24,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD template <class _AlgPolicy, class _Compare, class _RandomAccessIterator> @@ -55,4 +58,6 @@ sort_heap(_RandomAccessIterator __first, _RandomAccessIterator __last) { _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_SORT_HEAP_H diff --git a/libcxx/include/__algorithm/stable_partition.h b/libcxx/include/__algorithm/stable_partition.h index 8762abcf18e1..8bb1eaf2d224 100644 --- a/libcxx/include/__algorithm/stable_partition.h +++ b/libcxx/include/__algorithm/stable_partition.h @@ -26,6 +26,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD template <class _AlgPolicy, class _Predicate, class _ForwardIterator, class _Distance, class _Pair> @@ -299,4 +302,6 @@ stable_partition(_ForwardIterator __first, _ForwardIterator __last, _Predicate _ _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_STABLE_PARTITION_H diff --git a/libcxx/include/__algorithm/stable_sort.h b/libcxx/include/__algorithm/stable_sort.h index ffc6e4ce2818..9be192bd65a6 100644 --- a/libcxx/include/__algorithm/stable_sort.h +++ b/libcxx/include/__algorithm/stable_sort.h @@ -29,6 +29,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD template <class _AlgPolicy, class _Compare, class _BidirectionalIterator> @@ -265,4 +268,6 @@ inline _LIBCPP_HIDE_FROM_ABI void stable_sort(_RandomAccessIterator __first, _Ra _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_STABLE_SORT_H diff --git a/libcxx/include/__algorithm/swap_ranges.h b/libcxx/include/__algorithm/swap_ranges.h index 7fab5c49a656..54b453b72360 100644 --- a/libcxx/include/__algorithm/swap_ranges.h +++ b/libcxx/include/__algorithm/swap_ranges.h @@ -18,6 +18,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD // 2+2 iterators: the shorter size will be used. 
@@ -54,4 +57,6 @@ swap_ranges(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardItera

 _LIBCPP_END_NAMESPACE_STD

+_LIBCPP_POP_MACROS
+
 #endif // _LIBCPP___ALGORITHM_SWAP_RANGES_H
diff --git a/libcxx/include/__algorithm/unique.h b/libcxx/include/__algorithm/unique.h
index 1717a00c8a93..056373d06fe4 100644
--- a/libcxx/include/__algorithm/unique.h
+++ b/libcxx/include/__algorithm/unique.h
@@ -21,6 +21,9 @@
 # pragma GCC system_header
 #endif

+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
 _LIBCPP_BEGIN_NAMESPACE_STD

 // unique
@@ -56,4 +59,6 @@ unique(_ForwardIterator __first, _ForwardIterator __last) {

 _LIBCPP_END_NAMESPACE_STD

+_LIBCPP_POP_MACROS
+
 #endif // _LIBCPP___ALGORITHM_UNIQUE_H
diff --git a/libcxx/include/__algorithm/unique_copy.h b/libcxx/include/__algorithm/unique_copy.h
index 81fcd50f011d..16ce80cab32f 100644
--- a/libcxx/include/__algorithm/unique_copy.h
+++ b/libcxx/include/__algorithm/unique_copy.h
@@ -23,6 +23,9 @@
 # pragma GCC system_header
 #endif

+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
 _LIBCPP_BEGIN_NAMESPACE_STD

 namespace __unique_copy_tags {
@@ -119,4 +122,6 @@ unique_copy(_InputIterator __first, _InputIterator __last, _OutputIterator __res

 _LIBCPP_END_NAMESPACE_STD

+_LIBCPP_POP_MACROS
+
 #endif // _LIBCPP___ALGORITHM_UNIQUE_COPY_H
diff --git a/libcxx/include/__algorithm/unwrap_iter.h b/libcxx/include/__algorithm/unwrap_iter.h
index a298a2b27105..50d815c97088 100644
--- a/libcxx/include/__algorithm/unwrap_iter.h
+++ b/libcxx/include/__algorithm/unwrap_iter.h
@@ -80,6 +80,6 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _OrigIter __rewrap_iter(_OrigIter __orig

 _LIBCPP_END_NAMESPACE_STD

-_LIBCPP_PUSH_MACROS
+_LIBCPP_POP_MACROS

 #endif // _LIBCPP___ALGORITHM_UNWRAP_ITER_H
diff --git a/libcxx/include/__algorithm/unwrap_range.h b/libcxx/include/__algorithm/unwrap_range.h
index 053fd550b302..2d4b9bb5545a 100644
--- a/libcxx/include/__algorithm/unwrap_range.h
+++ b/libcxx/include/__algorithm/unwrap_range.h
@@ -22,6 +22,9 @@
 # pragma GCC system_header
 #endif

+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
 _LIBCPP_BEGIN_NAMESPACE_STD

 // __unwrap_range and __rewrap_range are used to unwrap ranges which may have different iterator and sentinel types.
@@ -91,4 +94,6 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _Iter __rewrap_range(_Iter __orig_iter,

 _LIBCPP_END_NAMESPACE_STD

+_LIBCPP_POP_MACROS
+
 #endif // _LIBCPP___ALGORITHM_UNWRAP_RANGE_H
diff --git a/libcxx/include/__bit_reference b/libcxx/include/__bit_reference
index 9032b8f01809..3a5339b72ddc 100644
--- a/libcxx/include/__bit_reference
+++ b/libcxx/include/__bit_reference
@@ -173,7 +173,7 @@ private:

 // fill_n

-template <bool _FillValue, class _Cp>
+template <bool _FillVal, class _Cp>
 _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void
 __fill_n(__bit_iterator<_Cp, false> __first, typename _Cp::size_type __n) {
   using _It = __bit_iterator<_Cp, false>;
@@ -185,7 +185,7 @@ __fill_n(__bit_iterator<_Cp, false> __first, typename _Cp::size_type __n) {
     __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_);
     __storage_type __dn = std::min(__clz_f, __n);
     __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
-    if (_FillValue)
+    if (_FillVal)
       *__first.__seg_ |= __m;
     else
       *__first.__seg_ &= ~__m;
@@ -194,13 +194,13 @@ __fill_n(__bit_iterator<_Cp, false> __first, typename _Cp::size_type __n) {
   }
   // do middle whole words
   __storage_type __nw = __n / __bits_per_word;
-  std::fill_n(std::__to_address(__first.__seg_), __nw, _FillValue ? static_cast<__storage_type>(-1) : 0);
+  std::fill_n(std::__to_address(__first.__seg_), __nw, _FillVal ? static_cast<__storage_type>(-1) : 0);
   __n -= __nw * __bits_per_word;
   // do last partial word
   if (__n > 0) {
     __first.__seg_ += __nw;
     __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
-    if (_FillValue)
+    if (_FillVal)
       *__first.__seg_ |= __m;
     else
       *__first.__seg_ &= ~__m;
@@ -1007,7 +1007,7 @@ private:
   friend class __bit_iterator<_Cp, true>;
   template <class _Dp>
   friend struct __bit_array;
-  template <bool _FillValue, class _Dp>
+  template <bool _FillVal, class _Dp>
   _LIBCPP_CONSTEXPR_SINCE_CXX20 friend void __fill_n(__bit_iterator<_Dp, false> __first, typename _Dp::size_type __n);

   template <class _Dp, bool _IC>
diff --git a/libcxx/include/__config b/libcxx/include/__config
index 18875e1b61e6..8b2eaf69d170 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -174,11 +174,6 @@
 // The implementation moved to the header, but we still export the symbols from
 // the dylib for backwards compatibility.
 # define _LIBCPP_ABI_DO_NOT_EXPORT_TO_CHARS_BASE_10
-// Save memory by providing the allocator more freedom to allocate the most
-// efficient size class by dropping the alignment requirements for std::string's
-// pointer from 16 to 8. This changes the output of std::string::max_size,
-// which makes it ABI breaking
-# define _LIBCPP_ABI_STRING_8_BYTE_ALIGNMENT
 # elif _LIBCPP_ABI_VERSION == 1
 # if !(defined(_LIBCPP_OBJECT_FORMAT_COFF) || defined(_LIBCPP_OBJECT_FORMAT_XCOFF))
 // Enable compiling copies of now inline methods into the dylib to support
@@ -1280,8 +1275,8 @@ __sanitizer_verify_double_ended_contiguous_container(const void*, const void*, c
 # endif // _LIBCPP_ENABLE_CXX20_REMOVED_FEATURES

 // clang-format off
-# define _LIBCPP_PUSH_MACROS _Pragma("push_macro(\"min\")") _Pragma("push_macro(\"max\")") _Pragma("push_macro(\"refresh()\")") _Pragma("push_macro(\"move(int, int)\")") _Pragma("push_macro(\"erase()\")")
-# define _LIBCPP_POP_MACROS _Pragma("pop_macro(\"min\")") _Pragma("pop_macro(\"max\")") _Pragma("pop_macro(\"refresh()\")") _Pragma("pop_macro(\"move(int, int)\")") _Pragma("pop_macro(\"erase()\")")
+# define _LIBCPP_PUSH_MACROS _Pragma("push_macro(\"min\")") _Pragma("push_macro(\"max\")") _Pragma("push_macro(\"refresh\")") _Pragma("push_macro(\"move\")") _Pragma("push_macro(\"erase\")")
+# define _LIBCPP_POP_MACROS _Pragma("pop_macro(\"min\")") _Pragma("pop_macro(\"max\")") _Pragma("pop_macro(\"refresh\")") _Pragma("pop_macro(\"move\")") _Pragma("pop_macro(\"erase\")")
 // clang-format on

 # ifndef _LIBCPP_NO_AUTO_LINK
diff --git a/libcxx/include/__filesystem/directory_iterator.h b/libcxx/include/__filesystem/directory_iterator.h
index 5287a4d8b055..a5aa5ff5432d 100644
--- a/libcxx/include/__filesystem/directory_iterator.h
+++ b/libcxx/include/__filesystem/directory_iterator.h
@@ -29,6 +29,9 @@
 # pragma GCC system_header
 #endif

+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
 #if _LIBCPP_STD_VER >= 17 && !defined(_LIBCPP_HAS_NO_FILESYSTEM)

 _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM
@@ -144,4 +147,6 @@ _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY inline constexpr bool

 #endif // _LIBCPP_STD_VER >= 17 && !defined(_LIBCPP_HAS_NO_FILESYSTEM)

+_LIBCPP_POP_MACROS
+
 #endif // _LIBCPP___FILESYSTEM_DIRECTORY_ITERATOR_H
diff --git a/libcxx/include/__filesystem/path.h b/libcxx/include/__filesystem/path.h
index 1ff992dd64e6..8c7d426f7a6f 100644
--- a/libcxx/include/__filesystem/path.h
+++ b/libcxx/include/__filesystem/path.h
@@ -36,6 +36,9 @@
 # pragma GCC system_header
 #endif

+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
 #if _LIBCPP_STD_VER >= 17

 _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM
@@ -925,4 +928,6 @@ _LIBCPP_END_NAMESPACE_STD

 #endif // _LIBCPP_STD_VER >= 17

+_LIBCPP_POP_MACROS
+
 #endif // _LIBCPP___FILESYSTEM_PATH_H
diff --git a/libcxx/include/__filesystem/recursive_directory_iterator.h b/libcxx/include/__filesystem/recursive_directory_iterator.h
index 7519cc2f2932..a8af4f73b14a 100644
--- a/libcxx/include/__filesystem/recursive_directory_iterator.h
+++ b/libcxx/include/__filesystem/recursive_directory_iterator.h
@@ -28,6 +28,9 @@
 # pragma GCC system_header
 #endif

+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
 #if _LIBCPP_STD_VER >= 17 && !defined(_LIBCPP_HAS_NO_FILESYSTEM)

 _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM
@@ -157,4 +160,6 @@ _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY inline constexpr bool

 #endif // _LIBCPP_STD_VER >= 17 && !defined(_LIBCPP_HAS_NO_FILESYSTEM)

+_LIBCPP_POP_MACROS
+
 #endif // _LIBCPP___FILESYSTEM_RECURSIVE_DIRECTORY_ITERATOR_H
diff --git a/libcxx/include/__format/format_arg.h b/libcxx/include/__format/format_arg.h
index 10fca15d5a7a..34ed9bcd6d63 100644
--- a/libcxx/include/__format/format_arg.h
+++ b/libcxx/include/__format/format_arg.h
@@ -30,6 +30,9 @@
 # pragma GCC system_header
 #endif

+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
 _LIBCPP_BEGIN_NAMESPACE_STD

 #if _LIBCPP_STD_VER >= 20
@@ -289,4 +292,6 @@ _LIBCPP_HIDE_FROM_ABI decltype(auto) visit_format_arg(_Visitor&& __vis, basic_fo

 _LIBCPP_END_NAMESPACE_STD

+_LIBCPP_POP_MACROS
+
 #endif // _LIBCPP___FORMAT_FORMAT_ARG_H
diff --git a/libcxx/include/__format/format_context.h b/libcxx/include/__format/format_context.h
index 5b252b81f691..edb0348b34f3 100644
--- a/libcxx/include/__format/format_context.h
+++ b/libcxx/include/__format/format_context.h
@@ -35,6 +35,9 @@
 # pragma GCC system_header
 #endif

+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
 _LIBCPP_BEGIN_NAMESPACE_STD

 #if _LIBCPP_STD_VER >= 20
@@ -205,4 +208,6 @@ _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(basic_format_context);

 _LIBCPP_END_NAMESPACE_STD

+_LIBCPP_POP_MACROS
+
 #endif // _LIBCPP___FORMAT_FORMAT_CONTEXT_H
diff --git a/libcxx/include/__format/format_functions.h b/libcxx/include/__format/format_functions.h
index 015bff70f51d..cf833ad20554 100644
--- a/libcxx/include/__format/format_functions.h
+++ b/libcxx/include/__format/format_functions.h
@@ -48,6 +48,9 @@
 # pragma GCC system_header
 #endif

+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
 _LIBCPP_BEGIN_NAMESPACE_STD

 #if _LIBCPP_STD_VER >= 20
@@ -674,4 +677,6 @@ formatted_size(locale __loc, wformat_string<_Args...> __fmt, _Args&&... __args)

 _LIBCPP_END_NAMESPACE_STD

+_LIBCPP_POP_MACROS
+
 #endif // _LIBCPP___FORMAT_FORMAT_FUNCTIONS
diff --git a/libcxx/include/__format/formatter_output.h b/libcxx/include/__format/formatter_output.h
index eebe880d69ef..d5038eb158b0 100644
--- a/libcxx/include/__format/formatter_output.h
+++ b/libcxx/include/__format/formatter_output.h
@@ -35,6 +35,9 @@
 # pragma GCC system_header
 #endif

+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
 _LIBCPP_BEGIN_NAMESPACE_STD

 #if _LIBCPP_STD_VER >= 20
@@ -325,4 +328,6 @@ _LIBCPP_HIDE_FROM_ABI int __truncate(basic_string_view<_CharT>& __str, int __pre

 _LIBCPP_END_NAMESPACE_STD

+_LIBCPP_POP_MACROS
+
 #endif // _LIBCPP___FORMAT_FORMATTER_OUTPUT_H
diff --git a/libcxx/include/__format/write_escaped.h b/libcxx/include/__format/write_escaped.h
index ec1283a173e9..43a074dd8d70 100644
--- a/libcxx/include/__format/write_escaped.h
+++ b/libcxx/include/__format/write_escaped.h
@@ -30,6 +30,9 @@
 # pragma GCC system_header
 #endif

+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
 _LIBCPP_BEGIN_NAMESPACE_STD

 namespace __formatter {
@@ -218,4 +221,6 @@ __format_escaped_string(basic_string_view<_CharT> __values,

 _LIBCPP_END_NAMESPACE_STD

+_LIBCPP_POP_MACROS
+
 #endif // _LIBCPP___FORMAT_WRITE_ESCAPED_H
diff --git a/libcxx/include/__functional/function.h b/libcxx/include/__functional/function.h
index 6505bb587173..416c26a0c73f 100644
--- a/libcxx/include/__functional/function.h
+++ b/libcxx/include/__functional/function.h
@@ -45,6 +45,9 @@
 # pragma GCC system_header
 #endif

+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
 #ifndef _LIBCPP_CXX03_LANG

 _LIBCPP_BEGIN_NAMESPACE_STD
@@ -1032,4 +1035,6 @@ _LIBCPP_END_NAMESPACE_STD

 #endif // _LIBCPP_CXX03_LANG

+_LIBCPP_POP_MACROS
+
 #endif // _LIBCPP___FUNCTIONAL_FUNCTION_H
diff --git a/libcxx/include/__iterator/cpp17_iterator_concepts.h b/libcxx/include/__iterator/cpp17_iterator_concepts.h
index c4f49fe74227..d1ad2b4e2848 100644
--- a/libcxx/include/__iterator/cpp17_iterator_concepts.h
+++ b/libcxx/include/__iterator/cpp17_iterator_concepts.h
@@ -29,6 +29,9 @@
 # pragma GCC system_header
 #endif

+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
 #if _LIBCPP_STD_VER >= 20

 _LIBCPP_BEGIN_NAMESPACE_STD
@@ -182,4 +185,6 @@ _LIBCPP_END_NAMESPACE_STD

 #endif // _LIBCPP_STD_VER >= 20

+_LIBCPP_POP_MACROS
+
 #endif // _LIBCPP___ITERATOR_CPP17_ITERATOR_CONCEPTS_H
diff --git a/libcxx/include/__iterator/iterator_with_data.h b/libcxx/include/__iterator/iterator_with_data.h
index 06c2fa699c30..afdc0a4e12e2 100644
--- a/libcxx/include/__iterator/iterator_with_data.h
+++ b/libcxx/include/__iterator/iterator_with_data.h
@@ -24,6 +24,9 @@
 # pragma GCC system_header
 #endif

+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
 #if _LIBCPP_STD_VER >= 20

 _LIBCPP_BEGIN_NAMESPACE_STD
@@ -97,4 +100,6 @@ _LIBCPP_END_NAMESPACE_STD

 #endif // _LIBCPP_STD_VER >= 20

+_LIBCPP_POP_MACROS
+
 #endif // _LIBCPP___ITERATOR_ITERATOR_WITH_DATA_H
diff --git a/libcxx/include/__memory/ranges_uninitialized_algorithms.h b/libcxx/include/__memory/ranges_uninitialized_algorithms.h
index d836d00820a6..90090055bbbb 100644
--- a/libcxx/include/__memory/ranges_uninitialized_algorithms.h
+++ b/libcxx/include/__memory/ranges_uninitialized_algorithms.h
@@ -31,6 +31,9 @@
 # pragma GCC system_header
 #endif

+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
 _LIBCPP_BEGIN_NAMESPACE_STD

 #if _LIBCPP_STD_VER >= 20
@@ -317,4 +320,6 @@ inline constexpr auto uninitialized_move_n = __uninitialized_move_n::__fn{};

 _LIBCPP_END_NAMESPACE_STD

+_LIBCPP_POP_MACROS
+
 #endif //
_LIBCPP___MEMORY_RANGES_UNINITIALIZED_ALGORITHMS_H diff --git a/libcxx/include/__memory/raw_storage_iterator.h b/libcxx/include/__memory/raw_storage_iterator.h index 33790a397c84..774878aa1c5e 100644 --- a/libcxx/include/__memory/raw_storage_iterator.h +++ b/libcxx/include/__memory/raw_storage_iterator.h @@ -22,6 +22,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_RAW_STORAGE_ITERATOR) @@ -79,4 +82,6 @@ public: _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___MEMORY_RAW_STORAGE_ITERATOR_H diff --git a/libcxx/include/__memory/shared_ptr.h b/libcxx/include/__memory/shared_ptr.h index 9a73d439306d..e6de615d76fa 100644 --- a/libcxx/include/__memory/shared_ptr.h +++ b/libcxx/include/__memory/shared_ptr.h @@ -61,6 +61,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD // NOTE: Relaxed and acq/rel atomics (for increment and decrement respectively) @@ -1662,4 +1665,6 @@ inline _LIBCPP_HIDE_FROM_ABI bool atomic_compare_exchange_weak_explicit( _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___MEMORY_SHARED_PTR_H diff --git a/libcxx/include/__memory/uninitialized_algorithms.h b/libcxx/include/__memory/uninitialized_algorithms.h index 2a4ecf655be2..9aff93a89648 100644 --- a/libcxx/include/__memory/uninitialized_algorithms.h +++ b/libcxx/include/__memory/uninitialized_algorithms.h @@ -42,6 +42,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD struct __always_false { @@ -648,4 +651,6 @@ __uninitialized_allocator_move_if_noexcept(_Alloc&, _Iter1 __first1, _Iter1 __la _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___MEMORY_UNINITIALIZED_ALGORITHMS_H diff --git a/libcxx/include/__mutex/once_flag.h b/libcxx/include/__mutex/once_flag.h index 5a6f8e09055f..9d7baecbc708 100644 --- a/libcxx/include/__mutex/once_flag.h +++ b/libcxx/include/__mutex/once_flag.h @@ -25,6 +25,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD struct _LIBCPP_TEMPLATE_VIS once_flag; @@ -151,4 +154,6 @@ inline _LIBCPP_HIDE_FROM_ABI void call_once(once_flag& __flag, const _Callable& _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___MUTEX_ONCE_FLAG_H diff --git a/libcxx/include/__numeric/pstl_reduce.h b/libcxx/include/__numeric/pstl_reduce.h index b19972a46db7..f9f666c2bb38 100644 --- a/libcxx/include/__numeric/pstl_reduce.h +++ b/libcxx/include/__numeric/pstl_reduce.h @@ -20,6 +20,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 _LIBCPP_BEGIN_NAMESPACE_STD @@ -101,4 +104,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___NUMERIC_PSTL_REDUCE_H diff --git a/libcxx/include/__numeric/pstl_transform_reduce.h b/libcxx/include/__numeric/pstl_transform_reduce.h index 112772604666..2f412d41f7f2 100644 --- a/libcxx/include/__numeric/pstl_transform_reduce.h +++ b/libcxx/include/__numeric/pstl_transform_reduce.h @@ -22,6 +22,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 _LIBCPP_BEGIN_NAMESPACE_STD @@ -148,4 +151,6 @@ 
_LIBCPP_END_NAMESPACE_STD #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___NUMERIC_PSTL_TRANSFORM_REDUCE_H diff --git a/libcxx/include/__numeric/reduce.h b/libcxx/include/__numeric/reduce.h index 1aeefce132b2..6c205bf581fb 100644 --- a/libcxx/include/__numeric/reduce.h +++ b/libcxx/include/__numeric/reduce.h @@ -19,6 +19,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 17 @@ -45,4 +48,6 @@ reduce(_InputIterator __first, _InputIterator __last) { _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___NUMERIC_REDUCE_H diff --git a/libcxx/include/__numeric/saturation_arithmetic.h b/libcxx/include/__numeric/saturation_arithmetic.h index 50274c6bbd9f..0e6f455cf228 100644 --- a/libcxx/include/__numeric/saturation_arithmetic.h +++ b/libcxx/include/__numeric/saturation_arithmetic.h @@ -19,6 +19,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 26 @@ -107,4 +110,6 @@ _LIBCPP_HIDE_FROM_ABI constexpr _Rp saturate_cast(_Tp __x) noexcept { _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___NUMERIC_SATURATION_ARITHMETIC_H diff --git a/libcxx/include/__numeric/transform_reduce.h b/libcxx/include/__numeric/transform_reduce.h index 6c0a81e5e4b0..f1150510f0c3 100644 --- a/libcxx/include/__numeric/transform_reduce.h +++ b/libcxx/include/__numeric/transform_reduce.h @@ -18,6 +18,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 17 @@ -51,4 +54,6 @@ transform_reduce(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterat _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___NUMERIC_TRANSFORM_REDUCE_H diff --git a/libcxx/include/__ranges/counted.h b/libcxx/include/__ranges/counted.h index 337634895766..83d76f8fd210 100644 --- a/libcxx/include/__ranges/counted.h +++ b/libcxx/include/__ranges/counted.h @@ -29,6 +29,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 @@ -82,4 +85,6 @@ inline constexpr auto counted = __counted::__fn{}; _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___RANGES_COUNTED_H diff --git a/libcxx/include/__ranges/drop_while_view.h b/libcxx/include/__ranges/drop_while_view.h index 4e3ef61678f4..92f48bd0ecfb 100644 --- a/libcxx/include/__ranges/drop_while_view.h +++ b/libcxx/include/__ranges/drop_while_view.h @@ -37,6 +37,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 @@ -128,4 +131,6 @@ inline constexpr auto drop_while = __drop_while::__fn{}; _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___RANGES_DROP_WHILE_VIEW_H diff --git a/libcxx/include/__ranges/elements_view.h b/libcxx/include/__ranges/elements_view.h index 325e4c9dca63..989d36fbcaaa 100644 --- a/libcxx/include/__ranges/elements_view.h +++ b/libcxx/include/__ranges/elements_view.h @@ -43,6 +43,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 @@ -410,4 +413,6 @@ inline constexpr auto values = elements<1>; _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___RANGES_ELEMENTS_VIEW_H diff --git 
a/libcxx/include/__ranges/filter_view.h b/libcxx/include/__ranges/filter_view.h index 6e6719c14470..5b938dd4c16e 100644 --- a/libcxx/include/__ranges/filter_view.h +++ b/libcxx/include/__ranges/filter_view.h @@ -44,6 +44,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 @@ -252,4 +255,6 @@ inline constexpr auto filter = __filter::__fn{}; _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___RANGES_FILTER_VIEW_H diff --git a/libcxx/include/__ranges/iota_view.h b/libcxx/include/__ranges/iota_view.h index c6c9618cfe6c..c8314dd848b4 100644 --- a/libcxx/include/__ranges/iota_view.h +++ b/libcxx/include/__ranges/iota_view.h @@ -41,6 +41,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 @@ -395,4 +398,6 @@ inline constexpr auto iota = __iota::__fn{}; _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___RANGES_IOTA_VIEW_H diff --git a/libcxx/include/__ranges/join_view.h b/libcxx/include/__ranges/join_view.h index 22473059133f..9c2c77995539 100644 --- a/libcxx/include/__ranges/join_view.h +++ b/libcxx/include/__ranges/join_view.h @@ -41,6 +41,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 @@ -415,4 +418,6 @@ struct __segmented_iterator_traits<_JoinViewIterator> { _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___RANGES_JOIN_VIEW_H diff --git a/libcxx/include/__ranges/lazy_split_view.h b/libcxx/include/__ranges/lazy_split_view.h index e96398b14b58..6aedfdabffe3 100644 --- a/libcxx/include/__ranges/lazy_split_view.h +++ b/libcxx/include/__ranges/lazy_split_view.h @@ -47,6 +47,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 @@ -433,4 +436,6 @@ inline constexpr auto lazy_split = __lazy_split_view::__fn{}; _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___RANGES_LAZY_SPLIT_VIEW_H diff --git a/libcxx/include/__ranges/repeat_view.h b/libcxx/include/__ranges/repeat_view.h index d9759abe1cba..d08f0e0d4e9f 100644 --- a/libcxx/include/__ranges/repeat_view.h +++ b/libcxx/include/__ranges/repeat_view.h @@ -34,6 +34,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 23 @@ -257,4 +260,6 @@ inline constexpr bool __is_repeat_specialization<repeat_view<_Tp, _Bound>> = tru _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___RANGES_REPEAT_VIEW_H diff --git a/libcxx/include/__ranges/reverse_view.h b/libcxx/include/__ranges/reverse_view.h index f7846259810c..ddbe8908414f 100644 --- a/libcxx/include/__ranges/reverse_view.h +++ b/libcxx/include/__ranges/reverse_view.h @@ -33,6 +33,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 @@ -196,4 +199,6 @@ inline constexpr auto reverse = __reverse::__fn{}; _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___RANGES_REVERSE_VIEW_H diff --git a/libcxx/include/__ranges/single_view.h b/libcxx/include/__ranges/single_view.h index ead597a9be17..f91c7c352636 100644 --- a/libcxx/include/__ranges/single_view.h +++ b/libcxx/include/__ranges/single_view.h @@ -26,6 +26,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include 
<__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 @@ -101,4 +104,6 @@ inline constexpr auto single = __single_view::__fn{}; _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___RANGES_SINGLE_VIEW_H diff --git a/libcxx/include/__ranges/split_view.h b/libcxx/include/__ranges/split_view.h index 7f03be3c346a..98f17be04f62 100644 --- a/libcxx/include/__ranges/split_view.h +++ b/libcxx/include/__ranges/split_view.h @@ -36,6 +36,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 @@ -224,4 +227,6 @@ inline constexpr auto split = __split_view::__fn{}; _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___RANGES_SPLIT_VIEW_H diff --git a/libcxx/include/__ranges/take_while_view.h b/libcxx/include/__ranges/take_while_view.h index 46cfe4f70ac8..77ea9f7bb813 100644 --- a/libcxx/include/__ranges/take_while_view.h +++ b/libcxx/include/__ranges/take_while_view.h @@ -35,6 +35,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 @@ -162,4 +165,6 @@ inline constexpr auto take_while = __take_while::__fn{}; _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___RANGES_TAKE_WHILE_VIEW_H diff --git a/libcxx/include/__ranges/transform_view.h b/libcxx/include/__ranges/transform_view.h index 3c8d825789cb..dc3aaa59ed8c 100644 --- a/libcxx/include/__ranges/transform_view.h +++ b/libcxx/include/__ranges/transform_view.h @@ -47,6 +47,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 @@ -416,4 +419,6 @@ inline constexpr auto transform = __transform::__fn{}; _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___RANGES_TRANSFORM_VIEW_H diff --git a/libcxx/include/__thread/jthread.h b/libcxx/include/__thread/jthread.h index fc86b13afb13..2fbc8a36755e 100644 --- a/libcxx/include/__thread/jthread.h +++ b/libcxx/include/__thread/jthread.h @@ -28,6 +28,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 20 && !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_STOP_TOKEN) _LIBCPP_BEGIN_NAMESPACE_STD @@ -127,4 +130,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 && !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_STOP_TOKEN) +_LIBCPP_POP_MACROS + #endif // _LIBCPP___THREAD_JTHREAD_H diff --git a/libcxx/include/__thread/thread.h b/libcxx/include/__thread/thread.h index 463bbd677255..0ecaac1b011b 100644 --- a/libcxx/include/__thread/thread.h +++ b/libcxx/include/__thread/thread.h @@ -32,6 +32,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD template <class _Tp> @@ -251,4 +254,6 @@ inline _LIBCPP_HIDE_FROM_ABI void swap(thread& __x, thread& __y) _NOEXCEPT { __x _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___THREAD_THREAD_H diff --git a/libcxx/include/array b/libcxx/include/array index dcb419f536dc..41f016a4859a 100644 --- a/libcxx/include/array +++ b/libcxx/include/array @@ -159,6 +159,9 @@ template <size_t I, class T, size_t N> const T&& get(const array<T, N>&&) noexce # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD template <class _Tp, size_t _Size> @@ -493,6 +496,8 @@ to_array(_Tp (&&__arr)[_Size]) noexcept(is_nothrow_move_constructible_v<_Tp>) { _LIBCPP_END_NAMESPACE_STD 
+_LIBCPP_POP_MACROS + #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include <algorithm> # include <concepts> diff --git a/libcxx/include/condition_variable b/libcxx/include/condition_variable index e375c986e7f1..6aac3c13ef4a 100644 --- a/libcxx/include/condition_variable +++ b/libcxx/include/condition_variable @@ -139,6 +139,9 @@ public: # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #ifndef _LIBCPP_HAS_NO_THREADS _LIBCPP_BEGIN_NAMESPACE_STD @@ -348,6 +351,8 @@ _LIBCPP_END_NAMESPACE_STD #endif // !_LIBCPP_HAS_NO_THREADS +_LIBCPP_POP_MACROS + #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include <atomic> # include <concepts> diff --git a/libcxx/include/experimental/iterator b/libcxx/include/experimental/iterator index 5bb1dd1ada63..e9c1fb6924ec 100644 --- a/libcxx/include/experimental/iterator +++ b/libcxx/include/experimental/iterator @@ -64,6 +64,9 @@ namespace std { # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #if _LIBCPP_STD_VER >= 14 _LIBCPP_BEGIN_NAMESPACE_LFTS @@ -115,6 +118,8 @@ _LIBCPP_END_NAMESPACE_LFTS #endif // _LIBCPP_STD_VER >= 14 +_LIBCPP_POP_MACROS + #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include <iosfwd> # include <type_traits> diff --git a/libcxx/include/future b/libcxx/include/future index 5602ae41c142..4eeb401c9bbc 100644 --- a/libcxx/include/future +++ b/libcxx/include/future @@ -401,6 +401,9 @@ template <class R, class Alloc> struct uses_allocator<packaged_task<R>, Alloc>; # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD // enum class future_errc @@ -2044,6 +2047,8 @@ inline shared_future<void> future<void>::share() _NOEXCEPT { return shared_futur _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 17 # include <chrono> #endif diff --git a/libcxx/include/ios b/libcxx/include/ios index d36f5fb2ca28..8465860d08dc 100644 --- a/libcxx/include/ios +++ b/libcxx/include/ios @@ -242,6 +242,9 @@ storage-class-specifier const error_category& iostream_category() noexcept; # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD typedef ptrdiff_t streamsize; @@ -820,6 +823,8 @@ _LIBCPP_HIDE_FROM_ABI inline ios_base& defaultfloat(ios_base& __str) { _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include <atomic> # include <concepts> diff --git a/libcxx/include/map b/libcxx/include/map index f122f2ebb15b..2edbc0cf6245 100644 --- a/libcxx/include/map +++ b/libcxx/include/map @@ -617,6 +617,9 @@ erase_if(multimap<Key, T, Compare, Allocator>& c, Predicate pred); // C++20 # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD template <class _Key, @@ -2182,6 +2185,8 @@ using multimap _LIBCPP_AVAILABILITY_PMR = _LIBCPP_END_NAMESPACE_STD #endif +_LIBCPP_POP_MACROS + #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include <concepts> # include <cstdlib> diff --git a/libcxx/include/ostream b/libcxx/include/ostream index e2b2c0cbaaf2..180adda201d8 100644 --- a/libcxx/include/ostream +++ b/libcxx/include/ostream @@ -199,6 +199,9 @@ void vprint_nonunicode(ostream& os, string_view fmt, format_args args); # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + 
_LIBCPP_BEGIN_NAMESPACE_STD template <class _CharT, class _Traits> @@ -1169,6 +1172,8 @@ println(ostream& __os, format_string<_Args...> __fmt, _Args&&... __args) { _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include <atomic> # include <concepts> diff --git a/libcxx/include/queue b/libcxx/include/queue index 692e38bb3522..76ef85945662 100644 --- a/libcxx/include/queue +++ b/libcxx/include/queue @@ -283,6 +283,9 @@ template <class T, class Container, class Compare> # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD template <class _Tp, class _Container = deque<_Tp> > @@ -971,6 +974,8 @@ struct _LIBCPP_TEMPLATE_VIS uses_allocator<priority_queue<_Tp, _Container, _Comp _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include <concepts> # include <cstdlib> diff --git a/libcxx/include/set b/libcxx/include/set index 55ba8f8208be..7f8245f8b605 100644 --- a/libcxx/include/set +++ b/libcxx/include/set @@ -552,6 +552,9 @@ erase_if(multiset<Key, Compare, Allocator>& c, Predicate pred); // C++20 # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD template <class _Key, class _Compare, class _Allocator> @@ -1488,6 +1491,8 @@ using multiset _LIBCPP_AVAILABILITY_PMR = std::multiset<_KeyT, _CompareT, polymo _LIBCPP_END_NAMESPACE_STD #endif +_LIBCPP_POP_MACROS + #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include <concepts> # include <cstdlib> diff --git a/libcxx/include/stack b/libcxx/include/stack index 546380b0aacd..f1f6ee8482fd 100644 --- a/libcxx/include/stack +++ b/libcxx/include/stack @@ -138,6 +138,9 @@ template <class T, class Container> # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD template <class _Tp, class _Container = deque<_Tp> > @@ -366,6 +369,8 @@ struct _LIBCPP_TEMPLATE_VIS uses_allocator<stack<_Tp, _Container>, _Alloc> : pub _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include <concepts> # include <functional> diff --git a/libcxx/include/string b/libcxx/include/string index e97139206d4f..ba169c3dbfc9 100644 --- a/libcxx/include/string +++ b/libcxx/include/string @@ -1937,12 +1937,7 @@ private: return (__s + (__a - 1)) & ~(__a - 1); } enum { - __alignment = -#ifdef _LIBCPP_ABI_STRING_8_BYTE_ALIGNMENT - 8 -#else - 16 -#endif + __alignment = 8 }; static _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type __recommend(size_type __s) _NOEXCEPT { if (__s < __min_cap) { diff --git a/libcxx/include/strstream b/libcxx/include/strstream index 7843184e4da4..e20c86baa6df 100644 --- a/libcxx/include/strstream +++ b/libcxx/include/strstream @@ -139,6 +139,9 @@ private: # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD class _LIBCPP_DEPRECATED _LIBCPP_EXPORTED_FROM_ABI strstreambuf : public streambuf { @@ -340,4 +343,6 @@ private: _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP_STRSTREAM diff --git a/libcxx/include/unordered_map b/libcxx/include/unordered_map index 4be25fc1cdd8..2c1782dc879e 100644 --- a/libcxx/include/unordered_map +++ b/libcxx/include/unordered_map @@ -625,6 +625,9 @@ template <class Key, class T, class Hash, class Pred, class Alloc> # pragma GCC 
system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD template <class _Key, @@ -2544,6 +2547,8 @@ using unordered_multimap _LIBCPP_AVAILABILITY_PMR = _LIBCPP_END_NAMESPACE_STD #endif +_LIBCPP_POP_MACROS + #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include <algorithm> # include <bit> diff --git a/libcxx/include/unordered_set b/libcxx/include/unordered_set index 6414885f4c51..50b616907f00 100644 --- a/libcxx/include/unordered_set +++ b/libcxx/include/unordered_set @@ -570,6 +570,9 @@ template <class Value, class Hash, class Pred, class Alloc> # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD template <class _Value, class _Hash, class _Pred, class _Alloc> @@ -1810,6 +1813,8 @@ using unordered_multiset _LIBCPP_AVAILABILITY_PMR = _LIBCPP_END_NAMESPACE_STD #endif +_LIBCPP_POP_MACROS + #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include <concepts> # include <cstdlib> diff --git a/libcxx/include/version b/libcxx/include/version index 9e26da8c1b24..d356976d6454 100644 --- a/libcxx/include/version +++ b/libcxx/include/version @@ -266,7 +266,9 @@ __cpp_lib_within_lifetime 202306L <type_traits> # define __cpp_lib_make_reverse_iterator 201402L # define __cpp_lib_make_unique 201304L # define __cpp_lib_null_iterators 201304L -# define __cpp_lib_quoted_string_io 201304L +# if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# define __cpp_lib_quoted_string_io 201304L +# endif # define __cpp_lib_result_of_sfinae 201210L # define __cpp_lib_robust_nonmodifying_seq_ops 201304L # if !defined(_LIBCPP_HAS_NO_THREADS) @@ -294,7 +296,7 @@ __cpp_lib_within_lifetime 202306L <type_traits> # define __cpp_lib_clamp 201603L # define __cpp_lib_enable_shared_from_this 201603L // # define __cpp_lib_execution 201603L -# if _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_LIBRARY +# if !defined(_LIBCPP_HAS_NO_FILESYSTEM) && _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_LIBRARY # define __cpp_lib_filesystem 201703L # endif # define __cpp_lib_gcd_lcm 201606L @@ -323,7 +325,9 @@ __cpp_lib_within_lifetime 202306L <type_traits> // # define __cpp_lib_parallel_algorithm 201603L # define __cpp_lib_raw_memory_algorithms 201606L # define __cpp_lib_sample 201603L -# define __cpp_lib_scoped_lock 201703L +# if !defined(_LIBCPP_HAS_NO_THREADS) +# define __cpp_lib_scoped_lock 201703L +# endif # if !defined(_LIBCPP_HAS_NO_THREADS) # define __cpp_lib_shared_mutex 201505L # endif @@ -496,7 +500,9 @@ __cpp_lib_within_lifetime 202306L <type_traits> // # define __cpp_lib_freestanding_optional 202311L // # define __cpp_lib_freestanding_string_view 202311L // # define __cpp_lib_freestanding_variant 202311L -# define __cpp_lib_fstream_native_handle 202306L +# if !defined(_LIBCPP_HAS_NO_FILESYSTEM) && !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# define __cpp_lib_fstream_native_handle 202306L +# endif // # define __cpp_lib_function_ref 202306L // # define __cpp_lib_hazard_pointer 202306L // # define __cpp_lib_linalg 202311L diff --git a/libcxx/modules/std/atomic.inc b/libcxx/modules/std/atomic.inc index 5139b7531093..88b31ccdb208 100644 --- a/libcxx/modules/std/atomic.inc +++ b/libcxx/modules/std/atomic.inc @@ -60,7 +60,9 @@ export namespace std { using std::atomic_char; using std::atomic_char16_t; using std::atomic_char32_t; +#ifndef _LIBCPP_HAS_NO_CHAR8_T using std::atomic_char8_t; +#endif using std::atomic_int; using std::atomic_llong; using std::atomic_long; diff --git a/libcxx/modules/std/iosfwd.inc 
b/libcxx/modules/std/iosfwd.inc index ec8b434ca0c5..410fb6aefed8 100644 --- a/libcxx/modules/std/iosfwd.inc +++ b/libcxx/modules/std/iosfwd.inc @@ -14,7 +14,9 @@ export namespace std { #endif using std::u16streampos; using std::u32streampos; +#ifndef _LIBCPP_HAS_NO_CHAR8_T using std::u8streampos; +#endif using std::basic_osyncstream; using std::basic_syncbuf; diff --git a/libcxx/modules/std/string.inc b/libcxx/modules/std/string.inc index c83ee7643f87..9808a96215a1 100644 --- a/libcxx/modules/std/string.inc +++ b/libcxx/modules/std/string.inc @@ -34,7 +34,9 @@ export namespace std { using std::string; using std::u16string; using std::u32string; +#ifndef _LIBCPP_HAS_NO_CHAR8_T using std::u8string; +#endif #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS using std::wstring; #endif @@ -58,7 +60,9 @@ export namespace std { using std::pmr::string; using std::pmr::u16string; using std::pmr::u32string; +#ifndef _LIBCPP_HAS_NO_CHAR8_T using std::pmr::u8string; +#endif #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS using std::pmr::wstring; #endif diff --git a/libcxx/modules/std/string_view.inc b/libcxx/modules/std/string_view.inc index 1fa63a773953..f4f9d80ddb83 100644 --- a/libcxx/modules/std/string_view.inc +++ b/libcxx/modules/std/string_view.inc @@ -27,7 +27,9 @@ export namespace std { using std::string_view; using std::u16string_view; using std::u32string_view; +#ifndef _LIBCPP_HAS_NO_CHAR8_T using std::u8string_view; +#endif #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS using std::wstring_view; #endif diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp index d7d3d3e47814..8ce92b4badfb 100644 --- a/lld/ELF/Arch/RISCV.cpp +++ b/lld/ELF/Arch/RISCV.cpp @@ -61,6 +61,7 @@ enum Op { AUIPC = 0x17, JALR = 0x67, LD = 0x3003, + LUI = 0x37, LW = 0x2003, SRLI = 0x5013, SUB = 0x40000033, @@ -73,6 +74,7 @@ enum Reg { X_T0 = 5, X_T1 = 6, X_T2 = 7, + X_A0 = 10, X_T3 = 28, }; @@ -102,6 +104,26 @@ static uint32_t setLO12_S(uint32_t insn, uint32_t imm) { (extractBits(imm, 4, 0) << 7); } +namespace { +struct SymbolAnchor { + uint64_t offset; + Defined *d; + bool end; // true for the anchor of st_value+st_size +}; +} // namespace + +struct elf::RISCVRelaxAux { + // This records symbol start and end offsets which will be adjusted according + // to the nearest relocDeltas element. + SmallVector<SymbolAnchor, 0> anchors; + // For relocations[i], the actual offset is + // r_offset - (i ? relocDeltas[i-1] : 0). + std::unique_ptr<uint32_t[]> relocDeltas; + // For relocations[i], the actual type is relocTypes[i]. + std::unique_ptr<RelType[]> relocTypes; + SmallVector<uint32_t, 0> writes; +}; + RISCV::RISCV() { copyRel = R_RISCV_COPY; pltRel = R_RISCV_JUMP_SLOT; @@ -119,6 +141,7 @@ RISCV::RISCV() { tlsGotRel = R_RISCV_TLS_TPREL32; } gotRel = symbolicRel; + tlsDescRel = R_RISCV_TLSDESC; // .got[0] = _DYNAMIC gotHeaderEntriesNum = 1; @@ -187,6 +210,8 @@ int64_t RISCV::getImplicitAddend(const uint8_t *buf, RelType type) const { case R_RISCV_JUMP_SLOT: // These relocations are defined as not having an implicit addend. return 0; + case R_RISCV_TLSDESC: + return config->is64 ? 
read64le(buf + 8) : read32le(buf + 4); } } @@ -295,6 +320,12 @@ RelExpr RISCV::getRelExpr(const RelType type, const Symbol &s, case R_RISCV_PCREL_LO12_I: case R_RISCV_PCREL_LO12_S: return R_RISCV_PC_INDIRECT; + case R_RISCV_TLSDESC_HI20: + case R_RISCV_TLSDESC_LOAD_LO12: + case R_RISCV_TLSDESC_ADD_LO12: + return R_TLSDESC_PC; + case R_RISCV_TLSDESC_CALL: + return R_TLSDESC_CALL; case R_RISCV_TLS_GD_HI20: return R_TLSGD_PC; case R_RISCV_TLS_GOT_HI20: @@ -419,6 +450,7 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { case R_RISCV_GOT_HI20: case R_RISCV_PCREL_HI20: + case R_RISCV_TLSDESC_HI20: case R_RISCV_TLS_GD_HI20: case R_RISCV_TLS_GOT_HI20: case R_RISCV_TPREL_HI20: @@ -430,6 +462,8 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { } case R_RISCV_PCREL_LO12_I: + case R_RISCV_TLSDESC_LOAD_LO12: + case R_RISCV_TLSDESC_ADD_LO12: case R_RISCV_TPREL_LO12_I: case R_RISCV_LO12_I: { uint64_t hi = (val + 0x800) >> 12; @@ -513,32 +547,133 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { break; case R_RISCV_RELAX: - return; // Ignored (for now) - + return; + case R_RISCV_TLSDESC: + // The addend is stored in the second word. + if (config->is64) + write64le(loc + 8, val); + else + write32le(loc + 4, val); + break; default: llvm_unreachable("unknown relocation"); } } +static bool relaxable(ArrayRef<Relocation> relocs, size_t i) { + return i + 1 != relocs.size() && relocs[i + 1].type == R_RISCV_RELAX; +} + +static void tlsdescToIe(uint8_t *loc, const Relocation &rel, uint64_t val) { + switch (rel.type) { + case R_RISCV_TLSDESC_HI20: + case R_RISCV_TLSDESC_LOAD_LO12: + write32le(loc, 0x00000013); // nop + break; + case R_RISCV_TLSDESC_ADD_LO12: + write32le(loc, utype(AUIPC, X_A0, hi20(val))); // auipc a0,<hi20> + break; + case R_RISCV_TLSDESC_CALL: + if (config->is64) + write32le(loc, itype(LD, X_A0, X_A0, lo12(val))); // ld a0,<lo12>(a0) + else + write32le(loc, itype(LW, X_A0, X_A0, lo12(val))); // lw a0,<lo12>(a0) + break; + default: + llvm_unreachable("unsupported relocation for TLSDESC to IE"); + } +} + +static void tlsdescToLe(uint8_t *loc, const Relocation &rel, uint64_t val) { + switch (rel.type) { + case R_RISCV_TLSDESC_HI20: + case R_RISCV_TLSDESC_LOAD_LO12: + write32le(loc, 0x00000013); // nop + return; + case R_RISCV_TLSDESC_ADD_LO12: + if (isInt<12>(val)) + write32le(loc, 0x00000013); // nop + else + write32le(loc, utype(LUI, X_A0, hi20(val))); // lui a0,<hi20> + return; + case R_RISCV_TLSDESC_CALL: + if (isInt<12>(val)) + write32le(loc, itype(ADDI, X_A0, 0, val)); // addi a0,zero,<lo12> + else + write32le(loc, itype(ADDI, X_A0, X_A0, lo12(val))); // addi a0,a0,<lo12> + return; + default: + llvm_unreachable("unsupported relocation for TLSDESC to LE"); + } +} + void RISCV::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { uint64_t secAddr = sec.getOutputSection()->addr; if (auto *s = dyn_cast<InputSection>(&sec)) secAddr += s->outSecOff; else if (auto *ehIn = dyn_cast<EhInputSection>(&sec)) secAddr += ehIn->getParent()->outSecOff; - for (size_t i = 0, size = sec.relocs().size(); i != size; ++i) { - const Relocation &rel = sec.relocs()[i]; + uint64_t tlsdescVal = 0; + bool tlsdescRelax = false, isToLe = false; + const ArrayRef<Relocation> relocs = sec.relocs(); + for (size_t i = 0, size = relocs.size(); i != size; ++i) { + const Relocation &rel = relocs[i]; uint8_t *loc = buf + rel.offset; - const uint64_t val = + uint64_t val = sec.getRelocTargetVA(sec.file, rel.type, rel.addend, 
secAddr + rel.offset, *rel.sym, rel.expr); switch (rel.expr) { case R_RELAX_HINT: + continue; + case R_TLSDESC_PC: + // For R_RISCV_TLSDESC_HI20, store &got(sym)-PC to be used by the + // following two instructions L[DW] and ADDI. + if (rel.type == R_RISCV_TLSDESC_HI20) + tlsdescVal = val; + else + val = tlsdescVal; break; + case R_RELAX_TLS_GD_TO_IE: + // Only R_RISCV_TLSDESC_HI20 reaches here. tlsdescVal will be finalized + // after we see R_RISCV_TLSDESC_ADD_LO12 in the R_RELAX_TLS_GD_TO_LE case. + // The net effect is that tlsdescVal will be smaller than `val` to take + // into account NOP instructions (in the absence of R_RISCV_RELAX) + // before AUIPC. + tlsdescVal = val + rel.offset; + isToLe = false; + tlsdescRelax = relaxable(relocs, i); + if (!tlsdescRelax) + tlsdescToIe(loc, rel, val); + continue; + case R_RELAX_TLS_GD_TO_LE: + // See the comment in handleTlsRelocation. For TLSDESC=>IE, + // R_RISCV_TLSDESC_{LOAD_LO12,ADD_LO12,CALL} also reach here. If isToLe is + // false, this is actually the TLSDESC=>IE optimization. + if (rel.type == R_RISCV_TLSDESC_HI20) { + tlsdescVal = val; + isToLe = true; + tlsdescRelax = relaxable(relocs, i); + } else { + if (!isToLe && rel.type == R_RISCV_TLSDESC_ADD_LO12) + tlsdescVal -= rel.offset; + val = tlsdescVal; + } + // When NOP conversion is eligible and relaxation applies, don't write a + // NOP in case an unrelated instruction follows the current instruction. + if (tlsdescRelax && + (rel.type == R_RISCV_TLSDESC_HI20 || + rel.type == R_RISCV_TLSDESC_LOAD_LO12 || + (rel.type == R_RISCV_TLSDESC_ADD_LO12 && isToLe && !hi20(val)))) + continue; + if (isToLe) + tlsdescToLe(loc, rel, val); + else + tlsdescToIe(loc, rel, val); + continue; case R_RISCV_LEB128: if (i + 1 < size) { - const Relocation &rel1 = sec.relocs()[i + 1]; + const Relocation &rel1 = relocs[i + 1]; if (rel.type == R_RISCV_SET_ULEB128 && rel1.type == R_RISCV_SUB_ULEB128 && rel.offset == rel1.offset) { auto val = rel.sym->getVA(rel.addend) - rel1.sym->getVA(rel1.addend); @@ -554,32 +689,12 @@ void RISCV::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { ": R_RISCV_SET_ULEB128 not paired with R_RISCV_SUB_ULEB128"); return; default: - relocate(loc, rel, val); break; } + relocate(loc, rel, val); } } -namespace { -struct SymbolAnchor { - uint64_t offset; - Defined *d; - bool end; // true for the anchor of st_value+st_size -}; -} // namespace - -struct elf::RISCVRelaxAux { - // This records symbol start and end offsets which will be adjusted according - // to the nearest relocDeltas element. - SmallVector<SymbolAnchor, 0> anchors; - // For relocations[i], the actual offset is r_offset - (i ? relocDeltas[i-1] : - // 0). - std::unique_ptr<uint32_t[]> relocDeltas; - // For relocations[i], the actual type is relocTypes[i].
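// Context for the TLSDESC cases above: a sketch of the assumed canonical
// RISC-V TLSDESC call sequence (per the psABI; the label and symbol names
// are illustrative, not from this patch):
//   .Ltd: auipc a0, %tlsdesc_hi(x)              // R_RISCV_TLSDESC_HI20
//         l[dw] t0, %tlsdesc_load_lo(.Ltd)(a0)  // R_RISCV_TLSDESC_LOAD_LO12
//         addi  a0, a0, %tlsdesc_add_lo(.Ltd)   // R_RISCV_TLSDESC_ADD_LO12
//         jalr  t0, t0, %tlsdesc_call(.Ltd)     // R_RISCV_TLSDESC_CALL
// tlsdescToIe() rewrites this into a GOT load of the TP offset (the first
// two slots become NOPs, followed by auipc a0 + l[dw] a0), while
// tlsdescToLe() materializes the offset directly: lui a0 + addi a0, or a
// single addi when the offset fits in a signed 12-bit immediate.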
- std::unique_ptr<RelType[]> relocTypes; - SmallVector<uint32_t, 0> writes; -}; - static void initSymbolAnchors() { SmallVector<InputSection *, 0> storage; for (OutputSection *osec : outputSections) { @@ -715,14 +830,16 @@ static void relaxHi20Lo12(const InputSection &sec, size_t i, uint64_t loc, static bool relax(InputSection &sec) { const uint64_t secAddr = sec.getVA(); + const MutableArrayRef<Relocation> relocs = sec.relocs(); auto &aux = *sec.relaxAux; bool changed = false; ArrayRef<SymbolAnchor> sa = ArrayRef(aux.anchors); uint64_t delta = 0; + bool tlsdescRelax = false, toLeShortForm = false; - std::fill_n(aux.relocTypes.get(), sec.relocs().size(), R_RISCV_NONE); + std::fill_n(aux.relocTypes.get(), relocs.size(), R_RISCV_NONE); aux.writes.clear(); - for (auto [i, r] : llvm::enumerate(sec.relocs())) { + for (auto [i, r] : llvm::enumerate(relocs)) { const uint64_t loc = secAddr + r.offset - delta; uint32_t &cur = aux.relocDeltas[i], remove = 0; switch (r.type) { @@ -743,25 +860,37 @@ static bool relax(InputSection &sec) { } case R_RISCV_CALL: case R_RISCV_CALL_PLT: - if (i + 1 != sec.relocs().size() && - sec.relocs()[i + 1].type == R_RISCV_RELAX) + if (relaxable(relocs, i)) relaxCall(sec, i, loc, r, remove); break; case R_RISCV_TPREL_HI20: case R_RISCV_TPREL_ADD: case R_RISCV_TPREL_LO12_I: case R_RISCV_TPREL_LO12_S: - if (i + 1 != sec.relocs().size() && - sec.relocs()[i + 1].type == R_RISCV_RELAX) + if (relaxable(relocs, i)) relaxTlsLe(sec, i, loc, r, remove); break; case R_RISCV_HI20: case R_RISCV_LO12_I: case R_RISCV_LO12_S: - if (i + 1 != sec.relocs().size() && - sec.relocs()[i + 1].type == R_RISCV_RELAX) + if (relaxable(relocs, i)) relaxHi20Lo12(sec, i, loc, r, remove); break; + case R_RISCV_TLSDESC_HI20: + // For TLSDESC=>LE, we can use the short form if hi20 is zero. + tlsdescRelax = relaxable(relocs, i); + toLeShortForm = tlsdescRelax && r.expr == R_RELAX_TLS_GD_TO_LE && + !hi20(r.sym->getVA(r.addend)); + [[fallthrough]]; + case R_RISCV_TLSDESC_LOAD_LO12: + // For TLSDESC=>LE/IE, AUIPC and L[DW] are removed if relaxable. + if (tlsdescRelax && r.expr != R_TLSDESC_PC) + remove = 4; + break; + case R_RISCV_TLSDESC_ADD_LO12: + if (toLeShortForm) + remove = 4; + break; } // For all anchors whose offsets are <= r.offset, they are preceded by diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index 75e5ee1d0da4..a292e873e72f 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -1788,7 +1788,12 @@ void BinaryFile::parse() { } InputFile *elf::createInternalFile(StringRef name) { - return make<InputFile>(InputFile::InternalKind, MemoryBufferRef("", name)); + auto *file = + make<InputFile>(InputFile::InternalKind, MemoryBufferRef("", name)); + // References from an internal file do not lead to --warn-backrefs + // diagnostics. + file->groupId = 0; + return file; } ELFFileBase *elf::createObjFile(MemoryBufferRef mb, StringRef archiveName, diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index c728dd6c6306..0e0b9783bd88 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -961,12 +961,11 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) { // vector. The computed value is st_value plus a non-negative offset. // Negative values are invalid, so -1 can be used as the tombstone value. // - // If the referenced symbol is discarded (made Undefined), or the - // section defining the referenced symbol is garbage collected, - // sym.getOutputSection() is nullptr. `ds->folded` catches the ICF folded - // case. 
However, resolving a relocation in .debug_line to -1 would stop - debugger users from setting breakpoints on the folded-in function, so - exclude .debug_line. + // If the referenced symbol is relative to a discarded section (due to + // --gc-sections, COMDAT, etc.), it has been converted to an Undefined. + // `ds->folded` catches the ICF folded case. However, resolving a + // relocation in .debug_line to -1 would stop debugger users from setting + // breakpoints on the folded-in function, so exclude .debug_line. // // For pre-DWARF-v5 .debug_loc and .debug_ranges, -1 is a reserved value // (base address selection entry), use 1 (which is used by GNU ld for // .debug_ranges). // // TODO To reduce disruption, we use 0 instead of -1 as the tombstone // value. Enable -1 in a future release. - if (!sym.getOutputSection() || (ds && ds->folded && !isDebugLine)) { + if (!ds || (ds->folded && !isDebugLine)) { // If -z dead-reloc-in-nonalloc= is specified, respect it. uint64_t value = SignExtend64<bits>(*tombstone); // For a 32-bit local TU reference in .debug_names, X86_64::relocate diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index b6a317bc3b6d..79c8230724ad 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -1274,29 +1274,34 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym, if (config->emachine == EM_MIPS) return handleMipsTlsRelocation(type, sym, c, offset, addend, expr); + bool isRISCV = config->emachine == EM_RISCV; if (oneof<R_AARCH64_TLSDESC_PAGE, R_TLSDESC, R_TLSDESC_CALL, R_TLSDESC_PC, R_TLSDESC_GOTPLT>(expr) && config->shared) { + // R_RISCV_TLSDESC_{LOAD_LO12,ADD_LO12,CALL} reference a label. Do not + // set NEEDS_TLSDESC on the label. if (expr != R_TLSDESC_CALL) { - sym.setFlags(NEEDS_TLSDESC); + if (!isRISCV || type == R_RISCV_TLSDESC_HI20) + sym.setFlags(NEEDS_TLSDESC); c.addReloc({expr, type, offset, addend, &sym}); } return 1; } // ARM, Hexagon, LoongArch and RISC-V do not support GD/LD to IE/LE - // relaxation. + // optimizations. + // RISC-V supports TLSDESC to IE/LE optimizations. // For PPC64, if the file has missing R_PPC64_TLSGD/R_PPC64_TLSLD, disable - // relaxation as well. - bool toExecRelax = !config->shared && config->emachine != EM_ARM && - config->emachine != EM_HEXAGON && - config->emachine != EM_LOONGARCH && - config->emachine != EM_RISCV && - !c.file->ppc64DisableTLSRelax; + // optimization as well. + bool execOptimize = + !config->shared && config->emachine != EM_ARM && + config->emachine != EM_HEXAGON && config->emachine != EM_LOONGARCH && + !(isRISCV && expr != R_TLSDESC_PC && expr != R_TLSDESC_CALL) && + !c.file->ppc64DisableTLSRelax; // If we are producing an executable and the symbol is non-preemptable, it - // must be defined and the code sequence can be relaxed to use Local-Exec. + // must be defined and the code sequence can be optimized to use Local-Exec. // // ARM and RISC-V do not support any relaxations for TLS relocations; however, // we can omit the DTPMOD dynamic relocations and resolve them at link time // because they are always 1. This may be necessary for static linking as // DTPMOD may not be expected at load time. bool isLocalInExecutable = !sym.isPreemptible && !config->shared; // Local Dynamic is for access to module local TLS variables, while still // being suitable for being dynamically loaded via dlopen. GOT[e0] is the // module index, with a special value of 0 for the current module. GOT[e1] is // unused. There only needs to be one module index entry. if (oneof<R_TLSLD_GOT, R_TLSLD_GOTPLT, R_TLSLD_PC, R_TLSLD_HINT>(expr)) { - // Local-Dynamic relocs can be relaxed to Local-Exec.
- if (toExecRelax) { + // Local-Dynamic relocs can be optimized to Local-Exec. + if (execOptimize) { c.addReloc({target->adjustTlsExpr(type, R_RELAX_TLS_LD_TO_LE), type, offset, addend, &sym}); return target->getTlsGdRelaxSkip(type); @@ -1322,16 +1327,17 @@ return 1; } - // Local-Dynamic relocs can be relaxed to Local-Exec. + // Local-Dynamic relocs can be optimized to Local-Exec. if (expr == R_DTPREL) { - if (toExecRelax) + if (execOptimize) expr = target->adjustTlsExpr(type, R_RELAX_TLS_LD_TO_LE); c.addReloc({expr, type, offset, addend, &sym}); return 1; } // Local-Dynamic sequence where offset of tls variable relative to dynamic - // thread pointer is stored in the got. This cannot be relaxed to Local-Exec. + // thread pointer is stored in the got. This cannot be optimized to + // Local-Exec. if (expr == R_TLSLD_GOT_OFF) { sym.setFlags(NEEDS_GOT_DTPREL); c.addReloc({expr, type, offset, addend, &sym}); @@ -1341,14 +1347,18 @@ if (oneof<R_AARCH64_TLSDESC_PAGE, R_TLSDESC, R_TLSDESC_CALL, R_TLSDESC_PC, R_TLSDESC_GOTPLT, R_TLSGD_GOT, R_TLSGD_GOTPLT, R_TLSGD_PC, R_LOONGARCH_TLSGD_PAGE_PC>(expr)) { - if (!toExecRelax) { + if (!execOptimize) { sym.setFlags(NEEDS_TLSGD); c.addReloc({expr, type, offset, addend, &sym}); return 1; } - // Global-Dynamic relocs can be relaxed to Initial-Exec or Local-Exec + // Global-Dynamic/TLSDESC can be optimized to Initial-Exec or Local-Exec // depending on the symbol being locally defined or not. + // + // R_RISCV_TLSDESC_{LOAD_LO12,ADD_LO12,CALL} reference a non-preemptible + // label, so the LE optimization will be categorized as + // R_RELAX_TLS_GD_TO_LE. We fix the categorization in RISCV::relocateAlloc. if (sym.isPreemptible) { sym.setFlags(NEEDS_TLSGD_TO_IE); c.addReloc({target->adjustTlsExpr(type, R_RELAX_TLS_GD_TO_IE), type, @@ -1363,9 +1373,9 @@ if (oneof<R_GOT, R_GOTPLT, R_GOT_PC, R_AARCH64_GOT_PAGE_PC, R_LOONGARCH_GOT_PAGE_PC, R_GOT_OFF, R_TLSIE_HINT>(expr)) { ctx.hasTlsIe.store(true, std::memory_order_relaxed); - // Initial-Exec relocs can be relaxed to Local-Exec if the symbol is locally - // defined. + // Initial-Exec relocs can be optimized to Local-Exec if the symbol is + // locally defined. if (execOptimize && isLocalInExecutable) { c.addReloc({R_RELAX_TLS_IE_TO_LE, type, offset, addend, &sym}); } else if (expr != R_TLSIE_HINT) { sym.setFlags(NEEDS_TLSIE); @@ -1463,7 +1473,7 @@ template <class ELFT, class RelTy> void RelocationScanner::scanOne(RelTy *&i) { in.got->hasGotOffRel.store(true, std::memory_order_relaxed); } - // Process TLS relocations, including relaxing TLS relocations. Note that + // Process TLS relocations, including TLS optimizations. Note that // R_TPREL and R_TPREL_NEG relocations are resolved in processAux. if (sym.isTls()) { if (unsigned processed = diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 6f66f3615fa4..501c10f35849 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -1518,12 +1518,12 @@ template <class ELFT> void Writer<ELFT>::sortSections() { if (auto *osd = dyn_cast<OutputDesc>(cmd)) osd->osec.sortRank = getSectionRank(osd->osec); if (!script->hasSectionsCommand) { - // We know that all the OutputSections are contiguous in this case.
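The replacement later in this hunk stops assuming that the OutputDescs form one contiguous run: it first stable-partitions them to the front of sectionCommands, then sorts only that prefix. A toy sketch of the approach, with an illustrative Cmd type standing in for lld's SectionCommand:

    #include <algorithm>
    #include <vector>

    struct Cmd { bool isSection; int rank; };

    void sortLikeSortSections(std::vector<Cmd> &cmds) {
      // Move all section commands to the front; both groups keep their
      // relative order, which tolerates interleaved symbol assignments.
      auto mid = std::stable_partition(cmds.begin(), cmds.end(),
                                       [](const Cmd &c) { return c.isSection; });
      // Sort only the section prefix; everything after `mid` stays put.
      std::stable_sort(cmds.begin(), mid,
                       [](const Cmd &a, const Cmd &b) { return a.rank < b.rank; });
    }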
- auto isSection = [](SectionCommand *cmd) { return isa<OutputDesc>(cmd); }; - std::stable_sort( - llvm::find_if(script->sectionCommands, isSection), - llvm::find_if(llvm::reverse(script->sectionCommands), isSection).base(), - compareSections); + // OutputDescs are mostly contiguous, but may be interleaved with + // SymbolAssignments in the presence of INSERT commands. + auto mid = std::stable_partition( + script->sectionCommands.begin(), script->sectionCommands.end(), + [](SectionCommand *cmd) { return isa<OutputDesc>(cmd); }); + std::stable_sort(script->sectionCommands.begin(), mid, compareSections); } // Process INSERT commands and update output section attributes. From this diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index 01669543cd50..fa0e7f2bc0b3 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -29,8 +29,50 @@ ELF Improvements * ``--fat-lto-objects`` option is added to support LLVM FatLTO. Without ``--fat-lto-objects``, LLD will link LLVM FatLTO objects using the relocatable object file. (`D146778 <https://reviews.llvm.org/D146778>`_) +* ``-Bsymbolic-non-weak`` is added to directly bind non-weak definitions. + (`D158322 <https://reviews.llvm.org/D158322>`_) +* ``--lto-validate-all-vtables-have-type-infos``, which complements + ``--lto-whole-program-visibility``, is added to disable unsafe whole-program + devirtualization. ``--lto-known-safe-vtables=<glob>`` can be used + to mark known-safe vtable symbols. + (`D155659 <https://reviews.llvm.org/D155659>`_) +* ``--save-temps --lto-emit-asm`` now derives ELF/asm file names from bitcode file names. + ``ld.lld --save-temps a.o d/b.o -o out`` will create ELF relocatable files + ``out.lto.a.o``/``d/out.lto.b.o`` instead of ``out1.lto.o``/``out2.lto.o``. + (`#78835 <https://github.com/llvm/llvm-project/pull/78835>`_) +* ``--no-allow-shlib-undefined`` now reports errors for DSOs referencing + non-exported definitions. + (`#70769 <https://github.com/llvm/llvm-project/pull/70769>`_) * common-page-size can now be larger than the system page-size. (`#57618 <https://github.com/llvm/llvm-project/issues/57618>`_) +* When call graph profile information is available due to instrumentation or + sample PGO, input sections are now sorted using the new ``cdsort`` algorithm, + which performs better than the previous ``hfsort`` algorithm. + (`D152840 <https://reviews.llvm.org/D152840>`_) +* Symbol assignments like ``a = DEFINED(a) ? a : 0;`` are now handled. + (`#65866 <https://github.com/llvm/llvm-project/pull/65866>`_) +* ``OVERLAY`` now supports an optional start address and LMA. + (`#77272 <https://github.com/llvm/llvm-project/pull/77272>`_) +* Relocations referencing a symbol defined in a ``/DISCARD/`` section now lead to + an error. + (`#69295 <https://github.com/llvm/llvm-project/pull/69295>`_) +* For AArch64 MTE, global variable descriptors have been implemented. + (`D152921 <https://reviews.llvm.org/D152921>`_) +* ``R_AARCH64_GOTPCREL32`` is now supported. + (`#72584 <https://github.com/llvm/llvm-project/pull/72584>`_) +* ``R_LARCH_PCREL20_S2``/``R_LARCH_ADD6``/``R_LARCH_CALL36`` and extreme code + model relocations are now supported. +* ``--emit-relocs`` is now supported for RISC-V linker relaxation. + (`D159082 <https://reviews.llvm.org/D159082>`_) +* Call relaxation respects RVC when mixing +c and -c relocatable files. + (`#73977 <https://github.com/llvm/llvm-project/pull/73977>`_) +* ``R_RISCV_GOT32_PCREL`` is now supported.
+ (`#72587 <https://github.com/llvm/llvm-project/pull/72587>`_) +* ``R_RISCV_SET_ULEB128``/``R_RISCV_SUB_ULEB128`` relocations are now supported. + (`#72610 <https://github.com/llvm/llvm-project/pull/72610>`_) + (`#77261 <https://github.com/llvm/llvm-project/pull/77261>`_) +* RISC-V TLSDESC is now supported. + (`#79239 <https://github.com/llvm/llvm-project/pull/79239>`_) Breaking changes ---------------- @@ -41,9 +83,29 @@ COFF Improvements * Added support for ``--time-trace`` and associated ``--time-trace-granularity``. This generates a .json profile trace of the linker execution. +* LLD now prefers library paths specified with ``-libpath:`` over the implicitly + detected toolchain paths. + MinGW Improvements ------------------ +* Added support for many LTO and ThinLTO options (most LTO options supported + by the ELF driver, that are implemented by the COFF backend as well, + should be supported now). + +* LLD no longer tries to autodetect and use library paths from MSVC/WinSDK + installations when run in MinGW mode; that mode of operation shouldn't + ever be needed in MinGW mode, and could be a source of unexpected + behaviours. + +* The ``--icf=safe`` option now works as expected; it was previously a no-op. + +* More correctly handle LTO of files that define ``__imp_`` prefixed dllimport + redirections. + +* The strip flags ``-S`` and ``-s`` now can be used to strip out DWARF debug + info and symbol tables while emitting a PDB debug info file. + MachO Improvements ------------------ diff --git a/llvm/include/llvm/Analysis/AliasAnalysis.h b/llvm/include/llvm/Analysis/AliasAnalysis.h index d6f732d35fd4..e8e4f491be5a 100644 --- a/llvm/include/llvm/Analysis/AliasAnalysis.h +++ b/llvm/include/llvm/Analysis/AliasAnalysis.h @@ -287,6 +287,10 @@ public: /// store %l, ... bool MayBeCrossIteration = false; + /// Whether alias analysis is allowed to use the dominator tree, for use by + /// passes that lazily update the DT while performing AA queries. + bool UseDominatorTree = true; + AAQueryInfo(AAResults &AAR, CaptureInfo *CI) : AAR(AAR), CI(CI) {} }; @@ -668,6 +672,9 @@ public: void enableCrossIterationMode() { AAQI.MayBeCrossIteration = true; } + + /// Disable the use of the dominator tree during alias analysis queries. + void disableDominatorTree() { AAQI.UseDominatorTree = false; } }; /// Temporary typedef for legacy code that uses a generic \c AliasAnalysis diff --git a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h index afc1811239f2..7eca82729430 100644 --- a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h +++ b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h @@ -43,20 +43,26 @@ class BasicAAResult : public AAResultBase { const Function &F; const TargetLibraryInfo &TLI; AssumptionCache &AC; - DominatorTree *DT; + /// Use getDT() instead of accessing this member directly, in order to + /// respect the AAQI.UseDominatorTree option. + DominatorTree *DT_; + + DominatorTree *getDT(const AAQueryInfo &AAQI) const { + return AAQI.UseDominatorTree ? 
DT_ : nullptr; + } public: BasicAAResult(const DataLayout &DL, const Function &F, const TargetLibraryInfo &TLI, AssumptionCache &AC, DominatorTree *DT = nullptr) - : DL(DL), F(F), TLI(TLI), AC(AC), DT(DT) {} + : DL(DL), F(F), TLI(TLI), AC(AC), DT_(DT) {} BasicAAResult(const BasicAAResult &Arg) : AAResultBase(Arg), DL(Arg.DL), F(Arg.F), TLI(Arg.TLI), AC(Arg.AC), - DT(Arg.DT) {} + DT_(Arg.DT_) {} BasicAAResult(BasicAAResult &&Arg) : AAResultBase(std::move(Arg)), DL(Arg.DL), F(Arg.F), TLI(Arg.TLI), - AC(Arg.AC), DT(Arg.DT) {} + AC(Arg.AC), DT_(Arg.DT_) {} /// Handle invalidation events in the new pass manager. bool invalidate(Function &Fn, const PreservedAnalyses &PA, diff --git a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h index 6b9d17818201..91e1872e9bd6 100644 --- a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h +++ b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h @@ -122,16 +122,23 @@ public: } BranchProbabilityInfo(BranchProbabilityInfo &&Arg) - : Probs(std::move(Arg.Probs)), LastF(Arg.LastF), - EstimatedBlockWeight(std::move(Arg.EstimatedBlockWeight)) {} + : Handles(std::move(Arg.Handles)), Probs(std::move(Arg.Probs)), + LastF(Arg.LastF), + EstimatedBlockWeight(std::move(Arg.EstimatedBlockWeight)) { + for (auto &Handle : Handles) + Handle.setBPI(this); + } BranchProbabilityInfo(const BranchProbabilityInfo &) = delete; BranchProbabilityInfo &operator=(const BranchProbabilityInfo &) = delete; BranchProbabilityInfo &operator=(BranchProbabilityInfo &&RHS) { releaseMemory(); + Handles = std::move(RHS.Handles); Probs = std::move(RHS.Probs); EstimatedBlockWeight = std::move(RHS.EstimatedBlockWeight); + for (auto &Handle : Handles) + Handle.setBPI(this); return *this; } @@ -279,6 +286,8 @@ private: } public: + void setBPI(BranchProbabilityInfo *BPI) { this->BPI = BPI; } + BasicBlockCallbackVH(const Value *V, BranchProbabilityInfo *BPI = nullptr) : CallbackVH(const_cast<Value *>(V)), BPI(BPI) {} }; diff --git a/llvm/include/llvm/Analysis/Loads.h b/llvm/include/llvm/Analysis/Loads.h index 2880ed33a34c..0926093bba99 100644 --- a/llvm/include/llvm/Analysis/Loads.h +++ b/llvm/include/llvm/Analysis/Loads.h @@ -18,7 +18,7 @@ namespace llvm { -class AAResults; +class BatchAAResults; class AssumptionCache; class DataLayout; class DominatorTree; @@ -129,11 +129,10 @@ extern cl::opt<unsigned> DefMaxInstsToScan; /// location in memory, as opposed to the value operand of a store. /// /// \returns The found value, or nullptr if no value is found. -Value *FindAvailableLoadedValue(LoadInst *Load, - BasicBlock *ScanBB, +Value *FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan = DefMaxInstsToScan, - AAResults *AA = nullptr, + BatchAAResults *AA = nullptr, bool *IsLoadCSE = nullptr, unsigned *NumScanedInst = nullptr); @@ -141,7 +140,8 @@ Value *FindAvailableLoadedValue(LoadInst *Load, /// FindAvailableLoadedValue() for the case where we are not interested in /// finding the closest clobbering instruction if no available load is found. /// This overload cannot be used to scan across multiple blocks. 
-Value *FindAvailableLoadedValue(LoadInst *Load, AAResults &AA, bool *IsLoadCSE, +Value *FindAvailableLoadedValue(LoadInst *Load, BatchAAResults &AA, + bool *IsLoadCSE, unsigned MaxInstsToScan = DefMaxInstsToScan); /// Scan backwards to see if we have the value of the given pointer available @@ -170,7 +170,7 @@ Value *FindAvailableLoadedValue(LoadInst *Load, AAResults &AA, bool *IsLoadCSE, Value *findAvailablePtrLoadStore(const MemoryLocation &Loc, Type *AccessTy, bool AtLeastAtomic, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, - unsigned MaxInstsToScan, AAResults *AA, + unsigned MaxInstsToScan, BatchAAResults *AA, bool *IsLoadCSE, unsigned *NumScanedInst); /// Returns true if a pointer value \p A can be replace with another pointer diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def index f09e12f3038c..07edf68c667a 100644 --- a/llvm/include/llvm/Analysis/VecFuncs.def +++ b/llvm/include/llvm/Analysis/VecFuncs.def @@ -771,8 +771,8 @@ TLI_DEFINE_VECFUNC("log2f", "_ZGVsMxv_log2f", SCALABLE(4), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.log2.f64", "_ZGVsMxv_log2", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.log2.f32", "_ZGVsMxv_log2f", SCALABLE(4), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("modf", "_ZGVsMxvl8_modf", SCALABLE(2), MASKED, "_ZGVsMxvl8") -TLI_DEFINE_VECFUNC("modff", "_ZGVsMxvl4_modff", SCALABLE(4), MASKED, "_ZGVsMxvl4") +TLI_DEFINE_VECFUNC("modf", "_ZGVsNxvl8_modf", SCALABLE(2), NOMASK, "_ZGVsNxvl8") +TLI_DEFINE_VECFUNC("modff", "_ZGVsNxvl4_modff", SCALABLE(4), NOMASK, "_ZGVsNxvl4") TLI_DEFINE_VECFUNC("nextafter", "_ZGVsMxvv_nextafter", SCALABLE(2), MASKED, "_ZGVsMxvv") TLI_DEFINE_VECFUNC("nextafterf", "_ZGVsMxvv_nextafterf", SCALABLE(4), MASKED, "_ZGVsMxvv") @@ -787,11 +787,11 @@ TLI_DEFINE_VECFUNC("sinf", "_ZGVsMxv_sinf", SCALABLE(4), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVsMxv_sin", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVsMxv_sinf", SCALABLE(4), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("sincos", "_ZGVsMxvl8l8_sincos", SCALABLE(2), MASKED, "_ZGVsMxvl8l8") -TLI_DEFINE_VECFUNC("sincosf", "_ZGVsMxvl4l4_sincosf", SCALABLE(4), MASKED, "_ZGVsMxvl4l4") +TLI_DEFINE_VECFUNC("sincos", "_ZGVsNxvl8l8_sincos", SCALABLE(2), NOMASK, "_ZGVsNxvl8l8") +TLI_DEFINE_VECFUNC("sincosf", "_ZGVsNxvl4l4_sincosf", SCALABLE(4), NOMASK, "_ZGVsNxvl4l4") -TLI_DEFINE_VECFUNC("sincospi", "_ZGVsMxvl8l8_sincospi", SCALABLE(2), MASKED, "_ZGVsMxvl8l8") -TLI_DEFINE_VECFUNC("sincospif", "_ZGVsMxvl4l4_sincospif", SCALABLE(4), MASKED, "_ZGVsMxvl4l4") +TLI_DEFINE_VECFUNC("sincospi", "_ZGVsNxvl8l8_sincospi", SCALABLE(2), NOMASK, "_ZGVsNxvl8l8") +TLI_DEFINE_VECFUNC("sincospif", "_ZGVsNxvl4l4_sincospif", SCALABLE(4), NOMASK, "_ZGVsNxvl4l4") TLI_DEFINE_VECFUNC("sinh", "_ZGVsMxv_sinh", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("sinhf", "_ZGVsMxv_sinhf", SCALABLE(4), MASKED, "_ZGVsMxv") @@ -1005,8 +1005,6 @@ TLI_DEFINE_VECFUNC("llvm.log2.f32", "armpl_svlog2_f32_x", SCALABLE(4), MASKED, " TLI_DEFINE_VECFUNC("modf", "armpl_vmodfq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vl8") TLI_DEFINE_VECFUNC("modff", "armpl_vmodfq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl4") -TLI_DEFINE_VECFUNC("modf", "armpl_svmodf_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvl8") -TLI_DEFINE_VECFUNC("modff", "armpl_svmodf_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvl4") TLI_DEFINE_VECFUNC("nextafter", "armpl_vnextafterq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv") TLI_DEFINE_VECFUNC("nextafterf", "armpl_vnextafterq_f32", FIXED(4), NOMASK, 
"_ZGV_LLVM_N4vv") @@ -1035,13 +1033,9 @@ TLI_DEFINE_VECFUNC("llvm.sin.f32", "armpl_svsin_f32_x", SCALABLE(4), MASKED, "_Z TLI_DEFINE_VECFUNC("sincos", "armpl_vsincosq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vl8l8") TLI_DEFINE_VECFUNC("sincosf", "armpl_vsincosq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl4l4") -TLI_DEFINE_VECFUNC("sincos", "armpl_svsincos_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvl8l8") -TLI_DEFINE_VECFUNC("sincosf", "armpl_svsincos_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvl4l4") TLI_DEFINE_VECFUNC("sincospi", "armpl_vsincospiq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vl8l8") TLI_DEFINE_VECFUNC("sincospif", "armpl_vsincospiq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl4l4") -TLI_DEFINE_VECFUNC("sincospi", "armpl_svsincospi_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvl8l8") -TLI_DEFINE_VECFUNC("sincospif", "armpl_svsincospi_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvl4l4") TLI_DEFINE_VECFUNC("sinh", "armpl_vsinhq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("sinhf", "armpl_vsinhq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") diff --git a/llvm/include/llvm/CodeGen/LivePhysRegs.h b/llvm/include/llvm/CodeGen/LivePhysRegs.h index 76bb34d270a2..1d40b1cbb0ea 100644 --- a/llvm/include/llvm/CodeGen/LivePhysRegs.h +++ b/llvm/include/llvm/CodeGen/LivePhysRegs.h @@ -193,11 +193,18 @@ void addLiveIns(MachineBasicBlock &MBB, const LivePhysRegs &LiveRegs); void computeAndAddLiveIns(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB); -/// Convenience function for recomputing live-in's for \p MBB. -static inline void recomputeLiveIns(MachineBasicBlock &MBB) { +/// Convenience function for recomputing live-in's for a MBB. Returns true if +/// any changes were made. +static inline bool recomputeLiveIns(MachineBasicBlock &MBB) { LivePhysRegs LPR; + auto oldLiveIns = MBB.getLiveIns(); + MBB.clearLiveIns(); computeAndAddLiveIns(LPR, MBB); + MBB.sortUniqueLiveIns(); + + auto newLiveIns = MBB.getLiveIns(); + return oldLiveIns != newLiveIns; } } // end namespace llvm diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index c84fd281c6a5..dc2035fa598c 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -111,6 +111,10 @@ public: RegisterMaskPair(MCPhysReg PhysReg, LaneBitmask LaneMask) : PhysReg(PhysReg), LaneMask(LaneMask) {} + + bool operator==(const RegisterMaskPair &other) const { + return PhysReg == other.PhysReg && LaneMask == other.LaneMask; + } }; private: @@ -473,6 +477,8 @@ public: /// Remove entry from the livein set and return iterator to the next. 
livein_iterator removeLiveIn(livein_iterator I); + std::vector<RegisterMaskPair> getLiveIns() const { return LiveIns; } + class liveout_iterator { public: using iterator_category = std::input_iterator_tag; diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index e6db9da5526a..c5f43d17d1c1 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2601,6 +2601,11 @@ def int_amdgcn_ds_bvh_stack_rtn : [ImmArg<ArgIndex<3>>, IntrWillReturn, IntrNoCallback, IntrNoFree] >; +def int_amdgcn_s_wait_event_export_ready : + ClangBuiltin<"__builtin_amdgcn_s_wait_event_export_ready">, + Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn] +>; + // WMMA (Wave Matrix Multiply-Accumulate) intrinsics // // These operations perform a matrix multiplication and accumulation of @@ -2608,10 +2613,10 @@ def int_amdgcn_ds_bvh_stack_rtn : class AMDGPUWmmaIntrinsic<LLVMType AB, LLVMType CD> : Intrinsic< - [CD], // %D + [CD], // %D [ AB, // %A - AB, // %B + LLVMMatchType<1>, // %B LLVMMatchType<0>, // %C ], [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree] @@ -2619,49 +2624,50 @@ class AMDGPUWmmaIntrinsic<LLVMType AB, LLVMType CD> : class AMDGPUWmmaIntrinsicOPSEL<LLVMType AB, LLVMType CD> : Intrinsic< - [CD], // %D + [CD], // %D [ AB, // %A - AB, // %B + LLVMMatchType<1>, // %B LLVMMatchType<0>, // %C - llvm_i1_ty, // %high + llvm_i1_ty, // %high (op_sel) for GFX11, 0 for GFX12 ], [IntrNoMem, IntrConvergent, ImmArg<ArgIndex<3>>, IntrWillReturn, IntrNoCallback, IntrNoFree] >; class AMDGPUWmmaIntrinsicIU<LLVMType AB, LLVMType CD> : Intrinsic< - [CD], // %D + [CD], // %D [ llvm_i1_ty, // %A_sign AB, // %A llvm_i1_ty, // %B_sign - AB, // %B + LLVMMatchType<1>, // %B LLVMMatchType<0>, // %C llvm_i1_ty, // %clamp ], [IntrNoMem, IntrConvergent, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>, IntrWillReturn, IntrNoCallback, IntrNoFree] >; -def int_amdgcn_wmma_f32_16x16x16_f16 : AMDGPUWmmaIntrinsic<llvm_v16f16_ty, llvm_anyfloat_ty>; -def int_amdgcn_wmma_f32_16x16x16_bf16 : AMDGPUWmmaIntrinsic<llvm_v16i16_ty, llvm_anyfloat_ty>; -// The regular, untied f16/bf16 wmma intrinsics only write to one half -// of the registers (set via the op_sel bit). -// The content of the other 16-bit of the registers is undefined. -def int_amdgcn_wmma_f16_16x16x16_f16 : AMDGPUWmmaIntrinsicOPSEL<llvm_v16f16_ty, llvm_anyfloat_ty>; -def int_amdgcn_wmma_bf16_16x16x16_bf16 : AMDGPUWmmaIntrinsicOPSEL<llvm_v16i16_ty, llvm_anyint_ty>; -// The tied versions of the f16/bf16 wmma intrinsics tie the destination matrix -// registers to the input accumulator registers. -// Essentially, the content of the other 16-bit is preserved from the input. -def int_amdgcn_wmma_f16_16x16x16_f16_tied : AMDGPUWmmaIntrinsicOPSEL<llvm_v16f16_ty, llvm_anyfloat_ty>; -def int_amdgcn_wmma_bf16_16x16x16_bf16_tied : AMDGPUWmmaIntrinsicOPSEL<llvm_v16i16_ty, llvm_anyint_ty>; -def int_amdgcn_wmma_i32_16x16x16_iu8 : AMDGPUWmmaIntrinsicIU<llvm_v4i32_ty, llvm_anyint_ty>; -def int_amdgcn_wmma_i32_16x16x16_iu4 : AMDGPUWmmaIntrinsicIU<llvm_v2i32_ty, llvm_anyint_ty>; +// WMMA GFX11Only -def int_amdgcn_s_wait_event_export_ready : - ClangBuiltin<"__builtin_amdgcn_s_wait_event_export_ready">, - Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn] ->; +// The OPSEL intrinsics read from and write to one half of the registers, selected by the op_sel bit. 
+// The tied versions of the f16/bf16 wmma intrinsics tie the destination matrix registers to the input accumulator registers. +// The content of the other 16-bit half is preserved from the input. +def int_amdgcn_wmma_f16_16x16x16_f16_tied : AMDGPUWmmaIntrinsicOPSEL<llvm_anyfloat_ty, llvm_anyfloat_ty>; +def int_amdgcn_wmma_bf16_16x16x16_bf16_tied : AMDGPUWmmaIntrinsicOPSEL<llvm_anyint_ty, llvm_anyint_ty>; + +// WMMA GFX11Plus + +def int_amdgcn_wmma_f32_16x16x16_f16 : AMDGPUWmmaIntrinsic<llvm_anyfloat_ty, llvm_anyfloat_ty>; +def int_amdgcn_wmma_f32_16x16x16_bf16 : AMDGPUWmmaIntrinsic<llvm_anyint_ty, llvm_anyfloat_ty>; +def int_amdgcn_wmma_i32_16x16x16_iu8 : AMDGPUWmmaIntrinsicIU<llvm_anyint_ty, llvm_anyint_ty>; +def int_amdgcn_wmma_i32_16x16x16_iu4 : AMDGPUWmmaIntrinsicIU<llvm_anyint_ty, llvm_anyint_ty>; + +// GFX11: The OPSEL intrinsics read from and write to one half of the registers, selected by the op_sel bit. +// The content of the other 16-bit half is undefined. +// GFX12: The op_sel bit must be 0. +def int_amdgcn_wmma_f16_16x16x16_f16 : AMDGPUWmmaIntrinsicOPSEL<llvm_anyfloat_ty, llvm_anyfloat_ty>; +def int_amdgcn_wmma_bf16_16x16x16_bf16 : AMDGPUWmmaIntrinsicOPSEL<llvm_anyint_ty, llvm_anyint_ty>; //===----------------------------------------------------------------------===// // GFX12 Intrinsics @@ -2681,6 +2687,65 @@ def int_amdgcn_permlanex16_var : ClangBuiltin<"__builtin_amdgcn_permlanex16_var" [IntrNoMem, IntrConvergent, IntrWillReturn, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, IntrNoCallback, IntrNoFree]>; + +// WMMA (Wave Matrix Multiply-Accumulate) intrinsics +// +// These operations perform a matrix multiplication and accumulation of +// the form: D = A * B + C. + +// A and B are <8 x fp8> or <8 x bf8>, but since fp8 and bf8 are not supported by llvm we use <2 x i32>. +def int_amdgcn_wmma_f32_16x16x16_fp8_fp8 : AMDGPUWmmaIntrinsic<llvm_anyint_ty, llvm_anyfloat_ty>; +def int_amdgcn_wmma_f32_16x16x16_fp8_bf8 : AMDGPUWmmaIntrinsic<llvm_anyint_ty, llvm_anyfloat_ty>; +def int_amdgcn_wmma_f32_16x16x16_bf8_fp8 : AMDGPUWmmaIntrinsic<llvm_anyint_ty, llvm_anyfloat_ty>; +def int_amdgcn_wmma_f32_16x16x16_bf8_bf8 : AMDGPUWmmaIntrinsic<llvm_anyint_ty, llvm_anyfloat_ty>; +// A and B are <16 x iu4>. +def int_amdgcn_wmma_i32_16x16x32_iu4 : AMDGPUWmmaIntrinsicIU<llvm_anyint_ty, llvm_anyint_ty>; + +// SWMMAC (Wave Matrix (sparse) Multiply-Accumulate) intrinsics +// +// These operations perform a sparse matrix multiplication and accumulation of +// the form: D = A * B + C. +// A is a sparse matrix, half the size of B, and is expanded using the sparsity index. 
+ +class AMDGPUSWmmacIntrinsicIdx<LLVMType A, LLVMType B, LLVMType CD, LLVMType Index> : + Intrinsic< + [CD], // %D + [ + A, // %A + B, // %B + LLVMMatchType<0>, // %C + Index // %Sparsity index for A + ], + [IntrNoMem, IntrConvergent, IntrWillReturn] +>; + +class AMDGPUSWmmacIntrinsicIUIdx<LLVMType A, LLVMType B, LLVMType CD, LLVMType Index> : + Intrinsic< + [CD], // %D + [ + llvm_i1_ty, // %A_sign + A, // %A + llvm_i1_ty, // %B_sign + B, // %B + LLVMMatchType<0>, // %C + Index, // %Sparsity index for A + llvm_i1_ty, // %clamp + ], + [IntrNoMem, IntrConvergent, IntrWillReturn, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<6>>] +>; + +def int_amdgcn_swmmac_f32_16x16x32_f16 : AMDGPUSWmmacIntrinsicIdx<llvm_anyfloat_ty, llvm_anyfloat_ty, llvm_anyfloat_ty, llvm_anyint_ty>; +def int_amdgcn_swmmac_f32_16x16x32_bf16 : AMDGPUSWmmacIntrinsicIdx<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty, llvm_anyint_ty>; +def int_amdgcn_swmmac_f16_16x16x32_f16 : AMDGPUSWmmacIntrinsicIdx<llvm_anyfloat_ty, llvm_anyfloat_ty, llvm_anyfloat_ty, llvm_anyint_ty>; +def int_amdgcn_swmmac_bf16_16x16x32_bf16 : AMDGPUSWmmacIntrinsicIdx<llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty>; +def int_amdgcn_swmmac_i32_16x16x32_iu8 : AMDGPUSWmmacIntrinsicIUIdx<llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty>; +def int_amdgcn_swmmac_i32_16x16x32_iu4 : AMDGPUSWmmacIntrinsicIUIdx<llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty>; +def int_amdgcn_swmmac_i32_16x16x64_iu4 : AMDGPUSWmmacIntrinsicIUIdx<llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty>; +def int_amdgcn_swmmac_f32_16x16x32_fp8_fp8 : AMDGPUSWmmacIntrinsicIdx<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty, llvm_anyint_ty>; +def int_amdgcn_swmmac_f32_16x16x32_fp8_bf8 : AMDGPUSWmmacIntrinsicIdx<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty, llvm_anyint_ty>; +def int_amdgcn_swmmac_f32_16x16x32_bf8_fp8 : AMDGPUSWmmacIntrinsicIdx<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty, llvm_anyint_ty>; +def int_amdgcn_swmmac_f32_16x16x32_bf8_bf8 : AMDGPUSWmmacIntrinsicIdx<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty, llvm_anyint_ty>; + def int_amdgcn_global_atomic_ordered_add_b64 : AMDGPUAtomicRtn<llvm_i64_ty, global_ptr_ty>; def int_amdgcn_flat_atomic_fmin_num : AMDGPUAtomicRtn<llvm_anyfloat_ty>; @@ -2712,6 +2777,10 @@ class AMDGPULoadTr<LLVMType ptr_ty>: def int_amdgcn_global_load_tr : AMDGPULoadTr<global_ptr_ty>; +// i32 @llvm.amdgcn.wave.id() +def int_amdgcn_wave_id : + DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>; + //===----------------------------------------------------------------------===// // Deep learning intrinsics. //===----------------------------------------------------------------------===// diff --git a/llvm/include/llvm/Support/X86FoldTablesUtils.h b/llvm/include/llvm/Support/X86FoldTablesUtils.h index ed244febc38d..77d32cc7fb37 100644 --- a/llvm/include/llvm/Support/X86FoldTablesUtils.h +++ b/llvm/include/llvm/Support/X86FoldTablesUtils.h @@ -46,11 +46,12 @@ enum { // Broadcast type. 
// (stored in bits 12 - 14) TB_BCAST_TYPE_SHIFT = TB_ALIGN_SHIFT + 3, - TB_BCAST_D = 0 << TB_BCAST_TYPE_SHIFT, - TB_BCAST_Q = 1 << TB_BCAST_TYPE_SHIFT, - TB_BCAST_SS = 2 << TB_BCAST_TYPE_SHIFT, - TB_BCAST_SD = 3 << TB_BCAST_TYPE_SHIFT, - TB_BCAST_SH = 4 << TB_BCAST_TYPE_SHIFT, + TB_BCAST_W = 0 << TB_BCAST_TYPE_SHIFT, + TB_BCAST_D = 1 << TB_BCAST_TYPE_SHIFT, + TB_BCAST_Q = 2 << TB_BCAST_TYPE_SHIFT, + TB_BCAST_SS = 3 << TB_BCAST_TYPE_SHIFT, + TB_BCAST_SD = 4 << TB_BCAST_TYPE_SHIFT, + TB_BCAST_SH = 5 << TB_BCAST_TYPE_SHIFT, TB_BCAST_MASK = 0x7 << TB_BCAST_TYPE_SHIFT, // Unused bits 15-16 diff --git a/llvm/include/llvm/Target/TargetInstrPredicate.td b/llvm/include/llvm/Target/TargetInstrPredicate.td index 82c4c7b23a49..b5419cb9f386 100644 --- a/llvm/include/llvm/Target/TargetInstrPredicate.td +++ b/llvm/include/llvm/Target/TargetInstrPredicate.td @@ -152,6 +152,34 @@ class CheckImmOperand_s<int Index, string Value> : CheckOperandBase<Index> { string ImmVal = Value; } +// Check that the operand at position `Index` is less than `Imm`. +// If field `FunctionMapper` is a non-empty string, then function +// `FunctionMapper` is applied to the operand value, and the return value is then +// compared against `Imm`. +class CheckImmOperandLT<int Index, int Imm> : CheckOperandBase<Index> { + int ImmVal = Imm; +} + +// Check that the operand at position `Index` is greater than `Imm`. +// If field `FunctionMapper` is a non-empty string, then function +// `FunctionMapper` is applied to the operand value, and the return value is then +// compared against `Imm`. +class CheckImmOperandGT<int Index, int Imm> : CheckOperandBase<Index> { + int ImmVal = Imm; +} + +// Check that the operand at position `Index` is less than or equal to `Imm`. +// If field `FunctionMapper` is a non-empty string, then function +// `FunctionMapper` is applied to the operand value, and the return value is then +// compared against `Imm`. +class CheckImmOperandLE<int Index, int Imm> : CheckNot<CheckImmOperandGT<Index, Imm>>; + +// Check that the operand at position `Index` is greater than or equal to `Imm`. +// If field `FunctionMapper` is a non-empty string, then function +// `FunctionMapper` is applied to the operand value, and the return value is then +// compared against `Imm`. +class CheckImmOperandGE<int Index, int Imm> : CheckNot<CheckImmOperandLT<Index, Imm>>; + // Expands to a call to `FunctionMapper` if field `FunctionMapper` is set. // Otherwise, it expands to a CheckNot<CheckInvalidRegOperand<Index>>. class CheckRegOperandSimple<int Index> : CheckOperandBase<Index>; @@ -203,6 +231,12 @@ class CheckAll<list<MCInstPredicate> Sequence> class CheckAny<list<MCInstPredicate> Sequence> : CheckPredicateSequence<Sequence>; +// Check that the operand at position `Index` is in range [Start, End]. +// If field `FunctionMapper` is a non-empty string, then function +// `FunctionMapper` is applied to the operand value, and the return value is then +// compared against range [Start, End]. +class CheckImmOperandRange<int Index, int Start, int End> + : CheckAll<[CheckImmOperandGE<Index, Start>, CheckImmOperandLE<Index, End>]>; // Used to expand the body of a function predicate. See the definition of // TIIPredicate below. 
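To make the semantics of the CheckImmOperand* predicates above concrete, here is a minimal hand-written C++ sketch of the check that CheckImmOperandRange<Index, Start, End> describes. It is illustrative only; the checkImmOperandRange helper is invented for this example and is not part of the patch. Predicates of this kind are ultimately expanded by TableGen into C++ conditions over MCInst operands.

#include <cstdint>
#include "llvm/MC/MCInst.h"

using namespace llvm;

// Hypothetical helper mirroring
//   CheckImmOperandRange<Index, Start, End>
//     = CheckAll<[CheckImmOperandGE<Index, Start>, CheckImmOperandLE<Index, End>]>
// Since LE/GE are defined above as CheckNot of GT/LT, the range test is
// written as two negations rather than direct comparisons.
static bool checkImmOperandRange(const MCInst &MI, unsigned Index,
                                 int64_t Start, int64_t End) {
  int64_t Imm = MI.getOperand(Index).getImm();
  bool GE = !(Imm < Start); // CheckNot<CheckImmOperandLT<Index, Start>>
  bool LE = !(Imm > End);   // CheckNot<CheckImmOperandGT<Index, End>>
  return GE && LE;          // CheckAll of the two sub-predicates
}

If a FunctionMapper were set on the operand check, the mapped value, rather than the raw immediate, would be compared against [Start, End].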
diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h index 623fdc21ba65..6d82748d8004 100644 --- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h +++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h @@ -813,7 +813,8 @@ struct CpuAlias { StringRef Name; }; -inline constexpr CpuAlias CpuAliases[] = {{"grace", "neoverse-v2"}}; +inline constexpr CpuAlias CpuAliases[] = {{"cobalt-100", "neoverse-n2"}, + {"grace", "neoverse-v2"}}; bool getExtensionFeatures( const AArch64::ExtensionBitset &Extensions, diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h index 870dc75b1c1f..49ec8de9c528 100644 --- a/llvm/include/llvm/TargetParser/Triple.h +++ b/llvm/include/llvm/TargetParser/Triple.h @@ -1033,11 +1033,11 @@ public: isWindowsCygwinEnvironment() || isOHOSFamily(); } - /// Tests whether the target uses TLS Descriptor by default. + /// True if the target supports both general-dynamic and TLSDESC, and TLSDESC + /// is enabled by default. bool hasDefaultTLSDESC() const { // TODO: Improve check for other platforms, like Android, and RISC-V - // Note: This is currently only used on RISC-V. - return isOSBinFormatELF() && isAArch64(); + return false; } /// Tests whether the target uses -data-sections as default. diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp index 3178e2d27816..1028b52a7912 100644 --- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -89,7 +89,7 @@ bool BasicAAResult::invalidate(Function &Fn, const PreservedAnalyses &PA, // may be created without handles to some analyses and in that case don't // depend on them. if (Inv.invalidate<AssumptionAnalysis>(Fn, PA) || - (DT && Inv.invalidate<DominatorTreeAnalysis>(Fn, PA))) + (DT_ && Inv.invalidate<DominatorTreeAnalysis>(Fn, PA))) return true; // Otherwise this analysis result remains valid. @@ -1063,6 +1063,7 @@ AliasResult BasicAAResult::aliasGEP( : AliasResult::MayAlias; } + DominatorTree *DT = getDT(AAQI); DecomposedGEP DecompGEP1 = DecomposeGEPExpression(GEP1, DL, &AC, DT); DecomposedGEP DecompGEP2 = DecomposeGEPExpression(V2, DL, &AC, DT); @@ -1556,6 +1557,7 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size, const Value *HintO1 = getUnderlyingObject(Hint1); const Value *HintO2 = getUnderlyingObject(Hint2); + DominatorTree *DT = getDT(AAQI); auto ValidAssumeForPtrContext = [&](const Value *Ptr) { if (const Instruction *PtrI = dyn_cast<Instruction>(Ptr)) { return isValidAssumeForContext(Assume, PtrI, DT, @@ -1735,7 +1737,7 @@ bool BasicAAResult::isValueEqualInPotentialCycles(const Value *V, if (!Inst || Inst->getParent()->isEntryBlock()) return true; - return isNotInCycle(Inst, DT, /*LI*/ nullptr); + return isNotInCycle(Inst, getDT(AAQI), /*LI*/ nullptr); } /// Computes the symbolic difference between two de-composed GEPs. 
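The UseDominatorTree flag introduced above is threaded through AAQueryInfo so that a pass which lazily updates the dominator tree can keep issuing AA queries safely. A minimal usage sketch, assuming an AAResults and two MemoryLocations are already in hand (the queryWithoutDT wrapper is invented for illustration):

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"

using namespace llvm;

// Query aliasing while forbidding BasicAA from consulting the dominator
// tree. BasicAAResult::getDT() then returns nullptr for this query, so
// dominance-based reasoning (as in aliasGEP above) is skipped.
static AliasResult queryWithoutDT(AAResults &AA, const MemoryLocation &LocA,
                                  const MemoryLocation &LocB) {
  BatchAAResults BatchAA(AA);
  BatchAA.disableDominatorTree(); // sets AAQI.UseDominatorTree = false
  return BatchAA.alias(LocA, LocB);
}

The Lint change below shows an in-tree adoption of the same BatchAAResults pattern for FindAvailableLoadedValue.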
diff --git a/llvm/lib/Analysis/Lint.cpp b/llvm/lib/Analysis/Lint.cpp index 1ebc593016bc..16635097d20a 100644 --- a/llvm/lib/Analysis/Lint.cpp +++ b/llvm/lib/Analysis/Lint.cpp @@ -657,11 +657,12 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, BasicBlock::iterator BBI = L->getIterator(); BasicBlock *BB = L->getParent(); SmallPtrSet<BasicBlock *, 4> VisitedBlocks; + BatchAAResults BatchAA(*AA); for (;;) { if (!VisitedBlocks.insert(BB).second) break; if (Value *U = - FindAvailableLoadedValue(L, BB, BBI, DefMaxInstsToScan, AA)) + FindAvailableLoadedValue(L, BB, BBI, DefMaxInstsToScan, &BatchAA)) return findValueImpl(U, OffsetOk, Visited); if (BBI != BB->begin()) break; diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp index 97d21db86abf..6bf0d2f56eb4 100644 --- a/llvm/lib/Analysis/Loads.cpp +++ b/llvm/lib/Analysis/Loads.cpp @@ -450,11 +450,10 @@ llvm::DefMaxInstsToScan("available-load-scan-limit", cl::init(6), cl::Hidden, "to scan backward from a given instruction, when searching for " "available loaded value")); -Value *llvm::FindAvailableLoadedValue(LoadInst *Load, - BasicBlock *ScanBB, +Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan, - AAResults *AA, bool *IsLoad, + BatchAAResults *AA, bool *IsLoad, unsigned *NumScanedInst) { // Don't CSE load that is volatile or anything stronger than unordered. if (!Load->isUnordered()) @@ -583,7 +582,7 @@ static Value *getAvailableLoadStore(Instruction *Inst, const Value *Ptr, Value *llvm::findAvailablePtrLoadStore( const MemoryLocation &Loc, Type *AccessTy, bool AtLeastAtomic, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan, - AAResults *AA, bool *IsLoadCSE, unsigned *NumScanedInst) { + BatchAAResults *AA, bool *IsLoadCSE, unsigned *NumScanedInst) { if (MaxInstsToScan == 0) MaxInstsToScan = ~0U; @@ -664,7 +663,7 @@ Value *llvm::findAvailablePtrLoadStore( return nullptr; } -Value *llvm::FindAvailableLoadedValue(LoadInst *Load, AAResults &AA, +Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BatchAAResults &AA, bool *IsLoadCSE, unsigned MaxInstsToScan) { const DataLayout &DL = Load->getModule()->getDataLayout(); diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 7e67c9015282..dd6b88fee415 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -657,16 +657,18 @@ public: AccessAnalysis(Loop *TheLoop, AAResults *AA, LoopInfo *LI, MemoryDepChecker::DepCandidates &DA, - PredicatedScalarEvolution &PSE) - : TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DepCands(DA), PSE(PSE) { + PredicatedScalarEvolution &PSE, + SmallPtrSetImpl<MDNode *> &LoopAliasScopes) + : TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DepCands(DA), PSE(PSE), + LoopAliasScopes(LoopAliasScopes) { // We're analyzing dependences across loop iterations. BAA.enableCrossIterationMode(); } /// Register a load and whether it is only read from. void addLoad(MemoryLocation &Loc, Type *AccessTy, bool IsReadOnly) { - Value *Ptr = const_cast<Value*>(Loc.Ptr); - AST.add(Loc.getWithNewSize(LocationSize::beforeOrAfterPointer())); + Value *Ptr = const_cast<Value *>(Loc.Ptr); + AST.add(adjustLoc(Loc)); Accesses[MemAccessInfo(Ptr, false)].insert(AccessTy); if (IsReadOnly) ReadOnlyPtr.insert(Ptr); @@ -674,8 +676,8 @@ public: /// Register a store. 
void addStore(MemoryLocation &Loc, Type *AccessTy) { - Value *Ptr = const_cast<Value*>(Loc.Ptr); - AST.add(Loc.getWithNewSize(LocationSize::beforeOrAfterPointer())); + Value *Ptr = const_cast<Value *>(Loc.Ptr); + AST.add(adjustLoc(Loc)); Accesses[MemAccessInfo(Ptr, true)].insert(AccessTy); } @@ -731,6 +733,32 @@ public: private: typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> PtrAccessMap; + /// Adjust the MemoryLocation so that it represents accesses to this + /// location across all iterations, rather than a single one. + MemoryLocation adjustLoc(MemoryLocation Loc) const { + // The accessed location varies within the loop, but remains within the + // underlying object. + Loc.Size = LocationSize::beforeOrAfterPointer(); + Loc.AATags.Scope = adjustAliasScopeList(Loc.AATags.Scope); + Loc.AATags.NoAlias = adjustAliasScopeList(Loc.AATags.NoAlias); + return Loc; + } + + /// Drop alias scopes that are only valid within a single loop iteration. + MDNode *adjustAliasScopeList(MDNode *ScopeList) const { + if (!ScopeList) + return nullptr; + + // For the sake of simplicity, drop the whole scope list if any scope is + // iteration-local. + if (any_of(ScopeList->operands(), [&](Metadata *Scope) { + return LoopAliasScopes.contains(cast<MDNode>(Scope)); + })) + return nullptr; + + return ScopeList; + } + /// Go over all memory access and check whether runtime pointer checks /// are needed and build sets of dependency check candidates. void processMemAccesses(); @@ -775,6 +803,10 @@ private: PredicatedScalarEvolution &PSE; DenseMap<Value *, SmallVector<const Value *, 16>> UnderlyingObjects; + + /// Alias scopes that are declared inside the loop, and as such not valid + /// across iterations. + SmallPtrSetImpl<MDNode *> &LoopAliasScopes; }; } // end anonymous namespace @@ -2283,6 +2315,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, // Holds the Load and Store instructions. SmallVector<LoadInst *, 16> Loads; SmallVector<StoreInst *, 16> Stores; + SmallPtrSet<MDNode *, 8> LoopAliasScopes; // Holds all the different accesses in the loop. unsigned NumReads = 0; @@ -2326,6 +2359,11 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, if (HasComplexMemInst) continue; + // Record alias scopes defined inside the loop. + if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I)) + for (Metadata *Op : Decl->getScopeList()->operands()) + LoopAliasScopes.insert(cast<MDNode>(Op)); + // Many math library functions read the rounding mode. We will only // vectorize a loop if it contains known function calls that don't set // the flag. Therefore, it is safe to ignore this read from memory. @@ -2407,7 +2445,8 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, } MemoryDepChecker::DepCandidates DependentAccesses; - AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE); + AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE, + LoopAliasScopes); // Holds the analyzed pointers. We don't want to call getUnderlyingObjects // multiple times on the same object. If the ptr is accessed twice, once diff --git a/llvm/lib/Analysis/MemorySSAUpdater.cpp b/llvm/lib/Analysis/MemorySSAUpdater.cpp index e87ae7d71fff..aa550f0b6a7b 100644 --- a/llvm/lib/Analysis/MemorySSAUpdater.cpp +++ b/llvm/lib/Analysis/MemorySSAUpdater.cpp @@ -692,25 +692,9 @@ void MemorySSAUpdater::updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, continue; // Determine incoming value and add it as incoming from IncBB. 
- if (MemoryUseOrDef *IncMUD = dyn_cast<MemoryUseOrDef>(IncomingAccess)) { - if (!MSSA->isLiveOnEntryDef(IncMUD)) { - Instruction *IncI = IncMUD->getMemoryInst(); - assert(IncI && "Found MemoryUseOrDef with no Instruction."); - if (Instruction *NewIncI = - cast_or_null<Instruction>(VMap.lookup(IncI))) { - IncMUD = MSSA->getMemoryAccess(NewIncI); - assert(IncMUD && - "MemoryUseOrDef cannot be null, all preds processed."); - } - } - NewPhi->addIncoming(IncMUD, IncBB); - } else { - MemoryPhi *IncPhi = cast<MemoryPhi>(IncomingAccess); - if (MemoryAccess *NewDefPhi = MPhiMap.lookup(IncPhi)) - NewPhi->addIncoming(NewDefPhi, IncBB); - else - NewPhi->addIncoming(IncPhi, IncBB); - } + NewPhi->addIncoming( + getNewDefiningAccessForClone(IncomingAccess, VMap, MPhiMap, MSSA), + IncBB); } if (auto *SingleAccess = onlySingleValue(NewPhi)) { MPhiMap[Phi] = SingleAccess; diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp index a9f78358e57b..ecf7bc30913f 100644 --- a/llvm/lib/CodeGen/BranchFolding.cpp +++ b/llvm/lib/CodeGen/BranchFolding.cpp @@ -2048,8 +2048,10 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { FBB->erase(FBB->begin(), FIB); if (UpdateLiveIns) { - recomputeLiveIns(*TBB); - recomputeLiveIns(*FBB); + bool anyChange = false; + do { + anyChange = recomputeLiveIns(*TBB) || recomputeLiveIns(*FBB); + } while (anyChange); } ++NumHoist; diff --git a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp index da8e1d87319d..a357b4cb4921 100644 --- a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp +++ b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp @@ -14,6 +14,7 @@ #include "llvm/ProfileData/Coverage/CoverageMapping.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" @@ -583,6 +584,160 @@ static unsigned getMaxBitmapSize(const CounterMappingContext &Ctx, return MaxBitmapID + (SizeInBits / CHAR_BIT); } +namespace { + +/// Collect Decisions, Branches, and Expansions and associate them. +class MCDCDecisionRecorder { +private: + /// This holds the DecisionRegion and MCDCBranches under it. + /// Also traverses Expansion(s). + /// The Decision has the number of MCDCBranches and will complete + /// when it is filled with the unique ConditionIDs of its MCDCBranches. + struct DecisionRecord { + const CounterMappingRegion *DecisionRegion; + + /// They are mirrored from DecisionRegion for convenience. + LineColPair DecisionStartLoc; + LineColPair DecisionEndLoc; + + /// This is passed to `MCDCRecordProcessor`, so this should be compatible + /// with `ArrayRef<const CounterMappingRegion *>`. + SmallVector<const CounterMappingRegion *> MCDCBranches; + + /// IDs that are stored in MCDCBranches + /// Complete when all IDs (1 to NumConditions) are met. + DenseSet<CounterMappingRegion::MCDCConditionID> ConditionIDs; + + /// Set of IDs of Expansion(s) that are relevant to DecisionRegion + /// and its children (via expansions). + /// The FileID pointed to by ExpandedFileID is dedicated to the expansion, so + /// the location in the expansion doesn't matter. + DenseSet<unsigned> ExpandedFileIDs; + + DecisionRecord(const CounterMappingRegion &Decision) + : DecisionRegion(&Decision), DecisionStartLoc(Decision.startLoc()), + DecisionEndLoc(Decision.endLoc()) { + assert(Decision.Kind == CounterMappingRegion::MCDCDecisionRegion); + } + + /// Determine whether DecisionRecord dominates `R`. 
+ bool dominates(const CounterMappingRegion &R) const { + // Determine whether `R` is included in `DecisionRegion`. + if (R.FileID == DecisionRegion->FileID && + R.startLoc() >= DecisionStartLoc && R.endLoc() <= DecisionEndLoc) + return true; + + // Determine whether `R` is pointed to by any of the Expansions. + return ExpandedFileIDs.contains(R.FileID); + } + + enum Result { + NotProcessed = 0, /// Irrelevant to this Decision + Processed, /// Added to this Decision + Completed, /// Added and filled this Decision + }; + + /// Add Branch into the Decision + /// \param Branch expects MCDCBranchRegion + /// \returns NotProcessed/Processed/Completed + Result addBranch(const CounterMappingRegion &Branch) { + assert(Branch.Kind == CounterMappingRegion::MCDCBranchRegion); + + auto ConditionID = Branch.MCDCParams.ID; + assert(ConditionID > 0 && "ConditionID should begin with 1"); + + if (ConditionIDs.contains(ConditionID) || + ConditionID > DecisionRegion->MCDCParams.NumConditions) + return NotProcessed; + + if (!this->dominates(Branch)) + return NotProcessed; + + assert(MCDCBranches.size() < DecisionRegion->MCDCParams.NumConditions); + + // Put `ID=1` in front of `MCDCBranches` for convenience + // even if `MCDCBranches` is not in topological order. + if (ConditionID == 1) + MCDCBranches.insert(MCDCBranches.begin(), &Branch); + else + MCDCBranches.push_back(&Branch); + + // Mark `ID` as `assigned`. + ConditionIDs.insert(ConditionID); + + // `Completed` when `MCDCBranches` is full + return (MCDCBranches.size() == DecisionRegion->MCDCParams.NumConditions ? Completed : Processed); + } + + /// Record Expansion if it is relevant to this Decision. + /// Each `Expansion` may nest. + /// \returns true if recorded. + bool recordExpansion(const CounterMappingRegion &Expansion) { + if (!this->dominates(Expansion)) + return false; + + ExpandedFileIDs.insert(Expansion.ExpandedFileID); + return true; + } + }; + +private: + /// Decisions in progress + /// DecisionRecord is added for each MCDCDecisionRegion. + /// DecisionRecord is removed when Decision is completed. + SmallVector<DecisionRecord> Decisions; + +public: + ~MCDCDecisionRecorder() { + assert(Decisions.empty() && "Not all Decisions have been resolved"); + } + + /// Register Region and start recording. + void registerDecision(const CounterMappingRegion &Decision) { + Decisions.emplace_back(Decision); + } + + void recordExpansion(const CounterMappingRegion &Expansion) { + any_of(Decisions, [&Expansion](auto &Decision) { + return Decision.recordExpansion(Expansion); + }); + } + + using DecisionAndBranches = + std::pair<const CounterMappingRegion *, /// Decision + SmallVector<const CounterMappingRegion *> /// Branches + >; + + /// Add MCDCBranchRegion to DecisionRecord. + /// \param Branch to be processed + /// \returns DecisionAndBranches if DecisionRecord completed. + /// Or returns std::nullopt. + std::optional<DecisionAndBranches> + processBranch(const CounterMappingRegion &Branch) { + // Seek each Decision and apply Region to it. + for (auto DecisionIter = Decisions.begin(), DecisionEnd = Decisions.end(); + DecisionIter != DecisionEnd; ++DecisionIter) + switch (DecisionIter->addBranch(Branch)) { + case DecisionRecord::NotProcessed: + continue; + case DecisionRecord::Processed: + return std::nullopt; + case DecisionRecord::Completed: + DecisionAndBranches Result = + std::make_pair(DecisionIter->DecisionRegion, + std::move(DecisionIter->MCDCBranches)); + Decisions.erase(DecisionIter); // No longer used. 
+ return Result; + } + + llvm_unreachable("Branch not found in Decisions"); + } +}; + +} // namespace + Error CoverageMapping::loadFunctionRecord( const CoverageMappingRecord &Record, IndexedInstrProfReader &ProfileReader) { @@ -639,18 +794,13 @@ Error CoverageMapping::loadFunctionRecord( Record.MappingRegions[0].Count.isZero() && Counts[0] > 0) return Error::success(); - unsigned NumConds = 0; - const CounterMappingRegion *MCDCDecision; - std::vector<const CounterMappingRegion *> MCDCBranches; - + MCDCDecisionRecorder MCDCDecisions; FunctionRecord Function(OrigFuncName, Record.Filenames); for (const auto &Region : Record.MappingRegions) { - // If an MCDCDecisionRegion is seen, track the BranchRegions that follow - // it according to Region.NumConditions. + // MCDCDecisionRegion should be handled first, since it overlaps with + // the regions nested inside it. if (Region.Kind == CounterMappingRegion::MCDCDecisionRegion) { - assert(NumConds == 0); - MCDCDecision = &Region; - NumConds = Region.MCDCParams.NumConditions; + MCDCDecisions.registerDecision(Region); continue; } Expected<int64_t> ExecutionCount = Ctx.evaluate(Region.Count); @@ -665,43 +815,47 @@ Error CoverageMapping::loadFunctionRecord( } Function.pushRegion(Region, *ExecutionCount, *AltExecutionCount); - // If a MCDCDecisionRegion was seen, store the BranchRegions that - // correspond to it in a vector, according to the number of conditions - // recorded for the region (tracked by NumConds). - if (NumConds > 0 && Region.Kind == CounterMappingRegion::MCDCBranchRegion) { - MCDCBranches.push_back(&Region); - - // As we move through all of the MCDCBranchRegions that follow the - // MCDCDecisionRegion, decrement NumConds to make sure we account for - // them all before we calculate the bitmap of executed test vectors. - if (--NumConds == 0) { - // Evaluating the test vector bitmap for the decision region entails - // calculating precisely what bits are pertinent to this region alone. - // This is calculated based on the recorded offset into the global - // profile bitmap; the length is calculated based on the recorded - // number of conditions. - Expected<BitVector> ExecutedTestVectorBitmap = - Ctx.evaluateBitmap(MCDCDecision); - if (auto E = ExecutedTestVectorBitmap.takeError()) { - consumeError(std::move(E)); - return Error::success(); - } + // Record ExpansionRegion. + if (Region.Kind == CounterMappingRegion::ExpansionRegion) { + MCDCDecisions.recordExpansion(Region); + continue; + } - // Since the bitmap identifies the executed test vectors for an MC/DC - // DecisionRegion, all of the information is now available to process. - // This is where the bulk of the MC/DC progressing takes place. - Expected<MCDCRecord> Record = Ctx.evaluateMCDCRegion( - *MCDCDecision, *ExecutedTestVectorBitmap, MCDCBranches); - if (auto E = Record.takeError()) { - consumeError(std::move(E)); - return Error::success(); - } + // Do nothing unless MCDCBranchRegion. + if (Region.Kind != CounterMappingRegion::MCDCBranchRegion) + continue; - // Save the MC/DC Record so that it can be visualized later. - Function.pushMCDCRecord(*Record); - MCDCBranches.clear(); - } + auto Result = MCDCDecisions.processBranch(Region); + if (!Result) // No Decision completed yet. + continue; + + auto MCDCDecision = Result->first; + auto &MCDCBranches = Result->second; + + // Evaluating the test vector bitmap for the decision region entails + // calculating precisely what bits are pertinent to this region alone. 
+ // This is calculated based on the recorded offset into the global + // profile bitmap; the length is calculated based on the recorded + // number of conditions. + Expected<BitVector> ExecutedTestVectorBitmap = + Ctx.evaluateBitmap(MCDCDecision); + if (auto E = ExecutedTestVectorBitmap.takeError()) { + consumeError(std::move(E)); + return Error::success(); } + + // Since the bitmap identifies the executed test vectors for an MC/DC + // DecisionRegion, all of the information is now available to process. + // This is where the bulk of the MC/DC processing takes place. + Expected<MCDCRecord> Record = Ctx.evaluateMCDCRegion( + *MCDCDecision, *ExecutedTestVectorBitmap, MCDCBranches); + if (auto E = Record.takeError()) { + consumeError(std::move(E)); + return Error::success(); + } + + // Save the MC/DC Record so that it can be visualized later. + Function.pushMCDCRecord(*Record); } // Don't create records for (filenames, function) pairs we've already seen. diff --git a/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp b/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp index 1c7d8a8909c4..27727f216b05 100644 --- a/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp +++ b/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp @@ -167,7 +167,15 @@ void CoverageMappingWriter::write(raw_ostream &OS) { return LHS.FileID < RHS.FileID; if (LHS.startLoc() != RHS.startLoc()) return LHS.startLoc() < RHS.startLoc(); - return LHS.Kind < RHS.Kind; + + // Put `Decision` before `Expansion`. + auto getKindKey = [](CounterMappingRegion::RegionKind Kind) { + return (Kind == CounterMappingRegion::MCDCDecisionRegion + ? 2 * CounterMappingRegion::ExpansionRegion - 1 + : 2 * Kind); + }; + + return getKindKey(LHS.Kind) < getKindKey(RHS.Kind); }); // Write out the fileid -> filename mapping. diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp index 3c02492e99f1..db2e4ca92ae4 100644 --- a/llvm/lib/Support/RISCVISAInfo.cpp +++ b/llvm/lib/Support/RISCVISAInfo.cpp @@ -128,6 +128,7 @@ static const RISCVSupportedExtension SupportedExtensions[] = { {"zicclsm", {1, 0}}, {"ziccrse", {1, 0}}, {"zicntr", {2, 0}}, + {"zicond", {1, 0}}, {"zicsr", {2, 0}}, {"zifencei", {2, 0}}, {"zihintntl", {1, 0}}, @@ -200,8 +201,6 @@ static const RISCVSupportedExtension SupportedExperimentalExtensions[] = { {"zicfilp", {0, 4}}, {"zicfiss", {0, 4}}, - {"zicond", {1, 0}}, - {"zimop", {0, 1}}, {"ztso", {0, 1}}, diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp index 352c61d48e2f..1af064b6de3c 100644 --- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -1544,6 +1544,12 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated. 
return true; } + case AArch64::COALESCER_BARRIER_FPR16: + case AArch64::COALESCER_BARRIER_FPR32: + case AArch64::COALESCER_BARRIER_FPR64: + case AArch64::COALESCER_BARRIER_FPR128: + MI.eraseFromParent(); + return true; case AArch64::LD1B_2Z_IMM_PSEUDO: return expandMultiVecPseudo( MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass, diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index d55deec97600..732e787d2a32 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -4339,8 +4339,10 @@ AArch64FrameLowering::inlineStackProbeLoopExactMultiple( ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB); MBB.addSuccessor(LoopMBB); // Update liveins. - recomputeLiveIns(*LoopMBB); - recomputeLiveIns(*ExitMBB); + bool anyChange = false; + do { + anyChange = recomputeLiveIns(*ExitMBB) || recomputeLiveIns(*LoopMBB); + } while (anyChange); return ExitMBB->begin(); } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 332fb3765528..e97f5e322014 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2375,6 +2375,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { switch ((AArch64ISD::NodeType)Opcode) { case AArch64ISD::FIRST_NUMBER: break; + MAKE_CASE(AArch64ISD::COALESCER_BARRIER) MAKE_CASE(AArch64ISD::SMSTART) MAKE_CASE(AArch64ISD::SMSTOP) MAKE_CASE(AArch64ISD::RESTORE_ZA) @@ -7154,13 +7155,18 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo, } } +static bool isPassedInFPR(EVT VT) { + return VT.isFixedLengthVector() || + (VT.isFloatingPoint() && !VT.isScalableVector()); +} + /// LowerCallResult - Lower the result values of a call into the /// appropriate copies out of appropriate physical registers. SDValue AArch64TargetLowering::LowerCallResult( SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<CCValAssign> &RVLocs, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn, - SDValue ThisVal) const { + SDValue ThisVal, bool RequiresSMChange) const { DenseMap<unsigned, SDValue> CopiedRegs; // Copy all of the result registers out of their specified physreg. for (unsigned i = 0; i != RVLocs.size(); ++i) { @@ -7205,6 +7211,10 @@ SDValue AArch64TargetLowering::LowerCallResult( break; } + if (RequiresSMChange && isPassedInFPR(VA.getValVT())) + Val = DAG.getNode(AArch64ISD::COALESCER_BARRIER, DL, Val.getValueType(), + Val); + InVals.push_back(Val); } @@ -7915,6 +7925,12 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, return ArgReg.Reg == VA.getLocReg(); }); } else { + // Add an extra level of indirection for streaming mode changes by + // using a pseudo copy node that cannot be rematerialised between a + // smstart/smstop and the call by the simple register coalescer. + if (RequiresSMChange && isPassedInFPR(Arg.getValueType())) + Arg = DAG.getNode(AArch64ISD::COALESCER_BARRIER, DL, + Arg.getValueType(), Arg); RegsToPass.emplace_back(VA.getLocReg(), Arg); RegsUsed.insert(VA.getLocReg()); const TargetOptions &Options = DAG.getTarget().Options; @@ -8151,9 +8167,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, // Handle result values, copying them out of physregs into vregs that we // return. 
- SDValue Result = LowerCallResult(Chain, InGlue, CallConv, IsVarArg, RVLocs, - DL, DAG, InVals, IsThisReturn, - IsThisReturn ? OutVals[0] : SDValue()); + SDValue Result = LowerCallResult( + Chain, InGlue, CallConv, IsVarArg, RVLocs, DL, DAG, InVals, IsThisReturn, + IsThisReturn ? OutVals[0] : SDValue(), RequiresSMChange); if (!Ins.empty()) InGlue = Result.getValue(Result->getNumValues() - 1); @@ -26899,7 +26915,7 @@ bool AArch64TargetLowering::isComplexDeinterleavingOperationSupported( return false; // If the vector is scalable, SVE is enabled, implying support for complex - // numbers. Otherwirse, we need to ensure complex number support is avaialble + // numbers. Otherwise, we need to ensure complex number support is available if (!VTy->isScalableTy() && !Subtarget->hasComplxNum()) return false; @@ -26915,7 +26931,7 @@ bool AArch64TargetLowering::isComplexDeinterleavingOperationSupported( !llvm::isPowerOf2_32(VTyWidth)) return false; - if (ScalarTy->isIntegerTy() && Subtarget->hasSVE2()) { + if (ScalarTy->isIntegerTy() && Subtarget->hasSVE2() && VTy->isScalableTy()) { unsigned ScalarWidth = ScalarTy->getScalarSizeInBits(); return 8 <= ScalarWidth && ScalarWidth <= 64; } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 6505931e17e1..541a810fb5cb 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -58,6 +58,8 @@ enum NodeType : unsigned { CALL_BTI, // Function call followed by a BTI instruction. + COALESCER_BARRIER, + SMSTART, SMSTOP, RESTORE_ZA, @@ -1026,7 +1028,7 @@ private: const SmallVectorImpl<CCValAssign> &RVLocs, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn, - SDValue ThisVal) const; + SDValue ThisVal, bool RequiresSMChange) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 2e8d8c63d6be..9b4bb7c88bc8 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -4098,16 +4098,6 @@ AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) { return MI.getOperand(Idx); } -const MachineOperand & -AArch64InstrInfo::getLdStAmountOp(const MachineInstr &MI) { - switch (MI.getOpcode()) { - default: - llvm_unreachable("Unexpected opcode"); - case AArch64::LDRBBroX: - return MI.getOperand(4); - } -} - static const TargetRegisterClass *getRegClass(const MachineInstr &MI, Register Reg) { if (MI.getParent() == nullptr) @@ -9597,9 +9587,13 @@ AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI, // Update liveins. if (MF.getRegInfo().reservedRegsFrozen()) { - recomputeLiveIns(*LoopTestMBB); - recomputeLiveIns(*LoopBodyMBB); - recomputeLiveIns(*ExitMBB); + bool anyChange = false; + do { + anyChange = recomputeLiveIns(*ExitMBB) || + recomputeLiveIns(*LoopBodyMBB) || + recomputeLiveIns(*LoopTestMBB); + } while (anyChange); } return ExitMBB->begin(); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index db24a19fe5f8..6526f6740747 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -111,9 +111,6 @@ public: /// Returns the immediate offset operator of a load/store. 
static const MachineOperand &getLdStOffsetOp(const MachineInstr &MI); - /// Returns the shift amount operator of a load/store. - static const MachineOperand &getLdStAmountOp(const MachineInstr &MI); - /// Returns whether the instruction is FP or NEON. static bool isFpOrNEON(const MachineInstr &MI); diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index e90b8a8ca7ac..926a89466255 100644 --- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -62,8 +62,6 @@ STATISTIC(NumUnscaledPairCreated, "Number of load/store from unscaled generated"); STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted"); STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted"); -STATISTIC(NumConstOffsetFolded, - "Number of const offset of index address folded"); DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming", "Controls which pairs are considered for renaming"); @@ -77,11 +75,6 @@ static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit", static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100), cl::Hidden); -// The LdStConstLimit limits how far we search for const offset instructions -// when we form index address load/store instructions. -static cl::opt<unsigned> LdStConstLimit("aarch64-load-store-const-scan-limit", - cl::init(10), cl::Hidden); - // Enable register renaming to find additional store pairing opportunities. static cl::opt<bool> EnableRenaming("aarch64-load-store-renaming", cl::init(true), cl::Hidden); @@ -178,13 +171,6 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass { findMatchingUpdateInsnForward(MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit); - // Scan the instruction list to find a register assigned with a const - // value that can be combined with the current instruction (a load or store) - // using base addressing with writeback. Scan forwards. - MachineBasicBlock::iterator - findMatchingConstOffsetBackward(MachineBasicBlock::iterator I, unsigned Limit, - unsigned &Offset); - // Scan the instruction list to find a base register update that can // be combined with the current instruction (a load or store) using // pre or post indexed addressing with writeback. Scan backwards. @@ -196,19 +182,11 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass { bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI, unsigned BaseReg, int Offset); - bool isMatchingMovConstInsn(MachineInstr &MemMI, MachineInstr &MI, - unsigned IndexReg, unsigned &Offset); - // Merge a pre- or post-index base register update into a ld/st instruction. MachineBasicBlock::iterator mergeUpdateInsn(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Update, bool IsPreIdx); - MachineBasicBlock::iterator - mergeConstOffsetInsn(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Update, unsigned Offset, - int Scale); - // Find and merge zero store instructions. bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI); @@ -221,9 +199,6 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass { // Find and merge a base register updates before or after a ld/st instruction. bool tryToMergeLdStUpdate(MachineBasicBlock::iterator &MBBI); - // Find and merge a index ldr/st instructions into a base ld/st instruction. 
- bool tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI, int Scale); - bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt); bool runOnMachineFunction(MachineFunction &Fn) override; @@ -506,16 +481,6 @@ static unsigned getPreIndexedOpcode(unsigned Opc) { } } -static unsigned getBaseAddressOpcode(unsigned Opc) { - // TODO: Add more index address loads/stores. - switch (Opc) { - default: - llvm_unreachable("Opcode has no base address equivalent!"); - case AArch64::LDRBBroX: - return AArch64::LDRBBui; - } -} - static unsigned getPostIndexedOpcode(unsigned Opc) { switch (Opc) { default: @@ -757,20 +722,6 @@ static bool isMergeableLdStUpdate(MachineInstr &MI) { } } -// Make sure this is a reg+reg Ld/St -static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale) { - unsigned Opc = MI.getOpcode(); - switch (Opc) { - default: - return false; - // Scaled instructions. - // TODO: Add more index address loads/stores. - case AArch64::LDRBBroX: - Scale = 1; - return true; - } -} - static bool isRewritableImplicitDef(unsigned Opc) { switch (Opc) { default: @@ -2097,63 +2048,6 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I, return NextI; } -MachineBasicBlock::iterator -AArch64LoadStoreOpt::mergeConstOffsetInsn(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Update, - unsigned Offset, int Scale) { - assert((Update->getOpcode() == AArch64::MOVKWi) && - "Unexpected const mov instruction to merge!"); - MachineBasicBlock::iterator E = I->getParent()->end(); - MachineBasicBlock::iterator NextI = next_nodbg(I, E); - MachineBasicBlock::iterator PrevI = prev_nodbg(Update, E); - MachineInstr &MemMI = *I; - unsigned Mask = (1 << 12) * Scale - 1; - unsigned Low = Offset & Mask; - unsigned High = Offset - Low; - Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg(); - Register IndexReg = AArch64InstrInfo::getLdStOffsetOp(MemMI).getReg(); - MachineInstrBuilder AddMIB, MemMIB; - - // Add IndexReg, BaseReg, High (the BaseReg may be SP) - AddMIB = - BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(AArch64::ADDXri)) - .addDef(IndexReg) - .addUse(BaseReg) - .addImm(High >> 12) // shifted value - .addImm(12); // shift 12 - (void)AddMIB; - // Ld/St DestReg, IndexReg, Imm12 - unsigned NewOpc = getBaseAddressOpcode(I->getOpcode()); - MemMIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) - .add(getLdStRegOp(MemMI)) - .add(AArch64InstrInfo::getLdStOffsetOp(MemMI)) - .addImm(Low / Scale) - .setMemRefs(I->memoperands()) - .setMIFlags(I->mergeFlagsWith(*Update)); - (void)MemMIB; - - ++NumConstOffsetFolded; - LLVM_DEBUG(dbgs() << "Creating base address load/store.\n"); - LLVM_DEBUG(dbgs() << " Replacing instructions:\n "); - LLVM_DEBUG(PrevI->print(dbgs())); - LLVM_DEBUG(dbgs() << " "); - LLVM_DEBUG(Update->print(dbgs())); - LLVM_DEBUG(dbgs() << " "); - LLVM_DEBUG(I->print(dbgs())); - LLVM_DEBUG(dbgs() << " with instruction:\n "); - LLVM_DEBUG(((MachineInstr *)AddMIB)->print(dbgs())); - LLVM_DEBUG(dbgs() << " "); - LLVM_DEBUG(((MachineInstr *)MemMIB)->print(dbgs())); - LLVM_DEBUG(dbgs() << "\n"); - - // Erase the old instructions for the block. 
- I->eraseFromParent(); - PrevI->eraseFromParent(); - Update->eraseFromParent(); - - return NextI; -} - bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI, unsigned BaseReg, int Offset) { @@ -2201,31 +2095,6 @@ bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI, return false; } -bool AArch64LoadStoreOpt::isMatchingMovConstInsn(MachineInstr &MemMI, - MachineInstr &MI, - unsigned IndexReg, - unsigned &Offset) { - // The update instruction source and destination register must be the - // same as the load/store index register. - if (MI.getOpcode() == AArch64::MOVKWi && - TRI->isSuperOrSubRegisterEq(IndexReg, MI.getOperand(1).getReg())) { - - // movz + movk hold a large offset for a Ld/St instruction. - MachineBasicBlock::iterator B = MI.getParent()->begin(); - MachineBasicBlock::iterator MBBI = &MI; - MBBI = prev_nodbg(MBBI, B); - MachineInstr &MovzMI = *MBBI; - if (MovzMI.getOpcode() == AArch64::MOVZWi) { - unsigned Low = MovzMI.getOperand(1).getImm(); - unsigned High = MI.getOperand(2).getImm() << MI.getOperand(3).getImm(); - Offset = High + Low; - // 12-bit optionally shifted immediates are legal for adds. - return Offset >> 24 == 0; - } - } - return false; -} - MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward( MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) { MachineBasicBlock::iterator E = I->getParent()->end(); @@ -2381,60 +2250,6 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward( return E; } -MachineBasicBlock::iterator -AArch64LoadStoreOpt::findMatchingConstOffsetBackward( - MachineBasicBlock::iterator I, unsigned Limit, unsigned &Offset) { - MachineBasicBlock::iterator B = I->getParent()->begin(); - MachineBasicBlock::iterator E = I->getParent()->end(); - MachineInstr &MemMI = *I; - MachineBasicBlock::iterator MBBI = I; - - // If the load is the first instruction in the block, there's obviously - // no matching mov instruction. - if (MBBI == B) - return E; - - // Make sure the IndexReg is killed and the shift amount is zero. - // TODO: Relax this restriction to handle extends; keep processing simple for now. - if (!AArch64InstrInfo::getLdStOffsetOp(MemMI).isKill() || - !AArch64InstrInfo::getLdStAmountOp(MemMI).isImm() || - (AArch64InstrInfo::getLdStAmountOp(MemMI).getImm() != 0)) - return E; - - Register IndexReg = AArch64InstrInfo::getLdStOffsetOp(MemMI).getReg(); - - // Track which register units have been modified and used between the first - // insn (inclusive) and the second insn. - ModifiedRegUnits.clear(); - UsedRegUnits.clear(); - unsigned Count = 0; - do { - MBBI = prev_nodbg(MBBI, B); - MachineInstr &MI = *MBBI; - - // Don't count transient instructions towards the search limit since there - // may be different numbers of them if e.g. debug information is present. - if (!MI.isTransient()) - ++Count; - - // If we found a match, return it. - if (isMatchingMovConstInsn(*I, MI, IndexReg, Offset)) { - return MBBI; - } - - // Update the status of what the instruction clobbered and used. - LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); - - // Otherwise, if the index register is used or modified, we have no match, - // so return early.
- if (!ModifiedRegUnits.available(IndexReg) || - !UsedRegUnits.available(IndexReg)) - return E; - - } while (MBBI != B && Count < Limit); - return E; -} - bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore( MachineBasicBlock::iterator &MBBI) { MachineInstr &MI = *MBBI; @@ -2619,34 +2434,6 @@ bool AArch64LoadStoreOpt::tryToMergeLdStUpdate return false; } -bool AArch64LoadStoreOpt::tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI, - int Scale) { - MachineInstr &MI = *MBBI; - MachineBasicBlock::iterator E = MI.getParent()->end(); - MachineBasicBlock::iterator Update; - - // Don't know how to handle unscaled pre/post-index versions below, so bail. - if (TII->hasUnscaledLdStOffset(MI.getOpcode())) - return false; - - // Look back to try to find a const offset for an index LdSt instruction. For - // example, - // mov x8, #LargeImm ; = a * (1<<12) + imm12 - // ldr x1, [x0, x8] - // merged into: - // add x8, x0, a * (1<<12) - // ldr x1, [x8, imm12] - unsigned Offset; - Update = findMatchingConstOffsetBackward(MBBI, LdStConstLimit, Offset); - if (Update != E && (Offset & (Scale - 1)) == 0) { - // Merge the imm12 into the ld/st. - MBBI = mergeConstOffsetInsn(MBBI, Update, Offset, Scale); - return true; - } - - return false; -} - bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt) { @@ -2725,22 +2512,6 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB, ++MBBI; } - // 5) Find a register assigned with a const value that can be combined - // into the load or store. e.g., - // mov x8, #LargeImm ; = a * (1<<12) + imm12 - // ldr x1, [x0, x8] - // ; becomes - // add x8, x0, a * (1<<12) - // ldr x1, [x8, imm12] - for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); - MBBI != E;) { - int Scale; - if (isMergeableIndexLdSt(*MBBI, Scale) && tryToMergeIndexLdSt(MBBI, Scale)) - Modified = true; - else - ++MBBI; - } - return Modified; } diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index ea9882160d6f..f86e6947c9cd 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -1015,6 +1015,8 @@ bool AArch64RegisterInfo::shouldCoalesce( MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg, const TargetRegisterClass *DstRC, unsigned DstSubReg, const TargetRegisterClass *NewRC, LiveIntervals &LIS) const { + MachineRegisterInfo &MRI = MI->getMF()->getRegInfo(); + if (MI->isCopy() && ((DstRC->getID() == AArch64::GPR64RegClassID) || (DstRC->getID() == AArch64::GPR64commonRegClassID)) && @@ -1023,5 +1025,38 @@ bool AArch64RegisterInfo::shouldCoalesce( // which implements a 32 to 64 bit zero extension // which relies on the upper 32 bits being zeroed. return false; + + auto IsCoalescerBarrier = [](const MachineInstr &MI) { + switch (MI.getOpcode()) { + case AArch64::COALESCER_BARRIER_FPR16: + case AArch64::COALESCER_BARRIER_FPR32: + case AArch64::COALESCER_BARRIER_FPR64: + case AArch64::COALESCER_BARRIER_FPR128: + return true; + default: + return false; + } + }; + + // For calls that temporarily have to toggle streaming mode as part of the + // call-sequence, we need to be more careful when coalescing copy instructions + // so that we don't end up coalescing the NEON/FP result or argument register + // with a whole Z-register; otherwise, after coalescing, the register allocator + // will try to spill/reload the entire Z register.
+ // + // We do this by checking if the node has any defs/uses that are + // COALESCER_BARRIER pseudos. These are 'nops' in practice, but they exist to + // instruct the coalescer to avoid coalescing the copy. + if (MI->isCopy() && SubReg != DstSubReg && + (AArch64::ZPRRegClass.hasSubClassEq(DstRC) || + AArch64::ZPRRegClass.hasSubClassEq(SrcRC))) { + unsigned SrcReg = MI->getOperand(1).getReg(); + if (any_of(MRI.def_instructions(SrcReg), IsCoalescerBarrier)) + return false; + unsigned DstReg = MI->getOperand(0).getReg(); + if (any_of(MRI.use_nodbg_instructions(DstReg), IsCoalescerBarrier)) + return false; + } + return true; } diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index eeae5303a3f8..acf067f2cc5a 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -28,6 +28,8 @@ def AArch64_restore_zt : SDNode<"AArch64ISD::RESTORE_ZT", SDTypeProfile<0, 2, def AArch64_save_zt : SDNode<"AArch64ISD::SAVE_ZT", SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisPtrTy<1>]>, [SDNPHasChain, SDNPSideEffect, SDNPMayStore]>; +def AArch64CoalescerBarrier + : SDNode<"AArch64ISD::COALESCER_BARRIER", SDTypeProfile<1, 1, []>, []>; //===----------------------------------------------------------------------===// // Instruction naming conventions. @@ -189,6 +191,26 @@ def : Pat<(int_aarch64_sme_set_tpidr2 i64:$val), (MSR 0xde85, GPR64:$val)>; def : Pat<(i64 (int_aarch64_sme_get_tpidr2)), (MRS 0xde85)>; + +multiclass CoalescerBarrierPseudo<RegisterClass rc, list<ValueType> vts> { + def NAME : Pseudo<(outs rc:$dst), (ins rc:$src), []>, Sched<[]> { + let Constraints = "$dst = $src"; + } + foreach vt = vts in { + def : Pat<(vt (AArch64CoalescerBarrier (vt rc:$src))), + (!cast<Instruction>(NAME) rc:$src)>; + } +} + +multiclass CoalescerBarriers { + defm _FPR16 : CoalescerBarrierPseudo<FPR16, [bf16, f16]>; + defm _FPR32 : CoalescerBarrierPseudo<FPR32, [f32]>; + defm _FPR64 : CoalescerBarrierPseudo<FPR64, [f64, v8i8, v4i16, v2i32, v1i64, v4f16, v2f32, v1f64, v4bf16]>; + defm _FPR128 : CoalescerBarrierPseudo<FPR128, [f128, v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64, v8bf16]>; +} + +defm COALESCER_BARRIER : CoalescerBarriers; + } // End let Predicates = [HasSME] // Pseudo to match to smstart/smstop. This expands: diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index d611338fc268..992b11da7eee 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -233,15 +233,20 @@ static bool hasPossibleIncompatibleOps(const Function *F) { bool AArch64TTIImpl::areInlineCompatible(const Function *Caller, const Function *Callee) const { - SMEAttrs CallerAttrs(*Caller); - SMEAttrs CalleeAttrs(*Callee); + SMEAttrs CallerAttrs(*Caller), CalleeAttrs(*Callee); + + // When inlining, we should consider the body of the function, not the + // interface. 
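Concretely, a callee whose body is streaming (e.g. one marked __arm_locally_streaming) is treated for inlining as if it had a streaming interface, which is what the hunk below does. A condensed sketch with assumed field names standing in for SMEAttrs:

// Normalize callee attributes before the compatibility checks: what matters
// for inlining is the mode the callee's body executes in.
struct InlineAttrs {
  bool SMCompatible;  // streaming-compatible interface
  bool SMEnabled;     // streaming interface
  bool StreamingBody; // body runs in streaming mode regardless of interface
};

InlineAttrs normalizeCalleeForInlining(InlineAttrs Callee) {
  if (Callee.StreamingBody) {
    Callee.SMCompatible = false;
    Callee.SMEnabled = true;
  }
  return Callee;
}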
+ if (CalleeAttrs.hasStreamingBody()) { + CalleeAttrs.set(SMEAttrs::SM_Compatible, false); + CalleeAttrs.set(SMEAttrs::SM_Enabled, true); + } + if (CalleeAttrs.hasNewZABody()) return false; if (CallerAttrs.requiresLazySave(CalleeAttrs) || - (CallerAttrs.requiresSMChange(CalleeAttrs) && - (!CallerAttrs.hasStreamingInterfaceOrBody() || - !CalleeAttrs.hasStreamingBody()))) { + CallerAttrs.requiresSMChange(CalleeAttrs)) { if (hasPossibleIncompatibleOps(Callee)) return false; } @@ -4062,4 +4067,4 @@ bool AArch64TTIImpl::shouldTreatInstructionLikeSelect(const Instruction *I) { cast<BranchInst>(I->getNextNode())->isUnconditional()) return true; return BaseT::shouldTreatInstructionLikeSelect(I); -}
\ No newline at end of file +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index cb29d5d94759..250e3e350c02 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1506,6 +1506,7 @@ def FeatureISAVersion12 : FeatureSet< FeatureFlatAtomicFaddF32Inst, FeatureImageInsts, FeatureExtendedImageInsts, + FeatureFP8ConversionInsts, FeaturePackedTID, FeatureVcmpxPermlaneHazard, FeatureSALUFloatInsts, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td index a19b03b92923..152f495a452b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -59,6 +59,30 @@ def gi_wmmaopselvop3pmods : GIComplexOperandMatcher<s32, "selectWMMAOpSelVOP3PMods">, GIComplexPatternEquiv<WMMAOpSelVOP3PMods>; +def gi_wmmavisrc : + GIComplexOperandMatcher<s32, "selectWMMAVISrc">, + GIComplexPatternEquiv<WMMAVISrc>; + +def gi_wmmamods : + GIComplexOperandMatcher<s32, "selectWMMAModsF32NegAbs">, + GIComplexPatternEquiv<WMMAModsF32NegAbs>; + +def gi_wmmamodsf16Neg : + GIComplexOperandMatcher<s32, "selectWMMAModsF16Neg">, + GIComplexPatternEquiv<WMMAModsF16Neg>; + +def gi_wmmamodsf16NegAbs : + GIComplexOperandMatcher<s32, "selectWMMAModsF16NegAbs">, + GIComplexPatternEquiv<WMMAModsF16NegAbs>; + +def gi_swmmacindex8 : + GIComplexOperandMatcher<s32, "selectSWMMACIndex8">, + GIComplexPatternEquiv<SWMMACIndex8>; + +def gi_swmmacindex16 : + GIComplexOperandMatcher<s32, "selectSWMMACIndex16">, + GIComplexPatternEquiv<SWMMACIndex16>; + def gi_vop3opselmods : GIComplexOperandMatcher<s32, "selectVOP3OpSelMods">, GIComplexPatternEquiv<VOP3OpSelMods>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 4c35649cec6c..4f7bf3f7d35e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -3048,6 +3048,336 @@ bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In, return true; } +static MachineSDNode *buildRegSequence32(SmallVectorImpl<SDValue> &Elts, + llvm::SelectionDAG *CurDAG, + const SDLoc &DL) { + unsigned DstRegClass; + EVT DstTy; + switch (Elts.size()) { + case 8: + DstRegClass = AMDGPU::VReg_256RegClassID; + DstTy = MVT::v8i32; + break; + case 4: + DstRegClass = AMDGPU::VReg_128RegClassID; + DstTy = MVT::v4i32; + break; + case 2: + DstRegClass = AMDGPU::VReg_64RegClassID; + DstTy = MVT::v2i32; + break; + default: + llvm_unreachable("unhandled Reg sequence size"); + } + + SmallVector<SDValue, 17> Ops; + Ops.push_back(CurDAG->getTargetConstant(DstRegClass, DL, MVT::i32)); + for (unsigned i = 0; i < Elts.size(); ++i) { + Ops.push_back(Elts[i]); + Ops.push_back(CurDAG->getTargetConstant( + SIRegisterInfo::getSubRegFromChannel(i), DL, MVT::i32)); + } + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, DstTy, Ops); +} + +static MachineSDNode *buildRegSequence16(SmallVectorImpl<SDValue> &Elts, + llvm::SelectionDAG *CurDAG, + const SDLoc &DL) { + SmallVector<SDValue, 8> PackedElts; + assert((Elts.size() == 8 || Elts.size() == 16) && + "unhandled Reg sequence size"); + + // Pack 16-bit elements in pairs into a 32-bit register. If both elements + // were unpacked from the same 32-bit source, reuse that source; otherwise + // pack them using v_perm.
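The 0x05040100 selector used in the loop below is worth unpacking. A small software model of V_PERM_B32, assuming the usual byte-pool ordering (src0 supplies the high four bytes of the pool, src1 the low four; selector values above 7, which produce constants, are not modelled):

#include <cassert>
#include <cstdint>

// Each selector byte k picks one byte out of the pool {src0:src1} and places
// it at byte k of the result.
uint32_t permB32(uint32_t Src0, uint32_t Src1, uint32_t Sel) {
  uint64_t Pool = (uint64_t(Src0) << 32) | Src1;
  uint32_t Result = 0;
  for (int Byte = 0; Byte < 4; ++Byte) {
    uint32_t Idx = (Sel >> (8 * Byte)) & 0xff;
    assert(Idx < 8 && "constant-producing selectors not modelled");
    Result |= uint32_t((Pool >> (8 * Idx)) & 0xff) << (8 * Byte);
  }
  return Result;
}

int main() {
  // With Src0 = Elts[i + 1] and Src1 = Elts[i], as in the loop below,
  // selector 0x05040100 keeps the low 16 bits of each source:
  assert(permB32(0xAAAA1111u, 0xBBBB2222u, 0x05040100u) == 0x11112222u);
  return 0;
}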
+ for (unsigned i = 0; i < Elts.size(); i += 2) { + SDValue LoSrc = stripExtractLoElt(stripBitcast(Elts[i])); + SDValue HiSrc; + if (isExtractHiElt(Elts[i + 1], HiSrc) && LoSrc == HiSrc) { + PackedElts.push_back(HiSrc); + } else { + SDValue PackLoLo = CurDAG->getTargetConstant(0x05040100, DL, MVT::i32); + MachineSDNode *Packed = + CurDAG->getMachineNode(AMDGPU::V_PERM_B32_e64, DL, MVT::i32, + {Elts[i + 1], Elts[i], PackLoLo}); + PackedElts.push_back(SDValue(Packed, 0)); + } + } + + return buildRegSequence32(PackedElts, CurDAG, DL); +} + +static MachineSDNode *buildRegSequence(SmallVectorImpl<SDValue> &Elts, + llvm::SelectionDAG *CurDAG, + const SDLoc &DL, unsigned ElementSize) { + if (ElementSize == 16) + return buildRegSequence16(Elts, CurDAG, DL); + if (ElementSize == 32) + return buildRegSequence32(Elts, CurDAG, DL); + llvm_unreachable("Unhandled element size"); +} + +static void selectWMMAModsNegAbs(unsigned ModOpcode, unsigned &Mods, + SmallVectorImpl<SDValue> &Elts, SDValue &Src, + llvm::SelectionDAG *CurDAG, const SDLoc &DL, + unsigned ElementSize) { + if (ModOpcode == ISD::FNEG) { + Mods |= SISrcMods::NEG; + // Check if all elements also have abs modifier + SmallVector<SDValue, 8> NegAbsElts; + for (auto El : Elts) { + if (El.getOpcode() != ISD::FABS) + break; + NegAbsElts.push_back(El->getOperand(0)); + } + if (Elts.size() != NegAbsElts.size()) { + // Neg + Src = SDValue(buildRegSequence(Elts, CurDAG, DL, ElementSize), 0); + } else { + // Neg and Abs + Mods |= SISrcMods::NEG_HI; + Src = SDValue(buildRegSequence(NegAbsElts, CurDAG, DL, ElementSize), 0); + } + } else { + assert(ModOpcode == ISD::FABS); + // Abs + Mods |= SISrcMods::NEG_HI; + Src = SDValue(buildRegSequence(Elts, CurDAG, DL, ElementSize), 0); + } +} + +// Check all f16 elements for modifiers while looking through b32 and v2b16 +// build vectors; stop if an element does not satisfy ModifierCheck.
+static void +checkWMMAElementsModifiersF16(BuildVectorSDNode *BV, + std::function<bool(SDValue)> ModifierCheck) { + for (unsigned i = 0; i < BV->getNumOperands(); ++i) { + if (auto *F16Pair = + dyn_cast<BuildVectorSDNode>(stripBitcast(BV->getOperand(i)))) { + for (unsigned i = 0; i < F16Pair->getNumOperands(); ++i) { + SDValue ElF16 = stripBitcast(F16Pair->getOperand(i)); + if (!ModifierCheck(ElF16)) + break; + } + } + } +} + +bool AMDGPUDAGToDAGISel::SelectWMMAModsF16Neg(SDValue In, SDValue &Src, + SDValue &SrcMods) const { + Src = In; + unsigned Mods = SISrcMods::OP_SEL_1; + + // mods are on f16 elements + if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) { + SmallVector<SDValue, 8> EltsF16; + + checkWMMAElementsModifiersF16(BV, [&](SDValue Element) -> bool { + if (Element.getOpcode() != ISD::FNEG) + return false; + EltsF16.push_back(Element.getOperand(0)); + return true; + }); + + // All elements have neg modifier + if (BV->getNumOperands() * 2 == EltsF16.size()) { + Src = SDValue(buildRegSequence16(EltsF16, CurDAG, SDLoc(In)), 0); + Mods |= SISrcMods::NEG; + Mods |= SISrcMods::NEG_HI; + } + } + + // mods are on v2f16 elements + if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) { + SmallVector<SDValue, 8> EltsV2F16; + for (unsigned i = 0; i < BV->getNumOperands(); ++i) { + SDValue ElV2f16 = stripBitcast(BV->getOperand(i)); + // Based on first element decide which mod we match, neg or abs + if (ElV2f16.getOpcode() != ISD::FNEG) + break; + EltsV2F16.push_back(ElV2f16.getOperand(0)); + } + + // All pairs of elements have neg modifier + if (BV->getNumOperands() == EltsV2F16.size()) { + Src = SDValue(buildRegSequence32(EltsV2F16, CurDAG, SDLoc(In)), 0); + Mods |= SISrcMods::NEG; + Mods |= SISrcMods::NEG_HI; + } + } + + SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); + return true; +} + +bool AMDGPUDAGToDAGISel::SelectWMMAModsF16NegAbs(SDValue In, SDValue &Src, + SDValue &SrcMods) const { + Src = In; + unsigned Mods = SISrcMods::OP_SEL_1; + unsigned ModOpcode; + + // mods are on f16 elements + if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) { + SmallVector<SDValue, 8> EltsF16; + checkWMMAElementsModifiersF16(BV, [&](SDValue ElF16) -> bool { + // Based on first element decide which mod we match, neg or abs + if (EltsF16.empty()) + ModOpcode = (ElF16.getOpcode() == ISD::FNEG) ? ISD::FNEG : ISD::FABS; + if (ElF16.getOpcode() != ModOpcode) + return false; + EltsF16.push_back(ElF16.getOperand(0)); + return true; + }); + + // All elements have ModOpcode modifier + if (BV->getNumOperands() * 2 == EltsF16.size()) + selectWMMAModsNegAbs(ModOpcode, Mods, EltsF16, Src, CurDAG, SDLoc(In), + 16); + } + + // mods are on v2f16 elements + if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) { + SmallVector<SDValue, 8> EltsV2F16; + + for (unsigned i = 0; i < BV->getNumOperands(); ++i) { + SDValue ElV2f16 = stripBitcast(BV->getOperand(i)); + // Based on first element decide which mod we match, neg or abs + if (EltsV2F16.empty()) + ModOpcode = (ElV2f16.getOpcode() == ISD::FNEG) ? 
ISD::FNEG : ISD::FABS; + if (ElV2f16->getOpcode() != ModOpcode) + break; + EltsV2F16.push_back(ElV2f16->getOperand(0)); + } + + // All elements have ModOpcode modifier + if (BV->getNumOperands() == EltsV2F16.size()) + selectWMMAModsNegAbs(ModOpcode, Mods, EltsV2F16, Src, CurDAG, SDLoc(In), + 32); + } + + SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); + return true; +} + +bool AMDGPUDAGToDAGISel::SelectWMMAModsF32NegAbs(SDValue In, SDValue &Src, + SDValue &SrcMods) const { + Src = In; + unsigned Mods = SISrcMods::OP_SEL_1; + unsigned ModOpcode; + SmallVector<SDValue, 8> EltsF32; + + if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) { + for (unsigned i = 0; i < BV->getNumOperands(); ++i) { + SDValue ElF32 = stripBitcast(BV->getOperand(i)); + // Based on first element decide which mod we match, neg or abs + if (EltsF32.empty()) + ModOpcode = (ElF32.getOpcode() == ISD::FNEG) ? ISD::FNEG : ISD::FABS; + if (ElF32.getOpcode() != ModOpcode) + break; + EltsF32.push_back(ElF32.getOperand(0)); + } + + // All elements had ModOpcode modifier + if (BV->getNumOperands() == EltsF32.size()) + selectWMMAModsNegAbs(ModOpcode, Mods, EltsF32, Src, CurDAG, SDLoc(In), + 32); + } + + SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); + return true; +} + +bool AMDGPUDAGToDAGISel::SelectWMMAVISrc(SDValue In, SDValue &Src) const { + if (auto *BV = dyn_cast<BuildVectorSDNode>(In)) { + BitVector UndefElements; + if (SDValue Splat = BV->getSplatValue(&UndefElements)) + if (isInlineImmediate(Splat.getNode())) { + if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Splat)) { + unsigned Imm = C->getAPIntValue().getSExtValue(); + Src = CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i32); + return true; + } + if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Splat)) { + unsigned Imm = C->getValueAPF().bitcastToAPInt().getSExtValue(); + Src = CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i32); + return true; + } + llvm_unreachable("unhandled Constant node"); + } + } + + // 16 bit splat + SDValue SplatSrc32 = stripBitcast(In); + if (auto *SplatSrc32BV = dyn_cast<BuildVectorSDNode>(SplatSrc32)) { + if (SDValue Splat32 = SplatSrc32BV->getSplatValue()) { + SDValue SplatSrc16 = stripBitcast(Splat32); + if (auto *SplatSrc16BV = dyn_cast<BuildVectorSDNode>(SplatSrc16)) { + if (SDValue Splat = SplatSrc16BV->getSplatValue()) { + + // f16 + if (isInlineImmediate(Splat.getNode())) { + const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Splat); + int64_t Imm = C->getValueAPF().bitcastToAPInt().getSExtValue(); + Src = CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i16); + return true; + } + + // bf16 + if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Splat)) { + const SIInstrInfo *TII = Subtarget->getInstrInfo(); + APInt BF16Value = C->getAPIntValue(); + APInt F32Value = BF16Value.zext(32).shl(16); + if (TII->isInlineConstant(F32Value)) { + int64_t Imm = F32Value.getSExtValue(); + Src = CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i32); + return true; + } + } + } + } + } + } + + return false; +} + +bool AMDGPUDAGToDAGISel::SelectSWMMACIndex8(SDValue In, SDValue &Src, + SDValue &IndexKey) const { + unsigned Key = 0; + Src = In; + + if (In.getOpcode() == ISD::SRL) { + const llvm::SDValue &ShiftSrc = In.getOperand(0); + ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(In.getOperand(1)); + if (ShiftSrc.getValueType().getSizeInBits() == 32 && ShiftAmt && + ShiftAmt->getZExtValue() % 8 == 0) { + Key = ShiftAmt->getZExtValue() / 8; + Src = ShiftSrc; + } + } + + IndexKey = 
CurDAG->getTargetConstant(Key, SDLoc(In), MVT::i32); + return true; +} + +bool AMDGPUDAGToDAGISel::SelectSWMMACIndex16(SDValue In, SDValue &Src, + SDValue &IndexKey) const { + unsigned Key = 0; + Src = In; + + if (In.getOpcode() == ISD::SRL) { + const llvm::SDValue &ShiftSrc = In.getOperand(0); + ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(In.getOperand(1)); + if (ShiftSrc.getValueType().getSizeInBits() == 32 && ShiftAmt && + ShiftAmt->getZExtValue() == 16) { + Key = 1; + Src = ShiftSrc; + } + } + + IndexKey = CurDAG->getTargetConstant(Key, SDLoc(In), MVT::i32); + return true; +} + bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const { Src = In; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h index 8645490f0b16..3b42d88df0c2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h @@ -240,6 +240,16 @@ private: bool SelectVOP3PModsNeg(SDValue In, SDValue &Src) const; bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const; + bool SelectWMMAModsF32NegAbs(SDValue In, SDValue &Src, + SDValue &SrcMods) const; + bool SelectWMMAModsF16Neg(SDValue In, SDValue &Src, SDValue &SrcMods) const; + bool SelectWMMAModsF16NegAbs(SDValue In, SDValue &Src, + SDValue &SrcMods) const; + bool SelectWMMAVISrc(SDValue In, SDValue &Src) const; + + bool SelectSWMMACIndex8(SDValue In, SDValue &Src, SDValue &IndexKey) const; + bool SelectSWMMACIndex16(SDValue In, SDValue &Src, SDValue &IndexKey) const; + bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const; bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 55d95154c758..2af53a664ff1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -577,6 +577,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, ISD::AssertSext, ISD::INTRINSIC_WO_CHAIN}); setMaxAtomicSizeInBitsSupported(64); + setMaxDivRemBitWidthSupported(64); } bool AMDGPUTargetLowering::mayIgnoreSignedZero(SDValue Op) const { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index fdee74d58d26..f255d098b631 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -3956,6 +3956,219 @@ AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods( }}; } +static Register buildRegSequence(SmallVectorImpl<Register> &Elts, + MachineInstr *InsertPt, + MachineRegisterInfo &MRI) { + const TargetRegisterClass *DstRegClass; + switch (Elts.size()) { + case 8: + DstRegClass = &AMDGPU::VReg_256RegClass; + break; + case 4: + DstRegClass = &AMDGPU::VReg_128RegClass; + break; + case 2: + DstRegClass = &AMDGPU::VReg_64RegClass; + break; + default: + llvm_unreachable("unhandled Reg sequence size"); + } + + MachineIRBuilder B(*InsertPt); + auto MIB = B.buildInstr(AMDGPU::REG_SEQUENCE) + .addDef(MRI.createVirtualRegister(DstRegClass)); + for (unsigned i = 0; i < Elts.size(); ++i) { + MIB.addReg(Elts[i]); + MIB.addImm(SIRegisterInfo::getSubRegFromChannel(i)); + } + return MIB->getOperand(0).getReg(); +} + +static void selectWMMAModsNegAbs(unsigned ModOpcode, unsigned &Mods, + SmallVectorImpl<Register> &Elts, Register &Src, + MachineInstr *InsertPt, + MachineRegisterInfo &MRI) { + if (ModOpcode == 
TargetOpcode::G_FNEG) { + Mods |= SISrcMods::NEG; + // Check if all elements also have abs modifier + SmallVector<Register, 8> NegAbsElts; + for (auto El : Elts) { + Register FabsSrc; + if (!mi_match(El, MRI, m_GFabs(m_Reg(FabsSrc)))) + break; + NegAbsElts.push_back(FabsSrc); + } + if (Elts.size() != NegAbsElts.size()) { + // Neg + Src = buildRegSequence(Elts, InsertPt, MRI); + } else { + // Neg and Abs + Mods |= SISrcMods::NEG_HI; + Src = buildRegSequence(NegAbsElts, InsertPt, MRI); + } + } else { + assert(ModOpcode == TargetOpcode::G_FABS); + // Abs + Mods |= SISrcMods::NEG_HI; + Src = buildRegSequence(Elts, InsertPt, MRI); + } +} + +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectWMMAModsF32NegAbs(MachineOperand &Root) const { + Register Src = Root.getReg(); + unsigned Mods = SISrcMods::OP_SEL_1; + unsigned ModOpcode; + SmallVector<Register, 8> EltsF32; + + if (GBuildVector *BV = dyn_cast<GBuildVector>(MRI->getVRegDef(Src))) { + for (unsigned i = 0; i < BV->getNumSources(); ++i) { + MachineInstr *ElF32 = MRI->getVRegDef(BV->getSourceReg(i)); + // Based on first element decide which mod we match, neg or abs + if (EltsF32.empty()) + ModOpcode = (ElF32->getOpcode() == AMDGPU::G_FNEG) ? AMDGPU::G_FNEG + : AMDGPU::G_FABS; + if (ElF32->getOpcode() != ModOpcode) + break; + EltsF32.push_back(ElF32->getOperand(1).getReg()); + } + + // All elements had ModOpcode modifier + if (BV->getNumSources() == EltsF32.size()) { + selectWMMAModsNegAbs(ModOpcode, Mods, EltsF32, Src, Root.getParent(), + *MRI); + } + } + + return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}}; +} + +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectWMMAModsF16Neg(MachineOperand &Root) const { + Register Src = Root.getReg(); + unsigned Mods = SISrcMods::OP_SEL_1; + SmallVector<Register, 8> EltsV2F16; + + if (GConcatVectors *CV = dyn_cast<GConcatVectors>(MRI->getVRegDef(Src))) { + for (unsigned i = 0; i < CV->getNumSources(); ++i) { + Register FNegSrc; + if (!mi_match(CV->getSourceReg(i), *MRI, m_GFNeg(m_Reg(FNegSrc)))) + break; + EltsV2F16.push_back(FNegSrc); + } + + // All elements had ModOpcode modifier + if (CV->getNumSources() == EltsV2F16.size()) { + Mods |= SISrcMods::NEG; + Mods |= SISrcMods::NEG_HI; + Src = buildRegSequence(EltsV2F16, Root.getParent(), *MRI); + } + } + + return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}}; +} + +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectWMMAModsF16NegAbs(MachineOperand &Root) const { + Register Src = Root.getReg(); + unsigned Mods = SISrcMods::OP_SEL_1; + unsigned ModOpcode; + SmallVector<Register, 8> EltsV2F16; + + if (GConcatVectors *CV = dyn_cast<GConcatVectors>(MRI->getVRegDef(Src))) { + for (unsigned i = 0; i < CV->getNumSources(); ++i) { + MachineInstr *ElV2F16 = MRI->getVRegDef(CV->getSourceReg(i)); + // Based on first element decide which mod we match, neg or abs + if (EltsV2F16.empty()) + ModOpcode = (ElV2F16->getOpcode() == AMDGPU::G_FNEG) ? 
AMDGPU::G_FNEG + : AMDGPU::G_FABS; + if (ElV2F16->getOpcode() != ModOpcode) + break; + EltsV2F16.push_back(ElV2F16->getOperand(1).getReg()); + } + + // All elements had ModOpcode modifier + if (CV->getNumSources() == EltsV2F16.size()) { + MachineIRBuilder B(*Root.getParent()); + selectWMMAModsNegAbs(ModOpcode, Mods, EltsV2F16, Src, Root.getParent(), + *MRI); + } + } + + return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}}; +} + +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectWMMAVISrc(MachineOperand &Root) const { + std::optional<FPValueAndVReg> FPValReg; + if (mi_match(Root.getReg(), *MRI, m_GFCstOrSplat(FPValReg))) { + if (TII.isInlineConstant(FPValReg->Value.bitcastToAPInt())) { + return {{[=](MachineInstrBuilder &MIB) { + MIB.addImm(FPValReg->Value.bitcastToAPInt().getSExtValue()); + }}}; + } + // Non-inlineable splat floats should not fall-through for integer immediate + // checks. + return {}; + } + + APInt ICst; + if (mi_match(Root.getReg(), *MRI, m_ICstOrSplat(ICst))) { + if (TII.isInlineConstant(ICst)) { + return { + {[=](MachineInstrBuilder &MIB) { MIB.addImm(ICst.getSExtValue()); }}}; + } + } + + return {}; +} + +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectSWMMACIndex8(MachineOperand &Root) const { + Register Src = + getDefIgnoringCopies(Root.getReg(), *MRI)->getOperand(0).getReg(); + unsigned Key = 0; + + Register ShiftSrc; + std::optional<ValueAndVReg> ShiftAmt; + if (mi_match(Src, *MRI, m_GLShr(m_Reg(ShiftSrc), m_GCst(ShiftAmt))) && + MRI->getType(ShiftSrc).getSizeInBits() == 32 && + ShiftAmt->Value.getZExtValue() % 8 == 0) { + Key = ShiftAmt->Value.getZExtValue() / 8; + Src = ShiftSrc; + } + + return {{ + [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(Key); } // index_key + }}; +} + +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectSWMMACIndex16(MachineOperand &Root) const { + + Register Src = + getDefIgnoringCopies(Root.getReg(), *MRI)->getOperand(0).getReg(); + unsigned Key = 0; + + Register ShiftSrc; + std::optional<ValueAndVReg> ShiftAmt; + if (mi_match(Src, *MRI, m_GLShr(m_Reg(ShiftSrc), m_GCst(ShiftAmt))) && + MRI->getType(ShiftSrc).getSizeInBits() == 32 && + ShiftAmt->Value.getZExtValue() == 16) { + Src = ShiftSrc; + Key = 1; + } + + return {{ + [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(Key); } // index_key + }}; +} + InstructionSelector::ComplexRendererFns AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const { Register Src; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index 12ea46c2895b..ef7630f137ac 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -200,6 +200,19 @@ private: selectWMMAOpSelVOP3PMods(MachineOperand &Root) const; InstructionSelector::ComplexRendererFns + selectWMMAModsF32NegAbs(MachineOperand &Root) const; + InstructionSelector::ComplexRendererFns + selectWMMAModsF16Neg(MachineOperand &Root) const; + InstructionSelector::ComplexRendererFns + selectWMMAModsF16NegAbs(MachineOperand &Root) const; + InstructionSelector::ComplexRendererFns + selectWMMAVISrc(MachineOperand &Root) const; + InstructionSelector::ComplexRendererFns + selectSWMMACIndex8(MachineOperand &Root) const; + InstructionSelector::ComplexRendererFns + 
selectSWMMACIndex16(MachineOperand &Root) const; + + InstructionSelector::ComplexRendererFns selectVOP3OpSelMods(MachineOperand &Root) const; InstructionSelector::ComplexRendererFns diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 8e74d4c0e945..17ffb7ec988f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -4178,10 +4178,45 @@ bool AMDGPULegalizerInfo::loadInputValue( Register DstReg, MachineIRBuilder &B, AMDGPUFunctionArgInfo::PreloadedValue ArgType) const { const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>(); - const ArgDescriptor *Arg; + const ArgDescriptor *Arg = nullptr; const TargetRegisterClass *ArgRC; LLT ArgTy; - std::tie(Arg, ArgRC, ArgTy) = MFI->getPreloadedValue(ArgType); + + CallingConv::ID CC = B.getMF().getFunction().getCallingConv(); + const ArgDescriptor WorkGroupIDX = + ArgDescriptor::createRegister(AMDGPU::TTMP9); + // If GridZ is not programmed in an entry function then the hardware will set + // it to all zeros, so there is no need to mask the GridY value in the low + // order bits. + const ArgDescriptor WorkGroupIDY = ArgDescriptor::createRegister( + AMDGPU::TTMP7, + AMDGPU::isEntryFunctionCC(CC) && !MFI->hasWorkGroupIDZ() ? ~0u : 0xFFFFu); + const ArgDescriptor WorkGroupIDZ = + ArgDescriptor::createRegister(AMDGPU::TTMP7, 0xFFFF0000u); + if (ST.hasArchitectedSGPRs() && AMDGPU::isCompute(CC)) { + switch (ArgType) { + case AMDGPUFunctionArgInfo::WORKGROUP_ID_X: + Arg = &WorkGroupIDX; + ArgRC = &AMDGPU::SReg_32RegClass; + ArgTy = LLT::scalar(32); + break; + case AMDGPUFunctionArgInfo::WORKGROUP_ID_Y: + Arg = &WorkGroupIDY; + ArgRC = &AMDGPU::SReg_32RegClass; + ArgTy = LLT::scalar(32); + break; + case AMDGPUFunctionArgInfo::WORKGROUP_ID_Z: + Arg = &WorkGroupIDZ; + ArgRC = &AMDGPU::SReg_32RegClass; + ArgTy = LLT::scalar(32); + break; + default: + break; + } + } + + if (!Arg) + std::tie(Arg, ArgRC, ArgTy) = MFI->getPreloadedValue(ArgType); if (!Arg) { if (ArgType == AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR) { @@ -6848,6 +6883,21 @@ bool AMDGPULegalizerInfo::legalizeStackSave(MachineInstr &MI, return true; } +bool AMDGPULegalizerInfo::legalizeWaveID(MachineInstr &MI, + MachineIRBuilder &B) const { + // With architected SGPRs, waveIDinGroup is in TTMP8[29:25]. 
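The ubfx built below is a plain shift-and-mask; a one-line model of the field the comment describes:

#include <cstdint>

// waveIDinGroup occupies TTMP8[29:25], i.e. a 5-bit field at bit 25, which
// is exactly what buildUbfx(DstReg, TTMP8, /*LSB=*/25, /*Width=*/5) extracts.
uint32_t waveIdInGroup(uint32_t Ttmp8) { return (Ttmp8 >> 25) & 0x1f; }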
+ if (!ST.hasArchitectedSGPRs()) + return false; + LLT S32 = LLT::scalar(32); + Register DstReg = MI.getOperand(0).getReg(); + auto TTMP8 = B.buildCopy(S32, Register(AMDGPU::TTMP8)); + auto LSB = B.buildConstant(S32, 25); + auto Width = B.buildConstant(S32, 5); + B.buildUbfx(DstReg, TTMP8, LSB, Width); + MI.eraseFromParent(); + return true; +} + bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const { MachineIRBuilder &B = Helper.MIRBuilder; @@ -6970,6 +7020,8 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, case Intrinsic::amdgcn_workgroup_id_z: return legalizePreloadedArgIntrin(MI, MRI, B, AMDGPUFunctionArgInfo::WORKGROUP_ID_Z); + case Intrinsic::amdgcn_wave_id: + return legalizeWaveID(MI, B); case Intrinsic::amdgcn_lds_kernel_id: return legalizePreloadedArgIntrin(MI, MRI, B, AMDGPUFunctionArgInfo::LDS_KERNEL_ID); @@ -7134,6 +7186,29 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, return legalizeDSAtomicFPIntrinsic(Helper, MI, IntrID); case Intrinsic::amdgcn_image_bvh_intersect_ray: return legalizeBVHIntrinsic(MI, B); + case Intrinsic::amdgcn_swmmac_f16_16x16x32_f16: + case Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16: + case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16: + case Intrinsic::amdgcn_swmmac_f32_16x16x32_f16: + case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8: + case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8: + case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8: + case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8: { + Register Index = MI.getOperand(5).getReg(); + LLT S32 = LLT::scalar(32); + if (MRI.getType(Index) != S32) + MI.getOperand(5).setReg(B.buildAnyExt(S32, Index).getReg(0)); + return true; + } + case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4: + case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8: + case Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4: { + Register Index = MI.getOperand(7).getReg(); + LLT S32 = LLT::scalar(32); + if (MRI.getType(Index) != S32) + MI.getOperand(7).setReg(B.buildAnyExt(S32, Index).getReg(0)); + return true; + } case Intrinsic::amdgcn_fmed3: { GISelChangeObserver &Observer = Helper.Observer; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h index 56aabd4f6ab7..ecbe42681c66 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -212,6 +212,7 @@ public: bool legalizeFPTruncRound(MachineInstr &MI, MachineIRBuilder &B) const; bool legalizeStackSave(MachineInstr &MI, MachineIRBuilder &B) const; + bool legalizeWaveID(MachineInstr &MI, MachineIRBuilder &B) const; bool legalizeImageIntrinsic( MachineInstr &MI, MachineIRBuilder &B, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index 5e73411cae9b..c1b244f50d93 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -521,10 +521,18 @@ static Value *promoteAllocaUserToVector( // For memset, we don't need to know the previous value because we // currently only allow memsets that cover the whole alloca. 
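The hunk below rebuilds one vector element from the memset fill byte. A self-contained model of that step for a 4-byte element (the pointer-element case additionally goes through inttoptr, as the new code shows):

#include <cstdint>
#include <cstring>

// Replicate the fill byte to the element's store size, then reinterpret the
// bytes as the element type: the splat-plus-bitcast done in IR below.
uint32_t memsetEltAsI32(uint8_t Fill) {
  uint8_t Bytes[4] = {Fill, Fill, Fill, Fill}; // CreateVectorSplat equivalent
  uint32_t Elt;
  std::memcpy(&Elt, Bytes, sizeof(Elt));       // CreateBitCast equivalent
  return Elt;                                  // e.g. 0xAB -> 0xABABABAB
}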
Value *Elt = MSI->getOperand(1); - if (DL.getTypeStoreSize(VecEltTy) > 1) { - Value *EltBytes = - Builder.CreateVectorSplat(DL.getTypeStoreSize(VecEltTy), Elt); - Elt = Builder.CreateBitCast(EltBytes, VecEltTy); + const unsigned BytesPerElt = DL.getTypeStoreSize(VecEltTy); + if (BytesPerElt > 1) { + Value *EltBytes = Builder.CreateVectorSplat(BytesPerElt, Elt); + + // If the element type of the vector is a pointer, we need to first cast + // to an integer, then use a PtrCast. + if (VecEltTy->isPointerTy()) { + Type *PtrInt = Builder.getIntNTy(BytesPerElt * 8); + Elt = Builder.CreateBitCast(EltBytes, PtrInt); + Elt = Builder.CreateIntToPtr(Elt, VecEltTy); + } else + Elt = Builder.CreateBitCast(EltBytes, VecEltTy); } return Builder.CreateVectorSplat(VectorTy->getElementCount(), Elt); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index bdd4e891f158..09fac963d222 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -4505,6 +4505,22 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_wmma_f32_16x16x16_f16: case Intrinsic::amdgcn_wmma_i32_16x16x16_iu4: case Intrinsic::amdgcn_wmma_i32_16x16x16_iu8: + case Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8: + case Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8: + case Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8: + case Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8: + case Intrinsic::amdgcn_wmma_i32_16x16x32_iu4: + case Intrinsic::amdgcn_swmmac_f32_16x16x32_f16: + case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16: + case Intrinsic::amdgcn_swmmac_f16_16x16x32_f16: + case Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16: + case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8: + case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4: + case Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4: + case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8: + case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8: + case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8: + case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8: return getDefaultMappingVOP(MI); case Intrinsic::amdgcn_log: case Intrinsic::amdgcn_exp2: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td index 67263f23b983..bb1c6b733729 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td @@ -414,6 +414,22 @@ def : SourceOfDivergence<int_amdgcn_wmma_f16_16x16x16_f16>; def : SourceOfDivergence<int_amdgcn_wmma_bf16_16x16x16_bf16>; def : SourceOfDivergence<int_amdgcn_wmma_i32_16x16x16_iu8>; def : SourceOfDivergence<int_amdgcn_wmma_i32_16x16x16_iu4>; +def : SourceOfDivergence<int_amdgcn_wmma_f32_16x16x16_fp8_fp8>; +def : SourceOfDivergence<int_amdgcn_wmma_f32_16x16x16_fp8_bf8>; +def : SourceOfDivergence<int_amdgcn_wmma_f32_16x16x16_bf8_fp8>; +def : SourceOfDivergence<int_amdgcn_wmma_f32_16x16x16_bf8_bf8>; +def : SourceOfDivergence<int_amdgcn_wmma_i32_16x16x32_iu4>; +def : SourceOfDivergence<int_amdgcn_swmmac_f32_16x16x32_f16>; +def : SourceOfDivergence<int_amdgcn_swmmac_f32_16x16x32_bf16>; +def : SourceOfDivergence<int_amdgcn_swmmac_f16_16x16x32_f16>; +def : SourceOfDivergence<int_amdgcn_swmmac_bf16_16x16x32_bf16>; +def : SourceOfDivergence<int_amdgcn_swmmac_i32_16x16x32_iu8>; +def : SourceOfDivergence<int_amdgcn_swmmac_i32_16x16x32_iu4>; +def : SourceOfDivergence<int_amdgcn_swmmac_i32_16x16x64_iu4>; +def : 
SourceOfDivergence<int_amdgcn_swmmac_f32_16x16x32_fp8_fp8>; +def : SourceOfDivergence<int_amdgcn_swmmac_f32_16x16x32_fp8_bf8>; +def : SourceOfDivergence<int_amdgcn_swmmac_f32_16x16x32_bf8_fp8>; +def : SourceOfDivergence<int_amdgcn_swmmac_f32_16x16x32_bf8_bf8>; def : SourceOfDivergence<int_amdgcn_global_load_tr>; // The dummy boolean output is divergent from the IR's perspective, diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 489cf85693ed..9ab657f4e7bb 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -151,6 +151,8 @@ public: ImmTyOpSelHi, ImmTyNegLo, ImmTyNegHi, + ImmTyIndexKey8bit, + ImmTyIndexKey16bit, ImmTyDPP8, ImmTyDppCtrl, ImmTyDppRowMask, @@ -383,6 +385,8 @@ public: bool isGDS() const { return isImmTy(ImmTyGDS); } bool isLDS() const { return isImmTy(ImmTyLDS); } bool isCPol() const { return isImmTy(ImmTyCPol); } + bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); } + bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); } bool isTFE() const { return isImmTy(ImmTyTFE); } bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } bool isDppBankMask() const { return isImmTy(ImmTyDppBankMask); } @@ -656,6 +660,14 @@ public: return isVISrcF16() || isVISrcB32(); } + bool isVISrc_64F16() const { + return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16); + } + + bool isVISrc_64B32() const { + return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); + } + bool isVISrc_64B64() const { return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); } @@ -672,6 +684,14 @@ public: return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); } + bool isVISrc_256B32() const { + return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); + } + + bool isVISrc_256F32() const { + return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); + } + bool isVISrc_256B64() const { return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); } @@ -1047,6 +1067,8 @@ public: case ImmTyOffset1: OS << "Offset1"; break; case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break; case ImmTyCPol: OS << "CPol"; break; + case ImmTyIndexKey8bit: OS << "index_key"; break; + case ImmTyIndexKey16bit: OS << "index_key"; break; case ImmTyTFE: OS << "TFE"; break; case ImmTyD16: OS << "D16"; break; case ImmTyFORMAT: OS << "FORMAT"; break; @@ -1604,6 +1626,11 @@ public: ParseStatus parseRegWithFPInputMods(OperandVector &Operands); ParseStatus parseRegWithIntInputMods(OperandVector &Operands); ParseStatus parseVReg32OrOff(OperandVector &Operands); + ParseStatus tryParseIndexKey(OperandVector &Operands, + AMDGPUOperand::ImmTy ImmTy); + ParseStatus parseIndexKey8bit(OperandVector &Operands); + ParseStatus parseIndexKey16bit(OperandVector &Operands); + ParseStatus parseDfmtNfmt(int64_t &Format); ParseStatus parseUfmt(int64_t &Format); ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, @@ -1784,6 +1811,8 @@ public: void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); void cvtVOP3(MCInst &Inst, const OperandVector &Operands); void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); + void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands); + void cvtVOPD(MCInst &Inst, const OperandVector &Operands); void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands, OptionalImmIndexMap &OptionalIdx); @@ -3500,6 +3529,9 @@ bool AMDGPUAsmParser::usesConstantBus(const 
MCInst &Inst, unsigned OpIdx) { return !isInlineConstant(Inst, OpIdx); } else if (MO.isReg()) { auto Reg = MO.getReg(); + if (!Reg) { + return false; + } const MCRegisterInfo *TRI = getContext().getRegisterInfo(); auto PReg = mc2PseudoReg(Reg); return isSGPR(PReg, TRI) && PReg != SGPR_NULL; @@ -4364,7 +4396,11 @@ bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, int OpName) { uint64_t TSFlags = MII.get(Opc).TSFlags; // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2) - if (!(TSFlags & SIInstrFlags::IsDOT)) + // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1) + // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1) + // other wmma/swmmac instructions don't have neg_lo/neg_hi operand. + if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) && + !(TSFlags & SIInstrFlags::IsSWMMAC)) return true; int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName); @@ -6465,6 +6501,33 @@ bool AMDGPUAsmParser::tryParseFmt(const char *Pref, return true; } +ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands, + AMDGPUOperand::ImmTy ImmTy) { + const char *Pref = "index_key"; + int64_t ImmVal = 0; + SMLoc Loc = getLoc(); + auto Res = parseIntWithPrefix(Pref, ImmVal); + if (!Res.isSuccess()) + return Res; + + if (ImmTy == AMDGPUOperand::ImmTyIndexKey16bit && (ImmVal < 0 || ImmVal > 1)) + return Error(Loc, Twine("out of range ", StringRef(Pref))); + + if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3)) + return Error(Loc, Twine("out of range ", StringRef(Pref))); + + Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy)); + return ParseStatus::Success; +} + +ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) { + return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit); +} + +ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) { + return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit); +} + // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their // values to live in a joint format operand in the MCInst encoding. ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { @@ -8303,12 +8366,20 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi || - Opc == AMDGPU::V_CVT_SR_FP8_F32_vi) { + Opc == AMDGPU::V_CVT_SR_FP8_F32_vi || + Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_gfx12 || + Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_gfx12) { Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods Inst.addOperand(Inst.getOperand(0)); } - if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in)) { + // Adding vdst_in operand is already covered for these DPP instructions in + // cvtVOP3DPP. 
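The index_key ranges enforced by tryParseIndexKey above (0-3 for the 8-bit key, 0-1 for the 16-bit key) correspond to how many lanes fit in one 32-bit index register. A sketch of the selection, inferred from the SelectSWMMACIndex8/16 matchers earlier in this diff:

#include <cstdint>

// index_key chooses which slice of the 32-bit index register a SWMMAC
// instruction consumes: one of four bytes, or one of two halves.
uint8_t swmmacIndex8(uint32_t IndexReg, unsigned Key) { // Key in [0, 3]
  return uint8_t(IndexReg >> (8 * Key));
}

uint16_t swmmacIndex16(uint32_t IndexReg, unsigned Key) { // Key in [0, 1]
  return uint16_t(IndexReg >> (16 * Key));
}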
+ if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) && + !(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 || + Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 || + Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 || + Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12)) { assert(!IsPacked); Inst.addOperand(Inst.getOperand(0)); } @@ -8329,10 +8400,12 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, } int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); - if (NegLoIdx != -1) { + if (NegLoIdx != -1) addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); + + int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); + if (NegHiIdx != -1) addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); - } const int Ops[] = { AMDGPU::OpName::src0, AMDGPU::OpName::src1, @@ -8352,11 +8425,11 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, if (OpSelHiIdx != -1) OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); - if (NegLoIdx != -1) { - int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); + if (NegLoIdx != -1) NegLo = Inst.getOperand(NegLoIdx).getImm(); + + if (NegHiIdx != -1) NegHi = Inst.getOperand(NegHiIdx).getImm(); - } for (int J = 0; J < 3; ++J) { int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); @@ -8392,6 +8465,43 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { cvtVOP3P(Inst, Operands, OptIdx); } +static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands, + unsigned i, unsigned Opc, unsigned OpName) { + if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1) + ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2); + else + ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1); +} + +void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) { + unsigned Opc = Inst.getOpcode(); + + ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); + addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers); + addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers); + ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef + ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2 + + OptionalImmIndexMap OptIdx; + for (unsigned i = 5; i < Operands.size(); ++i) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); + OptIdx[Op.getImmTy()] = i; + } + + if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit)) + addOptionalImmOperand(Inst, Operands, OptIdx, + AMDGPUOperand::ImmTyIndexKey8bit); + + if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit)) + addOptionalImmOperand(Inst, Operands, OptIdx, + AMDGPUOperand::ImmTyIndexKey16bit); + + if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) + addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClampSI); + + cvtVOP3P(Inst, Operands, OptIdx); +} + //===----------------------------------------------------------------------===// // VOPD //===----------------------------------------------------------------------===// @@ -8770,6 +8880,22 @@ void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, } } + int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in); + if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) { + Inst.addOperand(Inst.getOperand(0)); + } + + bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp8_gfx12 || + Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp8_gfx12 || + Opc 
== AMDGPU::V_CVT_SR_BF8_F32_e64_dpp_gfx12 || + Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp_gfx12; + if (IsVOP3CvtSrDpp) { + if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) { + Inst.addOperand(MCOperand::createImm(0)); + Inst.addOperand(MCOperand::createReg(0)); + } + } + auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), MCOI::TIED_TO); if (TiedTo != -1) { diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 86096b0d80b4..a9968cfe25b4 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -260,8 +260,12 @@ DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32, OPW32, 32) DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_64, OPW64, 64) DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_64, OPW64, 32) DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_64, OPW64, 64) +DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_64, OPW64, 32) +DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_64, OPW64, 16) DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_128, OPW128, 32) +DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_128, OPW128, 16) DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_256, OPW256, 64) +DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_256, OPW256, 32) DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_512, OPW512, 32) DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_1024, OPW1024, 32) @@ -704,6 +708,10 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, break; Res = tryDecodeInst(DecoderTableWMMAGFX1164, MI, QW, Address, CS); + if (Res) + break; + + Res = tryDecodeInst(DecoderTableWMMAGFX1264, MI, QW, Address, CS); } while (false); if (Res && AMDGPU::isMAC(MI.getOpcode())) { @@ -712,6 +720,13 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, AMDGPU::OpName::src2_modifiers); } + if (Res && (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp || + MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp)) { + // Insert dummy unused src2_modifiers. 
+ insertNamedMCOperand(MI, MCOperand::createImm(0), + AMDGPU::OpName::src2_modifiers); + } + if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) && !AMDGPU::hasGDS(STI)) { insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds); @@ -942,6 +957,7 @@ void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const { // first add optional MI operands to check FI DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const { unsigned Opc = MI.getOpcode(); + if (MCII->get(Opc).TSFlags & SIInstrFlags::VOP3P) { convertVOP3PDPPInst(MI); } else if ((MCII->get(Opc).TSFlags & SIInstrFlags::VOPC) || @@ -951,6 +967,15 @@ DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const { if (isMacDPP(MI)) convertMacDPPInst(MI); + int VDstInIdx = + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in); + if (VDstInIdx != -1) + insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in); + + if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp8_gfx12 || + MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp8_gfx12) + insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::src2); + unsigned DescNumOps = MCII->get(Opc).getNumOperands(); if (MI.getNumOperands() < DescNumOps && AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) { @@ -977,6 +1002,15 @@ DecodeStatus AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const { if (isMacDPP(MI)) convertMacDPPInst(MI); + int VDstInIdx = + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in); + if (VDstInIdx != -1) + insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in); + + if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp_gfx12 || + MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp_gfx12) + insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::src2); + unsigned Opc = MI.getOpcode(); unsigned DescNumOps = MCII->get(Opc).getNumOperands(); if (MI.getNumOperands() < DescNumOps && diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index b6e4e65ff5b0..08bef7ad3002 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -1716,14 +1716,14 @@ bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) { } bool GCNHazardRecognizer::fixWMMAHazards(MachineInstr *MI) { - if (!SIInstrInfo::isWMMA(*MI)) + if (!SIInstrInfo::isWMMA(*MI) && !SIInstrInfo::isSWMMAC(*MI)) return false; const SIInstrInfo *TII = ST.getInstrInfo(); const SIRegisterInfo *TRI = ST.getRegisterInfo(); - auto IsHazardFn = [MI, TII, TRI](const MachineInstr &I) { - if (!SIInstrInfo::isWMMA(I)) + auto IsHazardFn = [MI, TII, TRI, this](const MachineInstr &I) { + if (!SIInstrInfo::isWMMA(I) && !SIInstrInfo::isSWMMAC(I)) return false; // Src0 or Src1 of the current wmma instruction overlaps with the dest of @@ -1753,6 +1753,7 @@ bool GCNHazardRecognizer::fixWMMAHazards(MachineInstr *MI) { const MachineOperand *Src2Mods = TII->getNamedOperand(*MI, AMDGPU::OpName::src2_modifiers); const bool NoSrc2Mods = + !Src2Mods || (Src2Mods->getImm() & (SISrcMods::NEG | SISrcMods::NEG_HI)) == 0; // Exception: there is no hazard if the wmma instructions are of the same // type and there is no input modifier on src2 of the current instruction. @@ -1760,6 +1761,18 @@ bool GCNHazardRecognizer::fixWMMAHazards(MachineInstr *MI) { TII->pseudoToMCOpcode(MI->getOpcode()))); } + // GFX12+ allows overlap of matrix C with PrevDstReg (hardware will stall) + // but Index can't overlap with PrevDstReg. 
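Register overlap here reduces to an interval test over adjacent physical registers. A simplified stand-in for TRI->regsOverlap, treating a register tuple as the range [Lo, Lo + Size):

// On GFX12 the only remaining WMMA/SWMMAC hazard is the previous
// instruction's destination range overlapping the current SWMMAC's index
// register; matrix C overlap is absorbed by a hardware stall.
bool regRangesOverlap(unsigned PrevDstLo, unsigned PrevDstSize,
                      unsigned IdxLo, unsigned IdxSize) {
  return PrevDstLo < IdxLo + IdxSize && IdxLo < PrevDstLo + PrevDstSize;
}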
+ if (AMDGPU::isGFX12Plus(ST)) { + if (SIInstrInfo::isSWMMAC(*MI)) { + const Register CurIndex = + TII->getNamedOperand(*MI, AMDGPU::OpName::src2)->getReg(); + if (TRI->regsOverlap(PrevDstReg, CurIndex)) + return true; + } + return false; + } + return false; }; diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index e73e53aa270f..abfa4a3531e8 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -1275,6 +1275,23 @@ void AMDGPUInstPrinter::printPackedModifier(const MCInst *MI, (ModIdx != -1) ? MI->getOperand(ModIdx).getImm() : DefaultValue; } + // Print three values of neg/opsel for wmma instructions (prints 0 when there + // is no src_modifier operand instead of not printing anything). + if (MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::IsSWMMAC || + MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::IsWMMA) { + NumOps = 0; + int DefaultValue = Mod == SISrcMods::OP_SEL_1; + for (int OpName : + {AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers, + AMDGPU::OpName::src2_modifiers}) { + int Idx = AMDGPU::getNamedOperandIdx(Opc, OpName); + if (Idx != -1) + Ops[NumOps++] = MI->getOperand(Idx).getImm(); + else + Ops[NumOps++] = DefaultValue; + } + } + const bool HasDstSel = NumOps > 0 && Mod == SISrcMods::OP_SEL_0 && @@ -1305,6 +1322,16 @@ void AMDGPUInstPrinter::printOpSel(const MCInst *MI, unsigned, const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Opc = MI->getOpcode(); + if (isCvt_F32_Fp8_Bf8_e64(Opc)) { + auto SrcMod = + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); + unsigned Mod = MI->getOperand(SrcMod).getImm(); + unsigned Index0 = !!(Mod & SISrcMods::OP_SEL_0); + unsigned Index1 = !!(Mod & SISrcMods::OP_SEL_1); + if (Index0 || Index1) + O << " op_sel:[" << Index0 << ',' << Index1 << ']'; + return; + } if (isPermlane16(Opc)) { auto FIN = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); auto BCN = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers); @@ -1336,6 +1363,26 @@ void AMDGPUInstPrinter::printNegHi(const MCInst *MI, unsigned OpNo, printPackedModifier(MI, " neg_hi:[", SISrcMods::NEG_HI, O); } +void AMDGPUInstPrinter::printIndexKey8bit(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + auto Imm = MI->getOperand(OpNo).getImm() & 0x7; + if (Imm == 0) + return; + + O << " index_key:" << Imm; +} + +void AMDGPUInstPrinter::printIndexKey16bit(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + auto Imm = MI->getOperand(OpNo).getImm() & 0x7; + if (Imm == 0) + return; + + O << " index_key:" << Imm; +} + void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O) { diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h index e3958f88277d..e91ff86b219a 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h @@ -139,6 +139,10 @@ private: const MCSubtargetInfo &STI, raw_ostream &O); void printNegHi(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + void printIndexKey8bit(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printIndexKey16bit(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); void 
printInterpSlot(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); void printInterpAttr(const MCInst *MI, unsigned OpNo, diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h index 8ab66d4fd5b8..19596d53b453 100644 --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -167,6 +167,9 @@ enum : uint64_t { // ds_gws_* instructions. GWS = UINT64_C(1) << 62, + + // Is a SWMMAC instruction. + IsSWMMAC = UINT64_C(1) << 63, }; // v_cmp_class_* etc. use a 10-bit mask for what operation is checked. diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 2862a7787e75..a812cdc61500 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -208,6 +208,7 @@ bool SIFoldOperands::canUseImmWithOpSel(FoldCandidate &Fold) const { assert(Old.isReg() && Fold.isImm()); if (!(TSFlags & SIInstrFlags::IsPacked) || (TSFlags & SIInstrFlags::IsMAI) || + (TSFlags & SIInstrFlags::IsWMMA) || (TSFlags & SIInstrFlags::IsSWMMAC) || (ST->hasDOTOpSelHazard() && (TSFlags & SIInstrFlags::IsDOT))) return false; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index cf947dccafac..d6bf0d8cb2ef 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -2072,11 +2072,45 @@ SDValue SITargetLowering::getPreloadedValue(SelectionDAG &DAG, const SIMachineFunctionInfo &MFI, EVT VT, AMDGPUFunctionArgInfo::PreloadedValue PVID) const { - const ArgDescriptor *Reg; + const ArgDescriptor *Reg = nullptr; const TargetRegisterClass *RC; LLT Ty; - std::tie(Reg, RC, Ty) = MFI.getPreloadedValue(PVID); + CallingConv::ID CC = DAG.getMachineFunction().getFunction().getCallingConv(); + const ArgDescriptor WorkGroupIDX = + ArgDescriptor::createRegister(AMDGPU::TTMP9); + // If GridZ is not programmed in an entry function then the hardware will set + // it to all zeros, so there is no need to mask the GridY value in the low + // order bits. + const ArgDescriptor WorkGroupIDY = ArgDescriptor::createRegister( + AMDGPU::TTMP7, + AMDGPU::isEntryFunctionCC(CC) && !MFI.hasWorkGroupIDZ() ? 
~0u : 0xFFFFu); + const ArgDescriptor WorkGroupIDZ = + ArgDescriptor::createRegister(AMDGPU::TTMP7, 0xFFFF0000u); + if (Subtarget->hasArchitectedSGPRs() && AMDGPU::isCompute(CC)) { + switch (PVID) { + case AMDGPUFunctionArgInfo::WORKGROUP_ID_X: + Reg = &WorkGroupIDX; + RC = &AMDGPU::SReg_32RegClass; + Ty = LLT::scalar(32); + break; + case AMDGPUFunctionArgInfo::WORKGROUP_ID_Y: + Reg = &WorkGroupIDY; + RC = &AMDGPU::SReg_32RegClass; + Ty = LLT::scalar(32); + break; + case AMDGPUFunctionArgInfo::WORKGROUP_ID_Z: + Reg = &WorkGroupIDZ; + RC = &AMDGPU::SReg_32RegClass; + Ty = LLT::scalar(32); + break; + default: + break; + } + } + + if (!Reg) + std::tie(Reg, RC, Ty) = MFI.getPreloadedValue(PVID); if (!Reg) { if (PVID == AMDGPUFunctionArgInfo::PreloadedValue::KERNARG_SEGMENT_PTR) { // It's possible for a kernarg intrinsic call to appear in a kernel with @@ -2505,28 +2539,24 @@ void SITargetLowering::allocateSystemSGPRs(CCState &CCInfo, } } - if (Info.hasWorkGroupIDX()) { - Register Reg = Info.addWorkGroupIDX(HasArchitectedSGPRs); - if (!HasArchitectedSGPRs) + if (!HasArchitectedSGPRs) { + if (Info.hasWorkGroupIDX()) { + Register Reg = Info.addWorkGroupIDX(); MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass); + CCInfo.AllocateReg(Reg); + } - CCInfo.AllocateReg(Reg); - } - - if (Info.hasWorkGroupIDY()) { - Register Reg = Info.addWorkGroupIDY(HasArchitectedSGPRs); - if (!HasArchitectedSGPRs) + if (Info.hasWorkGroupIDY()) { + Register Reg = Info.addWorkGroupIDY(); MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass); + CCInfo.AllocateReg(Reg); + } - CCInfo.AllocateReg(Reg); - } - - if (Info.hasWorkGroupIDZ()) { - Register Reg = Info.addWorkGroupIDZ(HasArchitectedSGPRs); - if (!HasArchitectedSGPRs) + if (Info.hasWorkGroupIDZ()) { + Register Reg = Info.addWorkGroupIDZ(); MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass); - - CCInfo.AllocateReg(Reg); + CCInfo.AllocateReg(Reg); + } } if (Info.hasWorkGroupInfo()) { @@ -7890,6 +7920,17 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc, return Loads[0]; } +SDValue SITargetLowering::lowerWaveID(SelectionDAG &DAG, SDValue Op) const { + // With architected SGPRs, waveIDinGroup is in TTMP8[29:25]. 
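+  // Editorial note: equivalently, wave_id = (ttmp8 >> 25) & 0x1f; the
+  // BFE_U32 below encodes exactly this with offset 25 and width 5.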
+ if (!Subtarget->hasArchitectedSGPRs()) + return {}; + SDLoc SL(Op); + MVT VT = MVT::i32; + SDValue TTMP8 = DAG.getCopyFromReg(DAG.getEntryNode(), SL, AMDGPU::TTMP8, VT); + return DAG.getNode(AMDGPUISD::BFE_U32, SL, VT, TTMP8, + DAG.getConstant(25, SL, VT), DAG.getConstant(5, SL, VT)); +} + SDValue SITargetLowering::lowerWorkitemID(SelectionDAG &DAG, SDValue Op, unsigned Dim, const ArgDescriptor &Arg) const { @@ -8060,6 +8101,8 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::amdgcn_workgroup_id_z: return getPreloadedValue(DAG, *MFI, VT, AMDGPUFunctionArgInfo::WORKGROUP_ID_Z); + case Intrinsic::amdgcn_wave_id: + return lowerWaveID(DAG, Op); case Intrinsic::amdgcn_lds_kernel_id: { if (MFI->isEntryFunction()) return getLDSKernelId(DAG, DL); @@ -8242,6 +8285,36 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SIInstrInfo::MO_ABS32_LO); return {DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, GA), 0}; } + case Intrinsic::amdgcn_swmmac_f16_16x16x32_f16: + case Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16: + case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16: + case Intrinsic::amdgcn_swmmac_f32_16x16x32_f16: + case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8: + case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8: + case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8: + case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8: { + if (Op.getOperand(4).getValueType() == MVT::i32) + return SDValue(); + + SDLoc SL(Op); + auto IndexKeyi32 = DAG.getAnyExtOrTrunc(Op.getOperand(4), SL, MVT::i32); + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SL, Op.getValueType(), + Op.getOperand(0), Op.getOperand(1), Op.getOperand(2), + Op.getOperand(3), IndexKeyi32); + } + case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4: + case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8: + case Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4: { + if (Op.getOperand(6).getValueType() == MVT::i32) + return SDValue(); + + SDLoc SL(Op); + auto IndexKeyi32 = DAG.getAnyExtOrTrunc(Op.getOperand(6), SL, MVT::i32); + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SL, Op.getValueType(), + {Op.getOperand(0), Op.getOperand(1), Op.getOperand(2), + Op.getOperand(3), Op.getOperand(4), Op.getOperand(5), + IndexKeyi32, Op.getOperand(7)}); + } default: if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr = AMDGPU::getImageDimIntrinsicInfo(IntrinsicID)) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index d66ba0b59ba9..e436c23af5bc 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -80,6 +80,7 @@ private: SDValue lowerStructBufferAtomicIntrin(SDValue Op, SelectionDAG &DAG, unsigned NewOpcode) const; + SDValue lowerWaveID(SelectionDAG &DAG, SDValue Op) const; SDValue lowerWorkitemID(SelectionDAG &DAG, SDValue Op, unsigned Dim, const ArgDescriptor &ArgDesc) const; diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td index 1b66d163714f..ab536f8f49d5 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td +++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td @@ -161,6 +161,9 @@ class InstSI <dag outs, dag ins, string asm = "", // ds_gws_* instructions. field bit GWS = 0; + // This bit indicates that this is one of SWMMAC instructions. + field bit IsSWMMAC = 0; + // These need to be kept in sync with the enum in SIInstrFlags. 
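+  // Editorial note: the new IsSWMMAC field is wired to TSFlags{63} further
+  // down, matching SIInstrFlags::IsSWMMAC = UINT64_C(1) << 63 added to
+  // SIDefines.h earlier in this diff.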
let TSFlags{0} = SALU; let TSFlags{1} = VALU; @@ -248,6 +251,8 @@ class InstSI <dag outs, dag ins, string asm = "", let TSFlags{62} = GWS; + let TSFlags{63} = IsSWMMAC; + let SchedRW = [Write32Bit]; let AsmVariantName = AMDGPUAsmVariants.Default; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index fc85b089aa47..1c9dacc09f81 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -802,6 +802,14 @@ public: return isMFMA(MI) || isWMMA(MI); } + static bool isSWMMAC(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::IsSWMMAC; + } + + bool isSWMMAC(uint16_t Opcode) const { + return get(Opcode).TSFlags & SIInstrFlags::IsSWMMAC; + } + bool isDOT(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::IsDOT; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index a6820544f4b4..45be81950aa3 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1088,6 +1088,9 @@ def op_sel_hi0 : ArrayOperand0<"op_sel_hi", "OpSelHi">; def neg_lo0 : ArrayOperand0<"neg_lo", "NegLo">; def neg_hi0 : ArrayOperand0<"neg_hi", "NegHi">; +def IndexKey16bit : CustomOperand<i32, 1>; +def IndexKey8bit : CustomOperand<i32, 1>; + def dpp8 : CustomOperand<i32, 0, "DPP8">; def dpp_ctrl : CustomOperand<i32, 0, "DPPCtrl">; @@ -1344,6 +1347,13 @@ def VOP3PModsDOT : ComplexPattern<untyped, 2, "SelectVOP3PModsDOT">; def VOP3PModsNeg : ComplexPattern<untyped, 1, "SelectVOP3PModsNeg">; def WMMAOpSelVOP3PMods : ComplexPattern<untyped, 1, "SelectWMMAOpSelVOP3PMods">; +def WMMAModsF32NegAbs : ComplexPattern<untyped, 2, "SelectWMMAModsF32NegAbs">; +def WMMAModsF16Neg : ComplexPattern<untyped, 2, "SelectWMMAModsF16Neg">; +def WMMAModsF16NegAbs : ComplexPattern<untyped, 2, "SelectWMMAModsF16NegAbs">; +def WMMAVISrc : ComplexPattern<untyped, 1, "SelectWMMAVISrc">; +def SWMMACIndex8 : ComplexPattern<untyped, 2, "SelectSWMMACIndex8">; +def SWMMACIndex16 : ComplexPattern<untyped, 2, "SelectSWMMACIndex16">; + def VOP3OpSel : ComplexPattern<untyped, 2, "SelectVOP3OpSel">; def VOP3OpSelMods : ComplexPattern<untyped, 2, "SelectVOP3OpSelMods">; @@ -1684,8 +1694,9 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC, !if(HasOMod, (ins Src0Mod:$src0_modifiers, Src0RC:$src0, clampmod0:$clamp, omod0:$omod), - (ins Src0Mod:$src0_modifiers, Src0RC:$src0, - clampmod0:$clamp)) + !if (HasClamp, + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, clampmod0:$clamp), + (ins Src0Mod:$src0_modifiers, Src0RC:$src0))) /* else */, // VOP1 without modifiers !if (HasClamp, @@ -2278,6 +2289,9 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> { field bit IsDOT = 0; field bit IsSingle = 0; field bit IsWMMA = 0; + field bit IsSWMMAC = 0; + + field bit IsFP8 = 0; field bit HasDst = !ne(DstVT.Value, untyped.Value); field bit HasDst32 = HasDst; diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 9ff66a094f99..0336ec4985ea 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -751,35 +751,21 @@ public: } // Add system SGPRs. - Register addWorkGroupIDX(bool HasArchitectedSGPRs) { - Register Reg = - HasArchitectedSGPRs ? 
(MCPhysReg)AMDGPU::TTMP9 : getNextSystemSGPR(); - ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(Reg); - if (!HasArchitectedSGPRs) - NumSystemSGPRs += 1; - + Register addWorkGroupIDX() { + ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR()); + NumSystemSGPRs += 1; return ArgInfo.WorkGroupIDX.getRegister(); } - Register addWorkGroupIDY(bool HasArchitectedSGPRs) { - Register Reg = - HasArchitectedSGPRs ? (MCPhysReg)AMDGPU::TTMP7 : getNextSystemSGPR(); - unsigned Mask = HasArchitectedSGPRs && hasWorkGroupIDZ() ? 0xffff : ~0u; - ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(Reg, Mask); - if (!HasArchitectedSGPRs) - NumSystemSGPRs += 1; - + Register addWorkGroupIDY() { + ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR()); + NumSystemSGPRs += 1; return ArgInfo.WorkGroupIDY.getRegister(); } - Register addWorkGroupIDZ(bool HasArchitectedSGPRs) { - Register Reg = - HasArchitectedSGPRs ? (MCPhysReg)AMDGPU::TTMP7 : getNextSystemSGPR(); - unsigned Mask = HasArchitectedSGPRs ? 0xffff << 16 : ~0u; - ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(Reg, Mask); - if (!HasArchitectedSGPRs) - NumSystemSGPRs += 1; - + Register addWorkGroupIDZ() { + ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR()); + NumSystemSGPRs += 1; return ArgInfo.WorkGroupIDZ.getRegister(); } diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index f42af89cf5e6..b3265b73fa7e 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -1341,9 +1341,14 @@ def VCSrc_v2f16 : RegOrV2F16 <"VS_32", "OPERAND_REG_INLINE_C">; // VISrc_* Operands with a VGPR or an inline constant //===----------------------------------------------------------------------===// +def VISrc_64_f16 : RegOrF16 <"VReg_64", "OPERAND_REG_INLINE_C">; +def VISrc_64_b32 : RegOrB32 <"VReg_64", "OPERAND_REG_INLINE_C">; def VISrc_64_f64 : RegOrF64 <"VReg_64", "OPERAND_REG_INLINE_C">; +def VISrc_128_f16 : RegOrF16 <"VReg_128", "OPERAND_REG_INLINE_C">; def VISrc_128_b32 : RegOrB32 <"VReg_128", "OPERAND_REG_INLINE_C">; def VISrc_128_f32 : RegOrF32 <"VReg_128", "OPERAND_REG_INLINE_C">; +def VISrc_256_b32 : RegOrB32 <"VReg_256", "OPERAND_REG_INLINE_C">; +def VISrc_256_f32 : RegOrF32 <"VReg_256", "OPERAND_REG_INLINE_C">; def VISrc_256_f64 : RegOrF64 <"VReg_256", "OPERAND_REG_INLINE_C">; def VISrc_512_b32 : RegOrB32 <"VReg_512", "OPERAND_REG_INLINE_C">; def VISrc_512_f32 : RegOrF32 <"VReg_512", "OPERAND_REG_INLINE_C">; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 0bf9452d822e..106fdb19f278 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -529,6 +529,17 @@ bool isPermlane16(unsigned Opc) { Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12; } +bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc) { + return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 || + Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 || + Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 || + Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 || + Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 || + Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 || + Opc == AMDGPU::V_CVT_PK_F32_BF8_e64_gfx12 || + Opc == AMDGPU::V_CVT_PK_F32_FP8_e64_gfx12; +} + bool isGenericAtomic(unsigned Opc) { return Opc == AMDGPU::G_AMDGPU_ATOMIC_FMIN || Opc == AMDGPU::G_AMDGPU_ATOMIC_FMAX || diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h 
b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index d3f55c792017..11b0bc5c8171 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -535,6 +535,9 @@ bool isPermlane16(unsigned Opc); LLVM_READNONE bool isGenericAtomic(unsigned Opc); +LLVM_READNONE +bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc); + namespace VOPD { enum Component : unsigned { diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 95a1d8696347..ef652fce6548 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -571,6 +571,7 @@ let SubtargetPredicate = isGFX9Only in { } // End SubtargetPredicate = isGFX9Only class VOPProfile_Base_CVT_F32_F8<ValueType vt> : VOPProfileI2F <vt, i32> { + let HasExtDPP = 1; let HasExtSDWA = 1; let HasExtSDWA9 = 1; let HasExt = 1; @@ -599,6 +600,7 @@ class Cvt_F32_F8_Pat<SDPatternOperator node, int index, (inst_sdwa 0, $src, 0, 0, index) >; +let SubtargetPredicate = isGFX9Only in { let OtherPredicates = [HasCvtFP8VOP1Bug] in { def : GCNPat<(f32 (int_amdgcn_cvt_f32_fp8 i32:$src, 0)), (V_CVT_F32_FP8_sdwa 0, $src, 0, 0, 0)>; @@ -617,6 +619,7 @@ foreach Index = [1, 2, 3] in { def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_fp8, Index, V_CVT_F32_FP8_sdwa>; def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_bf8, Index, V_CVT_F32_BF8_sdwa>; } +} // End SubtargetPredicate = isGFX9Only class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index, VOP1_Pseudo inst_e32, VOP1_SDWA_Pseudo inst_sdwa> : GCNPat< @@ -626,11 +629,77 @@ class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index, (inst_e32 $src)) >; -foreach Index = [0, -1] in { - def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_fp8, Index, - V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_sdwa>; - def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_bf8, Index, - V_CVT_PK_F32_BF8_e32, V_CVT_PK_F32_BF8_sdwa>; +let SubtargetPredicate = isGFX9Only in { + foreach Index = [0, -1] in { + def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_fp8, Index, + V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_sdwa>; + def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_bf8, Index, + V_CVT_PK_F32_BF8_e32, V_CVT_PK_F32_BF8_sdwa>; + } +} + + +// Similar to VOPProfile_Base_CVT_F32_F8, but for VOP3 instructions. 
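+// Editorial note: on gfx9 the source-byte index is selected through the SDWA
+// src0_sel machinery (see Cvt_F32_F8_Pat above); on gfx12 the same choice is
+// presumably expressed through op_sel, hence the separate _OP_SEL profiles
+// and pseudos below.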
+def VOPProfile_Base_CVT_PK_F32_F8_OpSel : VOPProfileI2F <v2f32, i32> { + let HasOpSel = 1; + let HasExtVOP3DPP = 0; +} + +def VOPProfile_Base_CVT_F32_F8_OpSel : VOPProfile<[f32, i32, untyped, untyped]> { + let HasOpSel = 1; + let HasExtDPP = 1; + let HasExtVOP3DPP = 1; + let IsFP8 = 1; + let HasClamp = 0; + let HasOMod = 0; + let HasModifiers = 1; + let Src1VOP3DPP = Src1RC64; +} + +let SubtargetPredicate = isGFX12Plus, mayRaiseFPException = 0, + SchedRW = [WriteFloatCvt] in { + defm V_CVT_F32_FP8_OP_SEL : VOP1Inst<"v_cvt_f32_fp8_op_sel", VOPProfile_Base_CVT_F32_F8_OpSel>; + defm V_CVT_F32_BF8_OP_SEL : VOP1Inst<"v_cvt_f32_bf8_op_sel", VOPProfile_Base_CVT_F32_F8_OpSel>; + defm V_CVT_PK_F32_FP8_OP_SEL : VOP1Inst<"v_cvt_pk_f32_fp8_op_sel", VOPProfile_Base_CVT_PK_F32_F8_OpSel>; + defm V_CVT_PK_F32_BF8_OP_SEL : VOP1Inst<"v_cvt_pk_f32_bf8_op_sel", VOPProfile_Base_CVT_PK_F32_F8_OpSel>; +} + +class Cvt_F32_F8_Pat_OpSel<SDPatternOperator node, bits<2> index, + VOP1_Pseudo inst_e32, VOP3_Pseudo inst_e64> : GCNPat< + (f32 (node i32:$src, index)), + !if (index, + (inst_e64 !if(index{0}, + !if(index{1}, !or(SRCMODS.OP_SEL_0, SRCMODS.OP_SEL_1), + SRCMODS.OP_SEL_0), + !if(index{1}, SRCMODS.OP_SEL_1, 0)), + $src, 0), + (inst_e32 $src)) +>; + +let SubtargetPredicate = isGFX12Plus in { + foreach Index = [0, 1, 2, 3] in { + def : Cvt_F32_F8_Pat_OpSel<int_amdgcn_cvt_f32_fp8, Index, + V_CVT_F32_FP8_e32, V_CVT_F32_FP8_OP_SEL_e64>; + def : Cvt_F32_F8_Pat_OpSel<int_amdgcn_cvt_f32_bf8, Index, + V_CVT_F32_BF8_e32, V_CVT_F32_BF8_OP_SEL_e64>; + } +} + +class Cvt_PK_F32_F8_Pat_OpSel<SDPatternOperator node, int index, + VOP1_Pseudo inst_e32, VOP3_Pseudo inst_e64> : GCNPat< + (v2f32 (node i32:$src, index)), + !if (index, + (inst_e64 SRCMODS.OP_SEL_0, $src, 0, 0, SRCMODS.NONE), + (inst_e32 $src)) +>; + +let SubtargetPredicate = isGFX12Plus in { + foreach Index = [0, -1] in { + def : Cvt_PK_F32_F8_Pat_OpSel<int_amdgcn_cvt_pk_f32_fp8, Index, + V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_OP_SEL_e64>; + def : Cvt_PK_F32_F8_Pat_OpSel<int_amdgcn_cvt_pk_f32_bf8, Index, + V_CVT_PK_F32_BF8_e32, V_CVT_PK_F32_BF8_OP_SEL_e64>; + } } let SubtargetPredicate = isGFX10Plus in { @@ -853,6 +922,20 @@ multiclass VOP1_Real_NO_DPP_OP_SEL_with_name<GFXGen Gen, bits<9> op, VOP3_Real_with_name<Gen, {0, 1, 1, op{6-0}}, opName, asmName>; +// Define VOP1 instructions using the pseudo instruction with its old profile and +// VOP3 using the OpSel profile for the pseudo instruction. 
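+// Editorial note (illustrative asm, not from this diff): with these defs the
+// index operand of e.g. int_amdgcn_cvt_f32_fp8(i32 %src, i32 3) should select
+// byte 3 of %src and print as
+//   v_cvt_f32_fp8 v0, v1 op_sel:[1,1]
+// via the printOpSel change above (index{0} -> OP_SEL_0, index{1} -> OP_SEL_1).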
+defm V_CVT_F32_FP8 : VOP1_Real_NO_VOP3_with_name_gfx12<0x06c, "V_CVT_F32_FP8", "v_cvt_f32_fp8">; +defm V_CVT_F32_FP8 : VOP1_Realtriple_e64_with_name<GFX12Gen, 0x06c, "V_CVT_F32_FP8_OP_SEL", "v_cvt_f32_fp8">; + +defm V_CVT_F32_BF8 : VOP1_Real_NO_VOP3_with_name_gfx12<0x06d, "V_CVT_F32_BF8", "v_cvt_f32_bf8">; +defm V_CVT_F32_BF8 : VOP1_Realtriple_e64_with_name<GFX12Gen, 0x06d, "V_CVT_F32_BF8_OP_SEL", "v_cvt_f32_bf8">; + +defm V_CVT_PK_F32_FP8 : VOP1_Real_e32_with_name<GFX12Gen, 0x06e, "V_CVT_PK_F32_FP8", "v_cvt_pk_f32_fp8">; +defm V_CVT_PK_F32_FP8 : VOP3_Real_with_name<GFX12Gen, 0x1ee, "V_CVT_PK_F32_FP8_OP_SEL", "v_cvt_pk_f32_fp8">; + +defm V_CVT_PK_F32_BF8 : VOP1_Real_e32_with_name<GFX12Gen, 0x06f, "V_CVT_PK_F32_BF8", "v_cvt_pk_f32_bf8">; +defm V_CVT_PK_F32_BF8 : VOP3_Real_with_name<GFX12Gen, 0x1ef, "V_CVT_PK_F32_BF8_OP_SEL", "v_cvt_pk_f32_bf8">; + defm V_CVT_NEAREST_I32_F32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x00c, "V_CVT_RPI_I32_F32", "v_cvt_nearest_i32_f32">; defm V_CVT_FLOOR_I32_F32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x00d, diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 713b4712d563..14db52210214 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -520,8 +520,26 @@ def VOP3_CVT_PK_F8_F32_Profile : VOP3_Profile<VOP_I32_F32_F32, VOP3_OPSEL> { let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0, FP32InputMods:$src1_modifiers, Src1RC64:$src1, VGPR_32:$vdst_in, op_sel0:$op_sel); + let InsVOP3DPP = (ins VGPR_32:$old, + FP32InputMods:$src0_modifiers, Src0VOP3DPP:$src0, + FP32InputMods:$src1_modifiers, Src1VOP3DPP:$src1, + VGPR_32:$vdst_in, op_sel0:$op_sel, + dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, + bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); + + let InsVOP3DPP16 = (ins VGPR_32:$old, + FP32InputMods:$src0_modifiers, Src0VOP3DPP:$src0, + FP32InputMods:$src1_modifiers, Src1VOP3DPP:$src1, + VGPR_32:$vdst_in, op_sel0:$op_sel, + dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, + bank_mask:$bank_mask, bound_ctrl:$bound_ctrl, FI:$fi); + let InsVOP3DPP8 = (ins VGPR_32:$old, + FP32InputMods:$src0_modifiers, Src0VOP3DPP:$src0, + FP32InputMods:$src1_modifiers, Src1VOP3DPP:$src1, + VGPR_32:$vdst_in, op_sel0:$op_sel, dpp8:$dpp8, FI:$fi); + let HasClamp = 0; - let HasExtVOP3DPP = 0; + let HasExtVOP3DPP = 1; } def VOP3_CVT_SR_F8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, i32, f32]>, @@ -530,14 +548,36 @@ def VOP3_CVT_SR_F8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, i32, f32]>, FP32InputMods:$src1_modifiers, Src1RC64:$src1, FP32InputMods:$src2_modifiers, VGPR_32:$src2, op_sel0:$op_sel); + let InsVOP3DPP16 = (ins VGPR_32:$old, + FP32InputMods:$src0_modifiers, Src0VOP3DPP:$src0, + FP32InputMods:$src1_modifiers, Src1VOP3DPP:$src1, + FP32InputMods:$src2_modifiers, VGPR_32:$src2, + op_sel0:$op_sel, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, + bank_mask:$bank_mask, bound_ctrl:$bound_ctrl, FI:$fi); + let InsVOP3DPP8 = (ins VGPR_32:$old, + FP32InputMods:$src0_modifiers, Src0VOP3DPP:$src0, + FP32InputMods:$src1_modifiers, Src1VOP3DPP:$src1, + FP32InputMods:$src2_modifiers, VGPR_32:$src2, + op_sel0:$op_sel, dpp8:$dpp8, FI:$fi); let HasClamp = 0; let HasSrc2 = 0; let HasSrc2Mods = 1; + let HasExtVOP3DPP = 1; + let HasOpSel = 1; let AsmVOP3OpSel = !subst(", $src2_modifiers", "", getAsmVOP3OpSel<3, HasClamp, HasOMod, HasSrc0FloatMods, HasSrc1FloatMods, HasSrc2FloatMods>.ret); - let HasExtVOP3DPP = 0; + let AsmVOP3DPP16 = !subst(", $src2_modifiers", "", + 
getAsmVOP3DPP16<getAsmVOP3Base<3, 1, HasClamp, 1, + HasOMod, 0, 1, HasSrc0FloatMods, + HasSrc1FloatMods, + HasSrc2FloatMods>.ret>.ret); + let AsmVOP3DPP8 = !subst(", $src2_modifiers", "", + getAsmVOP3DPP8<getAsmVOP3Base<3, 1, HasClamp, 1, + HasOMod, 0, 1, HasSrc0FloatMods, + HasSrc1FloatMods, + HasSrc2FloatMods>.ret>.ret); } def IsPow2Plus1: PatLeaf<(i32 imm), [{ @@ -618,13 +658,13 @@ let SubtargetPredicate = HasFP8ConversionInsts, mayRaiseFPException = 0, class Cvt_PK_F8_F32_Pat<SDPatternOperator node, int index, VOP3_Pseudo inst> : GCNPat< (i32 (node f32:$src0, f32:$src1, i32:$old, index)), - (inst !if(index, SRCMODS.DST_OP_SEL, 0), $src0, 0, $src1, $old, !if(index, SRCMODS.OP_SEL_0, 0)) + (inst !if(index, SRCMODS.DST_OP_SEL, 0), $src0, 0, $src1, $old, 0) >; class Cvt_SR_F8_F32_Pat<SDPatternOperator node, bits<2> index, VOP3_Pseudo inst> : GCNPat< (i32 (node f32:$src0, i32:$src1, i32:$old, index)), (inst !if(index{1}, SRCMODS.DST_OP_SEL, 0), $src0, 0, $src1, - !if(index{0}, SRCMODS.OP_SEL_0, 0), $old, !if(index{1}, SRCMODS.OP_SEL_0, 0)) + !if(index{0}, SRCMODS.OP_SEL_0, 0), $old, 0) >; foreach Index = [0, -1] in { @@ -998,6 +1038,11 @@ defm V_MAXIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x368>; defm V_PERMLANE16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x30f>; defm V_PERMLANEX16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x310>; +defm V_CVT_PK_FP8_F32 : VOP3Only_Realtriple_gfx12<0x369>; +defm V_CVT_PK_BF8_F32 : VOP3Only_Realtriple_gfx12<0x36a>; +defm V_CVT_SR_FP8_F32 : VOP3Only_Realtriple_gfx12<0x36b>; +defm V_CVT_SR_BF8_F32 : VOP3Only_Realtriple_gfx12<0x36c>; + //===----------------------------------------------------------------------===// // GFX11, GFX12 //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index 0c7a08cd4bc9..107b95a9ca8e 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -936,16 +936,19 @@ multiclass WMMAInst<string Suffix, string Instr, VOPProfile P, SDPatternOperator !cast<Instruction>(NAME # _threeaddr # Suffix)>; } - if !eq(Type, WMMAOpSel) then { - def : WMMAOpSelPat<!cast<Instruction>(NAME # _twoaddr # Suffix), node, P>; - } else if !eq(Type, WMMAUIClamp) then { - def : WMMAUIClampPat<!cast<Instruction>(NAME # _twoaddr # Suffix), node, P>; - } else { - def : WMMARegularPat<!cast<Instruction>(NAME # _twoaddr # Suffix), node, P>; + let SubtargetPredicate = isGFX11Only in { + if !eq(Type, WMMAOpSel) then { + def : WMMAOpSelPat<!cast<Instruction>(NAME # _twoaddr # Suffix), node, P>; + } else if !eq(Type, WMMAUIClamp) then { + def : WMMAUIClampPat<!cast<Instruction>(NAME # _twoaddr # Suffix), node, P>; + } else { + def : WMMARegularPat<!cast<Instruction>(NAME # _twoaddr # Suffix), node, P>; + } } } + let WaveSizePredicate = isWave32 in { defm V_WMMA_F32_16X16X16_F16 : WMMAInst<"_w32", "v_wmma_f32_16x16x16_f16", VOP_V8F32_V16F16_V16F16_V8F32, int_amdgcn_wmma_f32_16x16x16_f16, VRegSrc_256, WMMARegular, 1>; defm V_WMMA_F32_16X16X16_BF16 : WMMAInst<"_w32", "v_wmma_f32_16x16x16_bf16", VOP_V8F32_V16I16_V16I16_V8F32, int_amdgcn_wmma_f32_16x16x16_bf16, VRegSrc_256, WMMARegular, 1>; @@ -969,6 +972,398 @@ let WaveSizePredicate = isWave64 in { } +class VOP3PWMMA_Profile<list<ValueType> ArgTy, bit _IsSWMMAC, int _IndexType, + bit _IsIU, bit _IsFP8BF8> + : VOP3P_Profile<VOPProfile<ArgTy>> { + bit IsIU = _IsIU; + bit IsFP8BF8 = _IsFP8BF8; + bit IsF16BF16 = !not(!or(IsIU, IsFP8BF8)); + + int IndexType = 
_IndexType; + + let IsPacked = 1; + let IsWMMA = !not(_IsSWMMAC); + let IsSWMMAC = _IsSWMMAC; + + bit IsAB_F16 = !and(IsF16BF16, ArgTy[1].isFP); + bit IsAB_BF16 = !and(IsF16BF16, isIntType<ArgTy[1]>.ret); + bit IsC_F32 = !or(!eq(ArgTy[3], v8f32), !eq(ArgTy[3], v4f32)); + bit IsC_BF16 = !or(!eq(ArgTy[3], v8i16), !eq(ArgTy[3], v4i16)); + bit IsC_F16 = !or(!eq(ArgTy[3], v8f16), !eq(ArgTy[3], v4f16)); + + bit NegLo01 = !or(IsF16BF16, IsIU); + bit NegLo2 = !and(!or(IsF16BF16, IsFP8BF8), IsWMMA); + bit NegHi01 = IsF16BF16; + bit NegHi2 = !and(!or(IsF16BF16, IsFP8BF8), IsWMMA); + bit NegLoAny = !or(NegLo01, NegLo2); + bit NegHiAny = !or(NegHi01, NegHi2); + + let DstRC = !cond(!eq(ArgTy[0], v8f32): VDst_256, + !eq(ArgTy[0], v8i32): VDst_256, + !eq(ArgTy[0], v8f16): VDst_128, + !eq(ArgTy[0], v8i16): VDst_128, + !eq(ArgTy[0], v4f32): VDst_128, + !eq(ArgTy[0], v4i32): VDst_128, + !eq(ArgTy[0], v4f16): VDst_64, + !eq(ArgTy[0], v4i16): VDst_64); + let Src0RC64 = !cond(!eq(ArgTy[1], v8f16): VRegSrc_128, + !eq(ArgTy[1], v4f16): VRegSrc_64, + !eq(ArgTy[1], v4i16): VRegSrc_64, + !eq(ArgTy[1], v8i16): VRegSrc_128, + !eq(ArgTy[1], v4i32): VRegSrc_128, + !eq(ArgTy[1], v2i32): VRegSrc_64, + !eq(ArgTy[1], i32) : VRegSrc_32); + let Src1RC64 = !cond(!eq(ArgTy[2], v16f16): VRegSrc_256, + !eq(ArgTy[2], v16i16): VRegSrc_256, + !eq(ArgTy[2], v8f16): VRegSrc_128, + !eq(ArgTy[2], v8i16): VRegSrc_128, + !eq(ArgTy[2], v4i32): VRegSrc_128, + !eq(ArgTy[1], v4i16): VRegSrc_64, + !eq(ArgTy[1], v4f16): VRegSrc_64, + !eq(ArgTy[2], v2i32): VRegSrc_64, + !eq(ArgTy[2], i32) : VRegSrc_32); + let Src2RC64 = !if(IsSWMMAC, DstRC, + !cond(!eq(ArgTy[3], v8f32): VISrc_256_f32, + !eq(ArgTy[3], v8i32): VISrc_256_b32, + !eq(ArgTy[3], v8f16): VISrc_128_f16, + !eq(ArgTy[3], v8i16): VISrc_128_f32, // bf16 + !eq(ArgTy[3], v4f16): VISrc_64_f16, + !eq(ArgTy[3], v4i16): VISrc_64_b32, + !eq(ArgTy[3], v4i32): VISrc_128_b32, + !eq(ArgTy[3], v4f32): VISrc_128_f32)); + + // For f16 and bf16 matrices A and B, each element can be modified by + // fneg(neg_lo,neg_hi = 1). For iu4 and iu8 matrices A and B neg_lo is + // overloaded to mean unsigned/signed: neg_lo = 0 (u4 and u8) unsigned(zext) + // neg_lo = 1 (i4 and i8) signed(sext). For f16, bf16 and f32 matrix C each + // element can be modified by fneg(neg_lo = 1) or fabs(neg_hi = 1). 
+ + // Opcode | src0/src1 - matrix A/B | src2 - matrix C or Index + // --------------------------------------------------------------------------- + // wmma f32_f16 | both neg_lo,neg_hi = 1 | neg_lo = 1 neg C(f32) + // wmma f32_bf16 | neg A/B (f16 or bf16) | neg_hi = 1 abs C(f32) + // --------------------------------------------------------------------------- + // wmma f16_f16 | both neg_lo,neg_hi = 1 | neg_lo = 1 neg C(f16 or bf16) + // wmma bf16_bf16 | neg A/B (f16 or bf16) | neg_hi = 1 abs C(f16 or bf16) + // --------------------------------------------------------------------------- + // wmma i32_iu8/iu4 | neg_lo = 0 u4/u8(zext) | not allowed for + // | neg_lo = 1 i4/i8(sext) | i32 matrices + // --------------------------------------------------------------------------- + // wmma f32_fp8/bf8 | not allowed for | neg_lo = 1 neg C(f32) + // (4 instructions) | f8 and bf8 matrices | neg_hi = 1 abs C(f32) + // --------------------------------------------------------------------------- + // swmmac f32_f16 | both neg_lo,neg_hi = 1 | not allowed for sparse matrix + // swmmac f32_bf16 | neg A/B (f16 or bf16) | A Index - matrix C is in dst + // --------------------------------------------------------------------------- + // swmmac f16_f16 | both neg_lo,neg_hi = 1 | not allowed for sparse matrix + // swmmac bf16_bf16 | neg A/B (f16 or bf16) | A Index - matrix C is in dst + // --------------------------------------------------------------------------- + // swmmac i32_iu8/iu4 | neg_lo = 0 u4/u8(zext) | not allowed for sparse matrix + // | neg_lo = 1 i4/i8(sext) | A Index - matrix C is in dst + // --------------------------------------------------------------------------- + // swmmac f32_fp8/bf8 | not allowed for | not allowed for sparse matrix + // (4 instructions) | f8 and bf8 matrices | A Index - matrix C is in dst + + // pseudo + + // fp8bf8 wmmas don't use src (0 and 1) modifiers, iu use neg_lo, f16 and bf16 + // use neg_lo and neg_hi. iu wmmas (C is i32) don't use src 2 modifiers, + // remaining wmmas(f16, bf16 and f8bf8) use neg_lo and neg_hi for C (C is f32 + // f16 or bf16). swmmac use index_key and don't use src 2 modifiers. 
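+  // Editorial note (IR sketch, hypothetical values): the WMMAModsF16Neg /
+  // WMMAModsF32NegAbs complex patterns fold plain IR negation/abs into these
+  // modifier bits, so e.g.
+  //   %nc = fneg <8 x float> %c
+  //   %d  = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16(
+  //             <8 x half> %a, <8 x half> %b, <8 x float> %nc)
+  // should select with src2_modifiers carrying NEG rather than emitting a
+  // separate negate instruction.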
+ + dag Src0Mods = !if(IsFP8BF8, (ins), (ins PackedF16InputMods:$src0_modifiers)); + dag Src1Mods = !if(IsFP8BF8, (ins), (ins PackedF16InputMods:$src1_modifiers)); + dag Src2Mods = !if(IsIU, (ins), (ins PackedF16InputMods:$src2_modifiers)); + dag IndexKey = !cond(!eq(IndexType, 0) : (ins), + !eq(IndexType, 8) : (ins IndexKey8bit:$index_key_8bit), + !eq(IndexType, 16): (ins IndexKey16bit:$index_key_16bit)); + dag Clamp = !if(IsIU, (ins clampmod0:$clamp), (ins)); + dag Neg = !cond(!and(NegLoAny, NegHiAny) : (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi), + !and(NegLoAny, !not(NegHiAny)) : (ins neg_lo0:$neg_lo), + !and(!not(NegLoAny), !not(NegHiAny)) : (ins)); + + let InsVOP3P = !con(Src0Mods, (ins Src0RC64:$src0), Src1Mods, (ins Src1RC64:$src1), + !cond(IsWMMA : !con(Src2Mods, (ins Src2RC64:$src2)), + IsSWMMAC : !con((ins DstRC:$srcTiedDef), (ins VRegSrc_32:$src2), IndexKey)), + Clamp, Neg); + + // asm + + string IndexKeyAsm = !cond(!eq(IndexType, 0) : "", + !eq(IndexType, 8) : "$index_key_8bit", + !eq(IndexType, 16) : "$index_key_16bit"); + string ClampAsm = !if(IsIU, "$clamp", ""); + string NegAsm = !cond(!and(NegLoAny, NegHiAny) : "$neg_lo$neg_hi", + !and(NegLoAny, !not(NegHiAny)) : "$neg_lo", + !and(!not(NegLoAny), !not(NegHiAny)) : ""); + + let AsmVOP3P = "$vdst, $src0, $src1, $src2"#IndexKeyAsm#NegAsm#ClampAsm; + + // isel patterns + + dag Src0InPat = !cond(IsAB_F16 : (ins (Src0VT (WMMAModsF16Neg Src0VT:$src0, i32:$src0_modifiers))), + IsAB_BF16 : (ins Src0VT:$src0), + IsIU : (ins (VOP3PModsNeg i32:$src0_modifiers), Src0VT:$src0), + IsFP8BF8 : (ins Src0VT:$src0)); + dag Src0OutPat = !cond(IsAB_F16 : (ins i32:$src0_modifiers, Src0VT:$src0), + IsAB_BF16 : (ins (i32 8), Src0VT:$src0), + IsIU : (ins i32:$src0_modifiers, Src0VT:$src0), + IsFP8BF8 : (ins Src0VT:$src0)); + dag Src1InPat = !cond(IsAB_F16 : (ins (Src1VT (WMMAModsF16Neg Src1VT:$src1, i32:$src1_modifiers))), + IsAB_BF16 : (ins Src1VT:$src1), + IsIU : (ins (VOP3PModsNeg i32:$src1_modifiers), Src1VT:$src1), + IsFP8BF8 : (ins Src1VT:$src1)); + dag Src1OutPat = !cond(IsAB_F16 : (ins i32:$src1_modifiers, Src1VT:$src1), + IsAB_BF16 : (ins (i32 8), Src1VT:$src1), + IsIU : (ins i32:$src1_modifiers, Src1VT:$src1), + IsFP8BF8 : (ins Src1VT:$src1)); + dag Src2InPatWmma = !cond(IsC_F32 : (ins (Src2VT (WMMAModsF32NegAbs Src2VT:$src2, i32:$src2_modifiers))), + IsC_F16 : (ins (Src2VT (WMMAModsF16NegAbs Src2VT:$src2, i32:$src2_modifiers))), + IsC_BF16 : (ins Src2VT:$src2), + IsIU : (ins Src2VT:$src2), + IsSWMMAC : (ins)); + dag Src2OutPatWmma = !cond(IsC_F32 : (ins i32:$src2_modifiers, Src2VT:$src2), + IsC_F16 : (ins i32:$src2_modifiers, Src2VT:$src2), + IsC_BF16 : (ins (i32 8), Src2VT:$src2), + IsIU : (ins Src2VT:$src2), + IsSWMMAC : (ins)); + dag ClampPat = !if(IsIU, (ins i1:$clamp), (ins)); + dag IndexInPat = !cond(!eq(IndexType, 0) : (ins i32:$src2), + !eq(IndexType, 8) : (ins (i32 (SWMMACIndex8 i32:$src2, i32:$index_key_8bit))), + !eq(IndexType, 16): (ins (i32 (SWMMACIndex16 i32:$src2, i32:$index_key_16bit)))); + dag IndexOutPat = !cond(!eq(IndexType, 0) : (ins i32:$src2), + !eq(IndexType, 8) : (ins i32:$src2, i32:$index_key_8bit), + !eq(IndexType, 16): (ins i32:$src2, i32:$index_key_16bit)); + dag Src2InlineInPat = (ins (Src2VT (WMMAVISrc Src2VT:$src2))); + dag Src2InlineOutPat = !con(!if(IsIU, (ins), (ins (i32 8))), (ins Src2VT:$src2)); + + + dag WmmaInPat = !con(Src0InPat, Src1InPat, Src2InPatWmma, ClampPat); + dag WmmaOutPat = !con(Src0OutPat, Src1OutPat, Src2OutPatWmma, ClampPat); + + dag SwmmacInPat = !con(Src0InPat, Src1InPat, (ins 
Src2VT:$srcTiedDef), IndexInPat, ClampPat); + dag SwmmacOutPat = !con(Src0OutPat, Src1OutPat, (ins Src2VT:$srcTiedDef), IndexOutPat, ClampPat); + + // wmma pattern where src2 is inline imm uses _threeaddr pseudo, + // can't use _twoaddr since it would violate src2 tied to vdst constraint. + dag WmmaInlineInPat = !con(Src0InPat, Src1InPat, Src2InlineInPat, ClampPat); + dag WmmaInlineOutPat = !con(Src0OutPat, Src1OutPat, Src2InlineOutPat, ClampPat); +} + +multiclass WMMAInstGFX12<string Instr, VOP3PWMMA_Profile WMMAProfile, string PseudoInstrSuffix> { + let Mnemonic = Instr, mayRaiseFPException = 0, ReadsModeReg = 0 in { + let Constraints = "@earlyclobber $vdst,$vdst = $src2", isConvertibleToThreeAddress = 1 in + def _twoaddr : VOP3P_Pseudo<Instr, WMMAProfile>{ + let PseudoInstr = Instr#PseudoInstrSuffix; + } + + let Constraints = "@earlyclobber $vdst", SchedRW = [Write32Bit, Write32Bit] in + def _threeaddr : VOP3P_Pseudo<Instr, WMMAProfile>{ + let PseudoInstr = Instr#PseudoInstrSuffix; + } + + } + def : WMMAOpcodeMapping<!cast<Instruction>(NAME # _twoaddr), + !cast<Instruction>(NAME # _threeaddr)>; +} + +multiclass SWMMACInstGFX12<string Instr, VOP3PWMMA_Profile WMMAProfile, string PseudoInstrSuffix> { + def _twoaddr : VOP3P_Pseudo<Instr, WMMAProfile>{ + let Mnemonic = Instr; + let PseudoInstr = Instr#PseudoInstrSuffix; + let mayRaiseFPException = 0; + let ReadsModeReg = 0; + let AsmMatchConverter = "cvtSWMMAC"; + + let Constraints = "@earlyclobber $vdst,$vdst = $srcTiedDef"; + } +} + +// First argument in Profile is types for matrices D, A, B and C (D = A * B + C) +// as used by llvm ir, types are vectors(with matrix elements) +// wave32: +// For 16x16 matrices, lanes 0 to 31 will have 8 matrix elts, +// for 16 x 32 16 elts and for 16 x 64 lanes have 32 elts. +// wave64: +// lanes will have half the size of elements in lanes compared to wave32 with +// exception of 16x16_iu4: lanes0-31 will have 8xi4, remaining lanes are ignored + +// general idea on element distribution differences: +// wave32: lane n has 8 matrix elements +// wave64: lane n has first 4, lane n+32 has other 4 elements + +// index size, for each 2 elements in lane you need 4bits in index + +// Non-standard types (iu8, iu4, fp8, bf8) will be packed in vectors of i32s. +// Original type for them is in comment on the right and refers to A and B. 
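+// Editorial note (worked example): a 16x16 tile holds 256 elements, so in
+// wave32 each lane carries 256/32 = 8 of them: v8f16 for f16 A/B, or
+// 8 x 8-bit values packed into v2i32 ("8xi8" below). In wave64 each lane
+// carries half that: v4f16, or a single i32 for 4xi8.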
+ +def F32_F16_WMMA_w32 : VOP3PWMMA_Profile<[v8f32, v8f16, v8f16, v8f32], 0, 0, 0, 0>; +def F32_BF16_WMMA_w32 : VOP3PWMMA_Profile<[v8f32, v8i16, v8i16, v8f32], 0, 0, 0, 0>; +def F16_F16_WMMA_w32 : VOP3PWMMA_Profile<[v8f16, v8f16, v8f16, v8f16], 0, 0, 0, 0>; +def BF16_BF16_WMMA_w32 : VOP3PWMMA_Profile<[v8i16, v8i16, v8i16, v8i16], 0, 0, 0, 0>; +def I32_IU8_WMMA_w32 : VOP3PWMMA_Profile<[v8i32, v2i32, v2i32, v8i32], 0, 0, 1, 0>; // 8xi8 +def I32_IU4X16_WMMA_w32 : VOP3PWMMA_Profile<[v8i32, i32, i32, v8i32], 0, 0, 1, 0>; // 8xi4 +def F32_FP8BF8_WMMA_w32 : VOP3PWMMA_Profile<[v8f32, v2i32, v2i32, v8f32], 0, 0, 0, 1>; // 8xf8 +def I32_IU4X32_WMMA_w32 : VOP3PWMMA_Profile<[v8i32, v2i32, v2i32, v8i32], 0, 0, 1, 0>; // 16xi4 + +def F32_F16_WMMA_w64 : VOP3PWMMA_Profile<[v4f32, v4f16, v4f16, v4f32], 0, 0, 0, 0>; +def F32_BF16_WMMA_w64 : VOP3PWMMA_Profile<[v4f32, v4i16, v4i16, v4f32], 0, 0, 0, 0>; +def F16_F16_WMMA_w64 : VOP3PWMMA_Profile<[v4f16, v4f16, v4f16, v4f16], 0, 0, 0, 0>; +def BF16_BF16_WMMA_w64 : VOP3PWMMA_Profile<[v4i16, v4i16, v4i16, v4i16], 0, 0, 0, 0>; +def I32_IU8_WMMA_w64 : VOP3PWMMA_Profile<[v4i32, i32, i32, v4i32], 0, 0, 1, 0>; // 4xi8 +def I32_IU4X16_WMMA_w64 : VOP3PWMMA_Profile<[v4i32, i32, i32, v4i32], 0, 0, 1, 0>; // 8xi4 * +def F32_FP8BF8_WMMA_w64 : VOP3PWMMA_Profile<[v4f32, i32, i32, v4f32], 0, 0, 0, 1>; // 4xf8 +def I32_IU4X32_WMMA_w64 : VOP3PWMMA_Profile<[v4i32, i32, i32, v4i32], 0, 0, 1, 0>; // 8xi4 + +def F32_F16_SWMMAC_w32 : VOP3PWMMA_Profile<[v8f32, v8f16, v16f16, v8f32], 1, 16, 0, 0>; +def F32_BF16_SWMMAC_w32 : VOP3PWMMA_Profile<[v8f32, v8i16, v16i16, v8f32], 1, 16, 0, 0>; +def F16_F16_SWMMAC_w32 : VOP3PWMMA_Profile<[v8f16, v8f16, v16f16, v8f16], 1, 16, 0, 0>; +def BF16_BF16_SWMMAC_w32 : VOP3PWMMA_Profile<[v8i16, v8i16, v16i16, v8i16], 1, 16, 0, 0>; +def I32_IU8_SWMMAC_w32 : VOP3PWMMA_Profile<[v8i32, v2i32, v4i32, v8i32], 1, 16, 1, 0>; // 8xi8, 16xi8 +def I32_IU4X32_SWMMAC_w32 : VOP3PWMMA_Profile<[v8i32, i32, v2i32, v8i32], 1, 16, 1, 0>; // 8xi4, 16xi4 +def I32_IU4X64_SWMMAC_w32 : VOP3PWMMA_Profile<[v8i32, v2i32, v4i32, v8i32], 1, 0, 1, 0>; // 16xi4, 32xi4 ** +def F32_FP8BF8_SWMMAC_w32 : VOP3PWMMA_Profile<[v8f32, v2i32, v4i32, v8f32], 1, 16, 0, 1>; // 8xf8, 16xf8 + +def F32_F16_SWMMAC_w64 : VOP3PWMMA_Profile<[v4f32, v4f16, v8f16, v4f32], 1, 8, 0, 0>; +def F32_BF16_SWMMAC_w64 : VOP3PWMMA_Profile<[v4f32, v4i16, v8i16, v4f32], 1, 8, 0, 0>; +def F16_F16_SWMMAC_w64 : VOP3PWMMA_Profile<[v4f16, v4f16, v8f16, v4f16], 1, 8, 0, 0>; +def BF16_BF16_SWMMAC_w64 : VOP3PWMMA_Profile<[v4i16, v4i16, v8i16, v4i16], 1, 8, 0, 0>; +def I32_IU8_SWMMAC_w64 : VOP3PWMMA_Profile<[v4i32, i32, v2i32, v4i32], 1, 8, 1, 0>; // 4xi8, 8xi8 +def I32_IU4X32_SWMMAC_w64 : VOP3PWMMA_Profile<[v4i32, i32, i32, v4i32], 1, 16, 1, 0>; // 8xi4, 8xi4 *** +def I32_IU4X64_SWMMAC_w64 : VOP3PWMMA_Profile<[v4i32, i32, v2i32, v4i32], 1, 16, 1, 0>; // 8xi4, 16xi4 +def F32_FP8BF8_SWMMAC_w64 : VOP3PWMMA_Profile<[v4f32, i32, v2i32, v4f32], 1, 8, 0, 1>; // 4xf8, 8xf8 + +// * IU4X16_WMMA_w64 lanes 0-31 will have 8xi4, remaining lanes are ignored +// ** IU4X64_SWMMAC_w32 index is i32, index_key is not used +// *** IU4X32_SWMMAC_w64 lanes 0-31 will have 8xi4 remaining lanes are ignored +// for matrix A, index is i16; Matrix B uses all lanes + +let WaveSizePredicate = isWave32 in { +defm V_WMMA_F32_16X16X16_F16_w32 : WMMAInstGFX12<"v_wmma_f32_16x16x16_f16", F32_F16_WMMA_w32, "_w32">; +defm V_WMMA_F32_16X16X16_BF16_w32 : WMMAInstGFX12<"v_wmma_f32_16x16x16_bf16", F32_BF16_WMMA_w32, "_w32">; +defm V_WMMA_F16_16X16X16_F16_w32 : 
WMMAInstGFX12<"v_wmma_f16_16x16x16_f16", F16_F16_WMMA_w32, "_w32">; +defm V_WMMA_BF16_16X16X16_BF16_w32 : WMMAInstGFX12<"v_wmma_bf16_16x16x16_bf16", BF16_BF16_WMMA_w32, "_w32">; +defm V_WMMA_I32_16X16X16_IU8_w32 : WMMAInstGFX12<"v_wmma_i32_16x16x16_iu8", I32_IU8_WMMA_w32, "_w32">; +defm V_WMMA_I32_16X16X16_IU4_w32 : WMMAInstGFX12<"v_wmma_i32_16x16x16_iu4", I32_IU4X16_WMMA_w32, "_w32">; +defm V_WMMA_F32_16X16X16_FP8_FP8_w32 : WMMAInstGFX12<"v_wmma_f32_16x16x16_fp8_fp8", F32_FP8BF8_WMMA_w32, "_w32">; +defm V_WMMA_F32_16X16X16_FP8_BF8_w32 : WMMAInstGFX12<"v_wmma_f32_16x16x16_fp8_bf8", F32_FP8BF8_WMMA_w32, "_w32">; +defm V_WMMA_F32_16X16X16_BF8_FP8_w32 : WMMAInstGFX12<"v_wmma_f32_16x16x16_bf8_fp8", F32_FP8BF8_WMMA_w32, "_w32">; +defm V_WMMA_F32_16X16X16_BF8_BF8_w32 : WMMAInstGFX12<"v_wmma_f32_16x16x16_bf8_bf8", F32_FP8BF8_WMMA_w32, "_w32">; +defm V_WMMA_I32_16X16X32_IU4_w32 : WMMAInstGFX12<"v_wmma_i32_16x16x32_iu4", I32_IU4X32_WMMA_w32, "_w32">; + +defm V_SWMMAC_F32_16X16X32_F16_w32 : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_f16", F32_F16_SWMMAC_w32, "_w32">; +defm V_SWMMAC_F32_16X16X32_BF16_w32 : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_bf16", F32_BF16_SWMMAC_w32, "_w32">; +defm V_SWMMAC_F16_16X16X32_F16_w32 : SWMMACInstGFX12<"v_swmmac_f16_16x16x32_f16", F16_F16_SWMMAC_w32, "_w32">; +defm V_SWMMAC_BF16_16X16X32_BF16_w32 : SWMMACInstGFX12<"v_swmmac_bf16_16x16x32_bf16", BF16_BF16_SWMMAC_w32, "_w32">; +defm V_SWMMAC_I32_16X16X32_IU8_w32 : SWMMACInstGFX12<"v_swmmac_i32_16x16x32_iu8", I32_IU8_SWMMAC_w32, "_w32">; +defm V_SWMMAC_I32_16X16X32_IU4_w32 : SWMMACInstGFX12<"v_swmmac_i32_16x16x32_iu4", I32_IU4X32_SWMMAC_w32, "_w32">; +defm V_SWMMAC_I32_16X16X64_IU4_w32 : SWMMACInstGFX12<"v_swmmac_i32_16x16x64_iu4", I32_IU4X64_SWMMAC_w32, "_w32">; +defm V_SWMMAC_F32_16X16X32_FP8_FP8_w32 : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_fp8_fp8", F32_FP8BF8_SWMMAC_w32, "_w32">; +defm V_SWMMAC_F32_16X16X32_FP8_BF8_w32 : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_fp8_bf8", F32_FP8BF8_SWMMAC_w32, "_w32">; +defm V_SWMMAC_F32_16X16X32_BF8_FP8_w32 : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_bf8_fp8", F32_FP8BF8_SWMMAC_w32, "_w32">; +defm V_SWMMAC_F32_16X16X32_BF8_BF8_w32 : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_bf8_bf8", F32_FP8BF8_SWMMAC_w32, "_w32">; +} + +let WaveSizePredicate = isWave64 in { +defm V_WMMA_F32_16X16X16_F16_w64 : WMMAInstGFX12<"v_wmma_f32_16x16x16_f16", F32_F16_WMMA_w64, "_w64">; +defm V_WMMA_F32_16X16X16_BF16_w64 : WMMAInstGFX12<"v_wmma_f32_16x16x16_bf16", F32_BF16_WMMA_w64, "_w64">; +defm V_WMMA_F16_16X16X16_F16_w64 : WMMAInstGFX12<"v_wmma_f16_16x16x16_f16", F16_F16_WMMA_w64, "_w64">; +defm V_WMMA_BF16_16X16X16_BF16_w64 : WMMAInstGFX12<"v_wmma_bf16_16x16x16_bf16", BF16_BF16_WMMA_w64, "_w64">; +defm V_WMMA_I32_16X16X16_IU8_w64 : WMMAInstGFX12<"v_wmma_i32_16x16x16_iu8", I32_IU8_WMMA_w64, "_w64">; +defm V_WMMA_I32_16X16X16_IU4_w64 : WMMAInstGFX12<"v_wmma_i32_16x16x16_iu4", I32_IU4X16_WMMA_w64, "_w64">; +defm V_WMMA_F32_16X16X16_FP8_FP8_w64 : WMMAInstGFX12<"v_wmma_f32_16x16x16_fp8_fp8", F32_FP8BF8_WMMA_w64, "_w64">; +defm V_WMMA_F32_16X16X16_FP8_BF8_w64 : WMMAInstGFX12<"v_wmma_f32_16x16x16_fp8_bf8", F32_FP8BF8_WMMA_w64, "_w64">; +defm V_WMMA_F32_16X16X16_BF8_FP8_w64 : WMMAInstGFX12<"v_wmma_f32_16x16x16_bf8_fp8", F32_FP8BF8_WMMA_w64, "_w64">; +defm V_WMMA_F32_16X16X16_BF8_BF8_w64 : WMMAInstGFX12<"v_wmma_f32_16x16x16_bf8_bf8", F32_FP8BF8_WMMA_w64, "_w64">; +defm V_WMMA_I32_16X16X32_IU4_w64 : WMMAInstGFX12<"v_wmma_i32_16x16x32_iu4", I32_IU4X32_WMMA_w64, "_w64">; + +defm V_SWMMAC_F32_16X16X32_F16_w64 : 
SWMMACInstGFX12<"v_swmmac_f32_16x16x32_f16", F32_F16_SWMMAC_w64, "_w64">; +defm V_SWMMAC_F32_16X16X32_BF16_w64 : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_bf16", F32_BF16_SWMMAC_w64, "_w64">; +defm V_SWMMAC_F16_16X16X32_F16_w64 : SWMMACInstGFX12<"v_swmmac_f16_16x16x32_f16", F16_F16_SWMMAC_w64, "_w64">; +defm V_SWMMAC_BF16_16X16X32_BF16_w64 : SWMMACInstGFX12<"v_swmmac_bf16_16x16x32_bf16", BF16_BF16_SWMMAC_w64, "_w64">; +defm V_SWMMAC_I32_16X16X32_IU8_w64 : SWMMACInstGFX12<"v_swmmac_i32_16x16x32_iu8", I32_IU8_SWMMAC_w64, "_w64">; +defm V_SWMMAC_I32_16X16X32_IU4_w64 : SWMMACInstGFX12<"v_swmmac_i32_16x16x32_iu4", I32_IU4X32_SWMMAC_w64, "_w64">; +defm V_SWMMAC_I32_16X16X64_IU4_w64 : SWMMACInstGFX12<"v_swmmac_i32_16x16x64_iu4", I32_IU4X64_SWMMAC_w64, "_w64">; +defm V_SWMMAC_F32_16X16X32_FP8_FP8_w64 : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_fp8_fp8", F32_FP8BF8_SWMMAC_w64, "_w64">; +defm V_SWMMAC_F32_16X16X32_FP8_BF8_w64 : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_fp8_bf8", F32_FP8BF8_SWMMAC_w64, "_w64">; +defm V_SWMMAC_F32_16X16X32_BF8_FP8_w64 : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_bf8_fp8", F32_FP8BF8_SWMMAC_w64, "_w64">; +defm V_SWMMAC_F32_16X16X32_BF8_BF8_w64 : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_bf8_bf8", F32_FP8BF8_SWMMAC_w64, "_w64">; +} + +// IsGFX11OpselIntrinsic: f16_f16 and bf16_bf16 Intrinsics have imm operand that +// controls opsel. Used by gfx11, removed in gfx12 (operand must be 0). +multiclass WMMAPat<string Inst, SDPatternOperator node, VOP3PWMMA_Profile P, bit IsGFX11OpselIntrinsic = 0> { + def : GCNPat <(P.DstVT !setdagop(!con(P.WmmaInPat, !if(IsGFX11OpselIntrinsic, (ins 0), (ins))), node)), + (P.DstVT !setdagop(P.WmmaOutPat, !cast<Instruction>(Inst#"_twoaddr")))>; + let AddedComplexity = 4 in + def : GCNPat <(P.DstVT !setdagop(!con(P.WmmaInlineInPat, !if(IsGFX11OpselIntrinsic, (ins 0), (ins))), node)), + (P.DstVT !setdagop(P.WmmaInlineOutPat, !cast<Instruction>(Inst#"_threeaddr")))>; +} + +class SWMMACPat<Instruction Inst, SDPatternOperator node, VOP3PWMMA_Profile P> : + GCNPat <(P.DstVT !setdagop(P.SwmmacInPat, node)), + (P.DstVT !setdagop(P.SwmmacOutPat, Inst))>; + +class SWMMACPat_w64<Instruction Inst, SDPatternOperator node, VOP3PWMMA_Profile P> : + GCNPat <(P.DstVT !setdagop(P.SwmmacInPat, node)), + (P.DstVT !setdagop(P.SwmmacOutPat, Inst))>{ + let WaveSizePredicate = isWave64; + } + +let WaveSizePredicate = isWave32, SubtargetPredicate = isGFX12Plus in { + defm : WMMAPat<"V_WMMA_F32_16X16X16_F16_w32", int_amdgcn_wmma_f32_16x16x16_f16, F32_F16_WMMA_w32>; + defm : WMMAPat<"V_WMMA_F32_16X16X16_BF16_w32", int_amdgcn_wmma_f32_16x16x16_bf16, F32_BF16_WMMA_w32>; + defm : WMMAPat<"V_WMMA_F16_16X16X16_F16_w32", int_amdgcn_wmma_f16_16x16x16_f16, F16_F16_WMMA_w32,1>; + defm : WMMAPat<"V_WMMA_BF16_16X16X16_BF16_w32", int_amdgcn_wmma_bf16_16x16x16_bf16, BF16_BF16_WMMA_w32,1>; + defm : WMMAPat<"V_WMMA_I32_16X16X16_IU8_w32", int_amdgcn_wmma_i32_16x16x16_iu8, I32_IU8_WMMA_w32>; + defm : WMMAPat<"V_WMMA_I32_16X16X16_IU4_w32", int_amdgcn_wmma_i32_16x16x16_iu4, I32_IU4X16_WMMA_w32>; + defm : WMMAPat<"V_WMMA_F32_16X16X16_FP8_FP8_w32", int_amdgcn_wmma_f32_16x16x16_fp8_fp8, F32_FP8BF8_WMMA_w32>; + defm : WMMAPat<"V_WMMA_F32_16X16X16_FP8_BF8_w32", int_amdgcn_wmma_f32_16x16x16_fp8_bf8, F32_FP8BF8_WMMA_w32>; + defm : WMMAPat<"V_WMMA_F32_16X16X16_BF8_FP8_w32", int_amdgcn_wmma_f32_16x16x16_bf8_fp8, F32_FP8BF8_WMMA_w32>; + defm : WMMAPat<"V_WMMA_F32_16X16X16_BF8_BF8_w32", int_amdgcn_wmma_f32_16x16x16_bf8_bf8, F32_FP8BF8_WMMA_w32>; + defm : WMMAPat<"V_WMMA_I32_16X16X32_IU4_w32", 
int_amdgcn_wmma_i32_16x16x32_iu4, I32_IU4X32_WMMA_w32>; + + def : SWMMACPat<V_SWMMAC_F32_16X16X32_F16_w32_twoaddr, int_amdgcn_swmmac_f32_16x16x32_f16, F32_F16_SWMMAC_w32>; + def : SWMMACPat<V_SWMMAC_F32_16X16X32_BF16_w32_twoaddr, int_amdgcn_swmmac_f32_16x16x32_bf16, F32_BF16_SWMMAC_w32>; + def : SWMMACPat<V_SWMMAC_F16_16X16X32_F16_w32_twoaddr, int_amdgcn_swmmac_f16_16x16x32_f16, F16_F16_SWMMAC_w32>; + def : SWMMACPat<V_SWMMAC_BF16_16X16X32_BF16_w32_twoaddr, int_amdgcn_swmmac_bf16_16x16x32_bf16, BF16_BF16_SWMMAC_w32>; + def : SWMMACPat<V_SWMMAC_I32_16X16X32_IU8_w32_twoaddr, int_amdgcn_swmmac_i32_16x16x32_iu8, I32_IU8_SWMMAC_w32>; + def : SWMMACPat<V_SWMMAC_I32_16X16X32_IU4_w32_twoaddr, int_amdgcn_swmmac_i32_16x16x32_iu4, I32_IU4X32_SWMMAC_w32>; + def : GCNPat <(I32_IU4X64_SWMMAC_w32.DstVT !setdagop(I32_IU4X64_SWMMAC_w32.SwmmacInPat, int_amdgcn_swmmac_i32_16x16x64_iu4)), + (I32_IU4X64_SWMMAC_w32.DstVT !setdagop(I32_IU4X64_SWMMAC_w32.SwmmacOutPat, V_SWMMAC_I32_16X16X64_IU4_w32_twoaddr))>; + def : SWMMACPat<V_SWMMAC_F32_16X16X32_FP8_FP8_w32_twoaddr, int_amdgcn_swmmac_f32_16x16x32_fp8_fp8, F32_FP8BF8_SWMMAC_w32>; + def : SWMMACPat<V_SWMMAC_F32_16X16X32_FP8_BF8_w32_twoaddr, int_amdgcn_swmmac_f32_16x16x32_fp8_bf8, F32_FP8BF8_SWMMAC_w32>; + def : SWMMACPat<V_SWMMAC_F32_16X16X32_BF8_FP8_w32_twoaddr, int_amdgcn_swmmac_f32_16x16x32_bf8_fp8, F32_FP8BF8_SWMMAC_w32>; + def : SWMMACPat<V_SWMMAC_F32_16X16X32_BF8_BF8_w32_twoaddr, int_amdgcn_swmmac_f32_16x16x32_bf8_bf8, F32_FP8BF8_SWMMAC_w32>; +} + +let WaveSizePredicate = isWave64, SubtargetPredicate = isGFX12Plus in { + defm : WMMAPat<"V_WMMA_F32_16X16X16_F16_w64", int_amdgcn_wmma_f32_16x16x16_f16, F32_F16_WMMA_w64>; + defm : WMMAPat<"V_WMMA_F32_16X16X16_BF16_w64", int_amdgcn_wmma_f32_16x16x16_bf16, F32_BF16_WMMA_w64>; + defm : WMMAPat<"V_WMMA_F16_16X16X16_F16_w64", int_amdgcn_wmma_f16_16x16x16_f16, F16_F16_WMMA_w64,1>; + defm : WMMAPat<"V_WMMA_BF16_16X16X16_BF16_w64", int_amdgcn_wmma_bf16_16x16x16_bf16, BF16_BF16_WMMA_w64,1>; + defm : WMMAPat<"V_WMMA_I32_16X16X16_IU8_w64", int_amdgcn_wmma_i32_16x16x16_iu8, I32_IU8_WMMA_w64>; + defm : WMMAPat<"V_WMMA_I32_16X16X16_IU4_w64", int_amdgcn_wmma_i32_16x16x16_iu4, I32_IU4X16_WMMA_w64>; + defm : WMMAPat<"V_WMMA_F32_16X16X16_FP8_FP8_w64", int_amdgcn_wmma_f32_16x16x16_fp8_fp8, F32_FP8BF8_WMMA_w64>; + defm : WMMAPat<"V_WMMA_F32_16X16X16_FP8_BF8_w64", int_amdgcn_wmma_f32_16x16x16_fp8_bf8, F32_FP8BF8_WMMA_w64>; + defm : WMMAPat<"V_WMMA_F32_16X16X16_BF8_FP8_w64", int_amdgcn_wmma_f32_16x16x16_bf8_fp8, F32_FP8BF8_WMMA_w64>; + defm : WMMAPat<"V_WMMA_F32_16X16X16_BF8_BF8_w64", int_amdgcn_wmma_f32_16x16x16_bf8_bf8, F32_FP8BF8_WMMA_w64>; + defm : WMMAPat<"V_WMMA_I32_16X16X32_IU4_w64", int_amdgcn_wmma_i32_16x16x32_iu4, I32_IU4X32_WMMA_w64>; + + def : SWMMACPat<V_SWMMAC_F32_16X16X32_F16_w64_twoaddr, int_amdgcn_swmmac_f32_16x16x32_f16, F32_F16_SWMMAC_w64>; + def : SWMMACPat<V_SWMMAC_F32_16X16X32_BF16_w64_twoaddr, int_amdgcn_swmmac_f32_16x16x32_bf16, F32_BF16_SWMMAC_w64>; + def : SWMMACPat<V_SWMMAC_F16_16X16X32_F16_w64_twoaddr, int_amdgcn_swmmac_f16_16x16x32_f16, F16_F16_SWMMAC_w64>; + def : SWMMACPat<V_SWMMAC_BF16_16X16X32_BF16_w64_twoaddr, int_amdgcn_swmmac_bf16_16x16x32_bf16, BF16_BF16_SWMMAC_w64>; + def : SWMMACPat<V_SWMMAC_I32_16X16X32_IU8_w64_twoaddr, int_amdgcn_swmmac_i32_16x16x32_iu8, I32_IU8_SWMMAC_w64>; + def : SWMMACPat<V_SWMMAC_I32_16X16X32_IU4_w64_twoaddr, int_amdgcn_swmmac_i32_16x16x32_iu4, I32_IU4X32_SWMMAC_w64>; + def : SWMMACPat<V_SWMMAC_I32_16X16X64_IU4_w64_twoaddr, int_amdgcn_swmmac_i32_16x16x64_iu4, 
I32_IU4X64_SWMMAC_w64>; + def : SWMMACPat<V_SWMMAC_F32_16X16X32_FP8_FP8_w64_twoaddr, int_amdgcn_swmmac_f32_16x16x32_fp8_fp8, F32_FP8BF8_SWMMAC_w64>; + def : SWMMACPat<V_SWMMAC_F32_16X16X32_FP8_BF8_w64_twoaddr, int_amdgcn_swmmac_f32_16x16x32_fp8_bf8, F32_FP8BF8_SWMMAC_w64>; + def : SWMMACPat<V_SWMMAC_F32_16X16X32_BF8_FP8_w64_twoaddr, int_amdgcn_swmmac_f32_16x16x32_bf8_fp8, F32_FP8BF8_SWMMAC_w64>; + def : SWMMACPat<V_SWMMAC_F32_16X16X32_BF8_BF8_w64_twoaddr, int_amdgcn_swmmac_f32_16x16x32_bf8_bf8, F32_FP8BF8_SWMMAC_w64>; +} + + //===----------------------------------------------------------------------===// // Begin Real Encodings //===----------------------------------------------------------------------===// @@ -1005,6 +1400,99 @@ multiclass VOP3P_Real_Base<GFXGen Gen, bits<7> op, string backing_ps_name = NAME VOP3Pe_gfx11_gfx12<op, !cast<VOP3P_Pseudo>(backing_ps_name).Pfl>; } +class VOP3PeWmma<bits<7> op, VOPProfile P, VOP3PWMMA_Profile WMMAP> + : VOP3Pe_gfx11_gfx12<op, P>{ + // opsel + let Inst{11} = !cond(!eq(WMMAP.IndexType, 0) : 0, + !eq(WMMAP.IndexType, 8) : index_key_8bit{0}, + !eq(WMMAP.IndexType, 16) : index_key_16bit{0}); + let Inst{12} = !if(!eq(WMMAP.IndexType, 8), index_key_8bit{1}, 0); + let Inst{13} = 0; + // opsel_hi + let Inst{59} = 1; + let Inst{60} = 1; + let Inst{14} = 1; + // neg_lo + let Inst{61} = !if(WMMAP.NegLo01, src0_modifiers{0}, 0); + let Inst{62} = !if(WMMAP.NegLo01, src1_modifiers{0}, 0); + let Inst{63} = !if(WMMAP.NegLo2, src2_modifiers{0}, 0); + // neg_hi + let Inst{8} = !if(WMMAP.NegHi01, src0_modifiers{1}, 0); + let Inst{9} = !if(WMMAP.NegHi01, src1_modifiers{1}, 0); + let Inst{10} = !if(WMMAP.NegHi2, src2_modifiers{1}, 0); + // clamp + let Inst{15} = !if(WMMAP.IsIU, clamp{0}, 0); +} + +multiclass VOP3P_WMMA_Real_Base<GFXGen Gen, bits<7> op, VOP3PWMMA_Profile WMMAP, + string backing_ps_name = NAME, + string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic> { + def Gen.Suffix : + VOP3P_Real_Gen<!cast<VOP3P_Pseudo>(backing_ps_name), Gen, asmName>, + VOP3PeWmma<op, !cast<VOP3P_Pseudo>(backing_ps_name).Pfl, WMMAP>; +} + +multiclass VOP3P_Real_WMMA_gfx12 <bits<7> op, VOP3PWMMA_Profile WMMAP> { + let WaveSizePredicate = isWave32, DecoderNamespace = "GFX12" in { + defm _twoaddr : VOP3P_WMMA_Real_Base <GFX12Gen, op, WMMAP>; + } +} + +multiclass VOP3P_Real_WMMA_gfx12w64 <bits<7> op, VOP3PWMMA_Profile WMMAP> { + let WaveSizePredicate = isWave64, DecoderNamespace = "WMMAGFX12" in { + defm _twoaddr : VOP3P_WMMA_Real_Base <GFX12Gen, op, WMMAP>; + } +} + +defm V_WMMA_F32_16X16X16_F16_w32 : VOP3P_Real_WMMA_gfx12 <0x040, F32_F16_WMMA_w32>; +defm V_WMMA_F32_16X16X16_BF16_w32 : VOP3P_Real_WMMA_gfx12 <0x041, F32_BF16_WMMA_w32>; +defm V_WMMA_F16_16X16X16_F16_w32 : VOP3P_Real_WMMA_gfx12 <0x042, F16_F16_WMMA_w32>; +defm V_WMMA_BF16_16X16X16_BF16_w32 : VOP3P_Real_WMMA_gfx12 <0x043, BF16_BF16_WMMA_w32>; +defm V_WMMA_I32_16X16X16_IU8_w32 : VOP3P_Real_WMMA_gfx12 <0x044, I32_IU8_WMMA_w32>; +defm V_WMMA_I32_16X16X16_IU4_w32 : VOP3P_Real_WMMA_gfx12 <0x045, I32_IU4X16_WMMA_w32>; +defm V_WMMA_F32_16X16X16_FP8_FP8_w32 : VOP3P_Real_WMMA_gfx12 <0x046, F32_FP8BF8_WMMA_w32>; +defm V_WMMA_F32_16X16X16_FP8_BF8_w32 : VOP3P_Real_WMMA_gfx12 <0x047, F32_FP8BF8_WMMA_w32>; +defm V_WMMA_F32_16X16X16_BF8_FP8_w32 : VOP3P_Real_WMMA_gfx12 <0x048, F32_FP8BF8_WMMA_w32>; +defm V_WMMA_F32_16X16X16_BF8_BF8_w32 : VOP3P_Real_WMMA_gfx12 <0x049, F32_FP8BF8_WMMA_w32>; +defm V_WMMA_I32_16X16X32_IU4_w32 : VOP3P_Real_WMMA_gfx12 <0x04a, I32_IU4X32_WMMA_w32>; + +defm V_WMMA_F32_16X16X16_F16_w64 : VOP3P_Real_WMMA_gfx12w64 
+defm V_WMMA_F32_16X16X16_BF16_w64 : VOP3P_Real_WMMA_gfx12w64 <0x041, F32_BF16_WMMA_w64>;
+defm V_WMMA_F16_16X16X16_F16_w64 : VOP3P_Real_WMMA_gfx12w64 <0x042, F16_F16_WMMA_w64>;
+defm V_WMMA_BF16_16X16X16_BF16_w64 : VOP3P_Real_WMMA_gfx12w64 <0x043, BF16_BF16_WMMA_w64>;
+defm V_WMMA_I32_16X16X16_IU8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x044, I32_IU8_WMMA_w64>;
+defm V_WMMA_I32_16X16X16_IU4_w64 : VOP3P_Real_WMMA_gfx12w64 <0x045, I32_IU4X16_WMMA_w64>;
+defm V_WMMA_F32_16X16X16_FP8_FP8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x046, F32_FP8BF8_WMMA_w64>;
+defm V_WMMA_F32_16X16X16_FP8_BF8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x047, F32_FP8BF8_WMMA_w64>;
+defm V_WMMA_F32_16X16X16_BF8_FP8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x048, F32_FP8BF8_WMMA_w64>;
+defm V_WMMA_F32_16X16X16_BF8_BF8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x049, F32_FP8BF8_WMMA_w64>;
+defm V_WMMA_I32_16X16X32_IU4_w64 : VOP3P_Real_WMMA_gfx12w64 <0x04a, I32_IU4X32_WMMA_w64>;
+
+
+defm V_SWMMAC_F32_16X16X32_F16_w32 : VOP3P_Real_WMMA_gfx12 <0x050, F32_F16_SWMMAC_w32>;
+defm V_SWMMAC_F32_16X16X32_BF16_w32 : VOP3P_Real_WMMA_gfx12 <0x051, F32_BF16_SWMMAC_w32>;
+defm V_SWMMAC_F16_16X16X32_F16_w32 : VOP3P_Real_WMMA_gfx12 <0x052, F16_F16_SWMMAC_w32>;
+defm V_SWMMAC_BF16_16X16X32_BF16_w32 : VOP3P_Real_WMMA_gfx12 <0x053, BF16_BF16_SWMMAC_w32>;
+defm V_SWMMAC_I32_16X16X32_IU8_w32 : VOP3P_Real_WMMA_gfx12 <0x054, I32_IU8_SWMMAC_w32>;
+defm V_SWMMAC_I32_16X16X32_IU4_w32 : VOP3P_Real_WMMA_gfx12 <0x055, I32_IU4X32_SWMMAC_w32>;
+defm V_SWMMAC_I32_16X16X64_IU4_w32 : VOP3P_Real_WMMA_gfx12 <0x056, I32_IU4X64_SWMMAC_w32>;
+defm V_SWMMAC_F32_16X16X32_FP8_FP8_w32 : VOP3P_Real_WMMA_gfx12 <0x057, F32_FP8BF8_SWMMAC_w32>;
+defm V_SWMMAC_F32_16X16X32_FP8_BF8_w32 : VOP3P_Real_WMMA_gfx12 <0x058, F32_FP8BF8_SWMMAC_w32>;
+defm V_SWMMAC_F32_16X16X32_BF8_FP8_w32 : VOP3P_Real_WMMA_gfx12 <0x059, F32_FP8BF8_SWMMAC_w32>;
+defm V_SWMMAC_F32_16X16X32_BF8_BF8_w32 : VOP3P_Real_WMMA_gfx12 <0x05a, F32_FP8BF8_SWMMAC_w32>;
+
+defm V_SWMMAC_F32_16X16X32_F16_w64 : VOP3P_Real_WMMA_gfx12w64 <0x050, F32_F16_SWMMAC_w64>;
+defm V_SWMMAC_F32_16X16X32_BF16_w64 : VOP3P_Real_WMMA_gfx12w64 <0x051, F32_BF16_SWMMAC_w64>;
+defm V_SWMMAC_F16_16X16X32_F16_w64 : VOP3P_Real_WMMA_gfx12w64 <0x052, F16_F16_SWMMAC_w64>;
+defm V_SWMMAC_BF16_16X16X32_BF16_w64 : VOP3P_Real_WMMA_gfx12w64 <0x053, BF16_BF16_SWMMAC_w64>;
+defm V_SWMMAC_I32_16X16X32_IU8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x054, I32_IU8_SWMMAC_w64>;
+defm V_SWMMAC_I32_16X16X32_IU4_w64 : VOP3P_Real_WMMA_gfx12w64 <0x055, I32_IU4X32_SWMMAC_w64>;
+defm V_SWMMAC_I32_16X16X64_IU4_w64 : VOP3P_Real_WMMA_gfx12w64 <0x056, I32_IU4X64_SWMMAC_w64>;
+defm V_SWMMAC_F32_16X16X32_FP8_FP8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x057, F32_FP8BF8_SWMMAC_w64>;
+defm V_SWMMAC_F32_16X16X32_FP8_BF8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x058, F32_FP8BF8_SWMMAC_w64>;
+defm V_SWMMAC_F32_16X16X32_BF8_FP8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x059, F32_FP8BF8_SWMMAC_w64>;
+defm V_SWMMAC_F32_16X16X32_BF8_BF8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x05a, F32_FP8BF8_SWMMAC_w64>;
+
multiclass VOP3P_Real_with_name<GFXGen Gen, bits<7> op,
                                string backing_ps_name = NAME,
                                string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic> {
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index df505c3365cb..20d7c88fb7e5 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -124,6 +124,7 @@ class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern = [],
 let IsPacked = P.IsPacked;
 let IsMAI = P.IsMAI;
 let IsWMMA = P.IsWMMA;
+ let IsSWMMAC = P.IsSWMMAC;
 let AsmOperands = !if(isVop3OpSel, P.AsmVOP3OpSel,
@@ -305,6 +306,11 @@ class VOP3OpSel_gfx10<bits<10> op, VOPProfile p> : VOP3e_gfx10<op, p> {
class VOP3OpSel_gfx11_gfx12<bits<10> op, VOPProfile p> : VOP3OpSel_gfx10<op, p>;
+class VOP3FP8OpSel_gfx11_gfx12<bits<10> op, VOPProfile p> : VOP3e_gfx10<op, p> {
+ let Inst{11} = !if(p.HasSrc0, src0_modifiers{2}, 0);
+ let Inst{12} = !if(p.HasSrc0, src0_modifiers{3}, 0);
+}
+
class VOP3DotOpSel_gfx11_gfx12<bits<10> op, VOPProfile p> : VOP3OpSel_gfx11_gfx12<op, p>{
 let Inst{11} = ?;
 let Inst{12} = ?;
@@ -378,6 +384,8 @@ class VOP3Pe <bits<7> op, VOPProfile P> : Enc64 {
 bits<4> src2_modifiers;
 bits<9> src2;
 bits<1> clamp;
+ bits<2> index_key_8bit;
+ bits<1> index_key_16bit;
 let Inst{7-0} = vdst;
 let Inst{8} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0); // neg_hi src0
@@ -738,7 +746,7 @@ class VOP3_DPPe_Common_Base<bits<10> op, VOPProfile P> : Enc96 {
 let Inst{10} = !if(P.HasSrc2Mods, src2_modifiers{1}, 0);
 // OPSEL must be set such that the low result only uses low inputs, and the high result only uses high inputs.
 let Inst{11} = !if(P.HasOpSel,!if(P.HasSrc0Mods, src0_modifiers{2}, 0),?);
- let Inst{12} = !if(P.HasOpSel,!if(P.HasSrc1Mods, src1_modifiers{2}, 0),?);
+ let Inst{12} = !if(P.HasOpSel,!if(P.HasSrc1Mods, src1_modifiers{2}, !if((P.IsFP8), src0_modifiers{3}, 0)), ?);
 let Inst{13} = !if(P.HasOpSel,!if(P.HasSrc2Mods, src2_modifiers{2}, 0),?);
 let Inst{14} = !if(P.HasOpSel,!if(P.HasSrc0Mods, src0_modifiers{3}, 0),?);
 let Inst{15} = !if(P.HasClamp, clamp, 0);
@@ -1406,14 +1414,20 @@ multiclass VOP3_Real_with_name<GFXGen Gen, bits<10> op, string opName,
 defvar ps = !cast<VOP_Pseudo>(opName#"_e64");
 let AsmString = asmName # ps.AsmOperands,
     IsSingle = !or(isSingle, ps.Pfl.IsSingle) in {
- if ps.Pfl.HasOpSel then
-   def _e64#Gen.Suffix :
-     VOP3_Real_Gen<ps, Gen>,
-     VOP3OpSel_gfx11_gfx12<op, ps.Pfl>;
- if !not(ps.Pfl.HasOpSel) then
-   def _e64#Gen.Suffix :
-     VOP3_Real_Gen<ps, Gen>,
-     VOP3e_gfx11_gfx12<op, ps.Pfl>;
+ if ps.Pfl.IsFP8 then {
+   def _e64#Gen.Suffix :
+     VOP3_Real_Gen<ps, Gen>,
+     VOP3FP8OpSel_gfx11_gfx12<op, ps.Pfl>;
+ } else {
+   if ps.Pfl.HasOpSel then
+     def _e64#Gen.Suffix :
+       VOP3_Real_Gen<ps, Gen>,
+       VOP3OpSel_gfx11_gfx12<op, ps.Pfl>;
+   if !not(ps.Pfl.HasOpSel) then
+     def _e64#Gen.Suffix :
+       VOP3_Real_Gen<ps, Gen>,
+       VOP3e_gfx11_gfx12<op, ps.Pfl>;
+ }
 }
 def Gen.Suffix#"_VOP3_alias" : MnemonicAlias<ps.Mnemonic, asmName>, Requires<[Gen.AssemblerPredicate]>, LetDummies;
}
diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
index 5c1c7046fdbf..8629551152cb 100644
--- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -1806,12 +1806,13 @@ void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) {
 PostOrderLoopTraversal DFS(LoLoop.ML, *MLI);
 DFS.ProcessLoop();
 const SmallVectorImpl<MachineBasicBlock*> &PostOrder = DFS.getOrder();
- for (auto *MBB : PostOrder) {
-   recomputeLiveIns(*MBB);
-   // FIXME: For some reason, the live-in print order is non-deterministic for
-   // our tests and I can't work out why... So just sort them.
-   MBB->sortUniqueLiveIns();
- }
+ bool anyChange = false;
+ do {
+   anyChange = false;
+   for (auto *MBB : PostOrder) {
+     anyChange = recomputeLiveIns(*MBB) || anyChange;
+   }
+ } while (anyChange);
 for (auto *MBB : reverse(PostOrder))
   recomputeLivenessFlags(*MBB);
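Editorially, the recurring pattern in this import (here and in the PPC, SystemZ, and X86 hunks below) is that a single recomputeLiveIns pass is not enough: a block's live-ins depend on its successors' live-ins, so one sweep in an arbitrary order can leave earlier blocks stale. The patches iterate to a fixed point instead. A minimal stand-alone sketch of that idea; Block, Uses, and Defs are hypothetical stand-ins, not the LLVM MachineBasicBlock API:

#include <cassert>
#include <set>
#include <utility>
#include <vector>

// Hypothetical CFG block: live-in = Uses | (live-out minus Defs), where the
// live-out of a block is the union of its successors' live-ins.
struct Block {
  std::set<int> Uses, Defs, LiveIns;
  std::vector<Block *> Succs;
};

// One recomputation; returns true if the live-in set changed, which is the
// contract the patched callers rely on.
bool recomputeLiveInsOnce(Block &B) {
  std::set<int> New = B.Uses;
  for (Block *S : B.Succs)
    for (int R : S->LiveIns)
      if (!B.Defs.count(R))
        New.insert(R);
  if (New == B.LiveIns)
    return false;
  B.LiveIns = std::move(New);
  return true;
}

int main() {
  // A -> B: a register used in B only becomes live-in to A on a second pass
  // over this visit order, hence the do/while loops in the patch.
  Block A, B;
  A.Succs.push_back(&B);
  B.Uses.insert(7);
  std::vector<Block *> Order = {&A, &B}; // deliberately unfavorable order
  bool AnyChange;
  do {
    AnyChange = false;
    for (Block *Blk : Order)
      AnyChange = recomputeLiveInsOnce(*Blk) || AnyChange;
  } while (AnyChange);
  assert(A.LiveIns.count(7) && B.LiveIns.count(7));
  return 0;
}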
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
index 04349aa52b54..d47dded9ea6e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
@@ -21,17 +21,20 @@ using namespace llvm;
 TypeSize LoongArchTTIImpl::getRegisterBitWidth(
     TargetTransformInfo::RegisterKind K) const {
+ TypeSize DefSize = TargetTransformInfoImplBase::getRegisterBitWidth(K);
 switch (K) {
 case TargetTransformInfo::RGK_Scalar:
   return TypeSize::getFixed(ST->is64Bit() ? 64 : 32);
 case TargetTransformInfo::RGK_FixedWidthVector:
-   if (ST->hasExtLASX() && ST->hasExpAutoVec())
+   if (!ST->hasExpAutoVec())
+     return DefSize;
+   if (ST->hasExtLASX())
     return TypeSize::getFixed(256);
-   if (ST->hasExtLSX() && ST->hasExpAutoVec())
+   if (ST->hasExtLSX())
     return TypeSize::getFixed(128);
-   return TypeSize::getFixed(0);
+   [[fallthrough]];
 case TargetTransformInfo::RGK_ScalableVector:
-   return TypeSize::getScalable(0);
+   return DefSize;
 }
 llvm_unreachable("Unsupported register kind");
diff --git a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
index 718844bc36ff..66b2b0de8d52 100644
--- a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -471,45 +471,6 @@ void MipsAsmPrinter::emitBasicBlockEnd(const MachineBasicBlock &MBB) {
 TS.emitDirectiveInsn();
}
-/// isBlockOnlyReachableByFallthrough - Return true if the basic block has
-/// exactly one predecessor and the control transfer mechanism between
-/// the predecessor and this block is a fall-through.
-bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock*
-                                                       MBB) const {
- // The predecessor has to be immediately before this block.
- const MachineBasicBlock *Pred = *MBB->pred_begin();
-
- // If the predecessor is a switch statement, assume a jump table
- // implementation, so it is not a fall through.
- if (const BasicBlock *bb = Pred->getBasicBlock())
-   if (isa<SwitchInst>(bb->getTerminator()))
-     return false;
-
- // If this is a landing pad, it isn't a fall through. If it has no preds,
- // then nothing falls through to it.
- if (MBB->isEHPad() || MBB->pred_empty())
-   return false;
-
- // If there isn't exactly one predecessor, it can't be a fall through.
- if (MBB->pred_size() != 1)
-   return false;
-
- // The predecessor has to be immediately before this block.
- if (!Pred->isLayoutSuccessor(MBB))
-   return false;
-
- // If the block is completely empty, then it definitely does fall through.
- if (Pred->empty())
-   return true;
-
- // Otherwise, check the last instruction.
- // Check if the last terminator is an unconditional branch.
- MachineBasicBlock::const_iterator I = Pred->end();
- while (I != Pred->begin() && !(--I)->isTerminator()) ;
-
- return !I->isBarrier();
-}
-
// Print out an operand for an inline asm expression.
bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
                                     const char *ExtraCode, raw_ostream &O) {
diff --git a/llvm/lib/Target/Mips/MipsAsmPrinter.h b/llvm/lib/Target/Mips/MipsAsmPrinter.h
index 64424b181504..0b55089385d7 100644
--- a/llvm/lib/Target/Mips/MipsAsmPrinter.h
+++ b/llvm/lib/Target/Mips/MipsAsmPrinter.h
@@ -142,8 +142,6 @@ public:
 void emitFunctionBodyStart() override;
 void emitFunctionBodyEnd() override;
 void emitBasicBlockEnd(const MachineBasicBlock &MBB) override;
- bool isBlockOnlyReachableByFallthrough(
-     const MachineBasicBlock* MBB) const override;
 bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
                      const char *ExtraCode, raw_ostream &O) override;
 bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
diff --git a/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp
index aee57a5075ff..b43eee8fdd8c 100644
--- a/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp
+++ b/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp
@@ -208,8 +208,10 @@ bool PPCExpandAtomicPseudo::expandAtomicRMW128(
     .addMBB(LoopMBB);
 CurrentMBB->addSuccessor(LoopMBB);
 CurrentMBB->addSuccessor(ExitMBB);
- recomputeLiveIns(*LoopMBB);
- recomputeLiveIns(*ExitMBB);
+ bool anyChange = false;
+ do {
+   anyChange = recomputeLiveIns(*ExitMBB) || recomputeLiveIns(*LoopMBB);
+ } while (anyChange);
 NMBBI = MBB.end();
 MI.eraseFromParent();
 return true;
@@ -286,9 +288,11 @@ bool PPCExpandAtomicPseudo::expandAtomicCmpSwap128(
 CurrentMBB->addSuccessor(LoopCmpMBB);
 CurrentMBB->addSuccessor(ExitMBB);
- recomputeLiveIns(*LoopCmpMBB);
- recomputeLiveIns(*CmpSuccMBB);
- recomputeLiveIns(*ExitMBB);
+ bool anyChange = false;
+ do {
+   anyChange = recomputeLiveIns(*ExitMBB) || recomputeLiveIns(*CmpSuccMBB) ||
+               recomputeLiveIns(*LoopCmpMBB);
+ } while (anyChange);
 NMBBI = MBB.end();
 MI.eraseFromParent();
 return true;
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 245e78641ed6..6792842f8550 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -1441,8 +1441,11 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
     ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB);
 }
 // Update liveins.
- recomputeLiveIns(*ProbeLoopBodyMBB);
- recomputeLiveIns(*ProbeExitMBB);
+ bool anyChange = false;
+ do {
+   anyChange = recomputeLiveIns(*ProbeExitMBB) ||
+               recomputeLiveIns(*ProbeLoopBodyMBB);
+ } while (anyChange);
 return ProbeExitMBB;
};
// For case HasBP && MaxAlign > 1, we have to realign the SP by performing
@@ -1534,8 +1537,10 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
   buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
 }
 // Update liveins.
- recomputeLiveIns(*LoopMBB);
- recomputeLiveIns(*ExitMBB);
+ bool anyChange = false;
+ do {
+   anyChange = recomputeLiveIns(*ExitMBB) || recomputeLiveIns(*LoopMBB);
+ } while (anyChange);
}
}
++NumPrologProbed;
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index e6e879282241..27d52c16a4f3 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -31,6 +31,12 @@ include "RISCVInstrInfo.td"
include "GISel/RISCVRegisterBanks.td"
//===----------------------------------------------------------------------===//
+// RISC-V macro fusions.
+//===----------------------------------------------------------------------===//
+
+include "RISCVMacroFusion.td"
+
+//===----------------------------------------------------------------------===//
// RISC-V Scheduling Models
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 3878be680c04..26451c80f57b 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -72,7 +72,7 @@ def FeatureStdExtZicntr
     [FeatureStdExtZicsr]>;
 def FeatureStdExtZicond
-   : SubtargetFeature<"experimental-zicond", "HasStdExtZicond", "true",
+   : SubtargetFeature<"zicond", "HasStdExtZicond", "true",
                       "'Zicond' (Integer Conditional Operations)">;
 def HasStdExtZicond : Predicate<"Subtarget->hasStdExtZicond()">,
                      AssemblerPredicate<(all_of FeatureStdExtZicond),
@@ -1044,30 +1044,6 @@ def TuneDLenFactor2
   : SubtargetFeature<"dlen-factor-2", "DLenFactor2", "true",
                      "Vector unit DLEN(data path width) is half of VLEN">;
-def TuneLUIADDIFusion
-   : SubtargetFeature<"lui-addi-fusion", "HasLUIADDIFusion",
-                      "true", "Enable LUI+ADDI macrofusion">;
-
-def TuneAUIPCADDIFusion
-   : SubtargetFeature<"auipc-addi-fusion", "HasAUIPCADDIFusion",
-                      "true", "Enable AUIPC+ADDI macrofusion">;
-
-def TuneZExtHFusion
-   : SubtargetFeature<"zexth-fusion", "HasZExtHFusion",
-                      "true", "Enable SLLI+SRLI to be fused to zero extension of halfword">;
-
-def TuneZExtWFusion
-   : SubtargetFeature<"zextw-fusion", "HasZExtWFusion",
-                      "true", "Enable SLLI+SRLI to be fused to zero extension of word">;
-
-def TuneShiftedZExtWFusion
-   : SubtargetFeature<"shifted-zextw-fusion", "HasShiftedZExtWFusion",
-                      "true", "Enable SLLI+SRLI to be fused when computing (shifted) zero extension of word">;
-
-def TuneLDADDFusion
-   : SubtargetFeature<"ld-add-fusion", "HasLDADDFusion",
-                      "true", "Enable LD+ADD macrofusion.">;
-
 def TuneNoDefaultUnroll
   : SubtargetFeature<"no-default-unroll", "EnableDefaultUnroll", "false",
                      "Disable default unroll preference.">;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 47c6cd6e5487..7895d74f06d1 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4718,7 +4718,7 @@ static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
 if (SrcVecIdx == -1)
   continue;
 unsigned ExtractIdx = (SrcVecIdx % VRegsPerSrc) * NumOpElts;
- SDValue SrcVec = (unsigned)SrcVecIdx > VRegsPerSrc ? V2 : V1;
+ SDValue SrcVec = (unsigned)SrcVecIdx >= VRegsPerSrc ? V2 : V1;
 SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
                              DAG.getVectorIdxConstant(ExtractIdx, DL));
 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
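The one-character change above (`>` to `>=`) is an off-by-one fix that is easiest to see with concrete numbers. A small self-contained sketch of the index arithmetic, with hypothetical values rather than the LLVM API:

#include <cassert>

// Source vectors V1 and V2 are each split into VRegsPerSrc one-register
// pieces; indices [0, VRegsPerSrc) name pieces of V1 and
// [VRegsPerSrc, 2 * VRegsPerSrc) name pieces of V2.
int main() {
  unsigned VRegsPerSrc = 2;
  for (unsigned SrcVecIdx = 0; SrcVecIdx < 2 * VRegsPerSrc; ++SrcVecIdx) {
    bool FromV2 = SrcVecIdx >= VRegsPerSrc; // with '>', idx 2 misroutes to V1
    unsigned ExtractIdx = SrcVecIdx % VRegsPerSrc;
    (void)FromV2;
    (void)ExtractIdx;
    if (SrcVecIdx == 2)
      assert(FromV2 && ExtractIdx == 0); // first register of V2
  }
  return 0;
}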
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZicond.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZicond.td
index 0790a941823b..35d3fdae0bd7 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZicond.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZicond.td
@@ -8,8 +8,6 @@
//
// This file describes the RISC-V instructions from the standard Integer
// Conditional operations extension (Zicond).
-// This version is still experimental as the 'Zicond' extension hasn't been
-// ratified yet. It is based on v1.0-rc1 of the specification.
//
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp b/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
deleted file mode 100644
index f948f05b22f7..000000000000
--- a/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
+++ /dev/null
@@ -1,210 +0,0 @@
-//===- RISCVMacroFusion.cpp - RISC-V Macro Fusion -------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file This file contains the RISC-V implementation of the DAG scheduling
-/// mutation to pair instructions back to back.
-//
-//===----------------------------------------------------------------------===//
-//
-#include "RISCVMacroFusion.h"
-#include "RISCVSubtarget.h"
-#include "llvm/CodeGen/MacroFusion.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
-
-using namespace llvm;
-
-static bool checkRegisters(Register FirstDest, const MachineInstr &SecondMI) {
- if (!SecondMI.getOperand(1).isReg())
-   return false;
-
- if (SecondMI.getOperand(1).getReg() != FirstDest)
-   return false;
-
- // If the input is virtual make sure this is the only user.
- if (FirstDest.isVirtual()) {
-   auto &MRI = SecondMI.getMF()->getRegInfo();
-   return MRI.hasOneNonDBGUse(FirstDest);
- }
-
- return SecondMI.getOperand(0).getReg() == FirstDest;
-}
-
-// Fuse load with add:
-// add rd, rs1, rs2
-// ld rd, 0(rd)
-static bool isLDADD(const MachineInstr *FirstMI, const MachineInstr &SecondMI) {
- if (SecondMI.getOpcode() != RISCV::LD)
-   return false;
-
- if (!SecondMI.getOperand(2).isImm())
-   return false;
-
- if (SecondMI.getOperand(2).getImm() != 0)
-   return false;
-
- // Given SecondMI, when FirstMI is unspecified, we must return
- // if SecondMI may be part of a fused pair at all.
- if (!FirstMI)
-   return true;
-
- if (FirstMI->getOpcode() != RISCV::ADD)
-   return true;
-
- return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
-}
-
-// Fuse zero extension of halfword:
-// slli rd, rs1, 48
-// srli rd, rd, 48
-static bool isZExtH(const MachineInstr *FirstMI, const MachineInstr &SecondMI) {
- if (SecondMI.getOpcode() != RISCV::SRLI)
-   return false;
-
- if (!SecondMI.getOperand(2).isImm())
-   return false;
-
- if (SecondMI.getOperand(2).getImm() != 48)
-   return false;
-
- // Given SecondMI, when FirstMI is unspecified, we must return
- // if SecondMI may be part of a fused pair at all.
- if (!FirstMI)
-   return true;
-
- if (FirstMI->getOpcode() != RISCV::SLLI)
-   return false;
-
- if (FirstMI->getOperand(2).getImm() != 48)
-   return false;
-
- return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
-}
-
-// Fuse zero extension of word:
-// slli rd, rs1, 32
-// srli rd, rd, 32
-static bool isZExtW(const MachineInstr *FirstMI, const MachineInstr &SecondMI) {
- if (SecondMI.getOpcode() != RISCV::SRLI)
-   return false;
-
- if (!SecondMI.getOperand(2).isImm())
-   return false;
-
- if (SecondMI.getOperand(2).getImm() != 32)
-   return false;
-
- // Given SecondMI, when FirstMI is unspecified, we must return
- // if SecondMI may be part of a fused pair at all.
- if (!FirstMI)
-   return true;
-
- if (FirstMI->getOpcode() != RISCV::SLLI)
-   return false;
-
- if (FirstMI->getOperand(2).getImm() != 32)
-   return false;
-
- return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
-}
-
-// Fuse shifted zero extension of word:
-// slli rd, rs1, 32
-// srli rd, rd, x
-// where 0 <= x < 32
-static bool isShiftedZExtW(const MachineInstr *FirstMI,
-                           const MachineInstr &SecondMI) {
- if (SecondMI.getOpcode() != RISCV::SRLI)
-   return false;
-
- if (!SecondMI.getOperand(2).isImm())
-   return false;
-
- unsigned SRLIImm = SecondMI.getOperand(2).getImm();
- if (SRLIImm >= 32)
-   return false;
-
- // Given SecondMI, when FirstMI is unspecified, we must return
- // if SecondMI may be part of a fused pair at all.
- if (!FirstMI)
-   return true;
-
- if (FirstMI->getOpcode() != RISCV::SLLI)
-   return false;
-
- if (FirstMI->getOperand(2).getImm() != 32)
-   return false;
-
- return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
-}
-
-// Fuse AUIPC followed by ADDI
-// auipc rd, imm20
-// addi rd, rd, imm12
-static bool isAUIPCADDI(const MachineInstr *FirstMI,
-                        const MachineInstr &SecondMI) {
- if (SecondMI.getOpcode() != RISCV::ADDI)
-   return false;
- // Assume the 1st instr to be a wildcard if it is unspecified.
- if (!FirstMI)
-   return true;
-
- if (FirstMI->getOpcode() != RISCV::AUIPC)
-   return false;
-
- return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
-}
-
-// Fuse LUI followed by ADDI or ADDIW.
-// rd = imm[31:0] which decomposes to
-// lui rd, imm[31:12]
-// addi(w) rd, rd, imm[11:0]
-static bool isLUIADDI(const MachineInstr *FirstMI,
-                      const MachineInstr &SecondMI) {
- if (SecondMI.getOpcode() != RISCV::ADDI &&
-     SecondMI.getOpcode() != RISCV::ADDIW)
-   return false;
- // Assume the 1st instr to be a wildcard if it is unspecified.
- if (!FirstMI)
-   return true;
-
- if (FirstMI->getOpcode() != RISCV::LUI)
-   return false;
-
- return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
-}
-
-static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
-                                   const TargetSubtargetInfo &TSI,
-                                   const MachineInstr *FirstMI,
-                                   const MachineInstr &SecondMI) {
- const RISCVSubtarget &ST = static_cast<const RISCVSubtarget &>(TSI);
-
- if (ST.hasLUIADDIFusion() && isLUIADDI(FirstMI, SecondMI))
-   return true;
-
- if (ST.hasAUIPCADDIFusion() && isAUIPCADDI(FirstMI, SecondMI))
-   return true;
-
- if (ST.hasZExtHFusion() && isZExtH(FirstMI, SecondMI))
-   return true;
-
- if (ST.hasZExtWFusion() && isZExtW(FirstMI, SecondMI))
-   return true;
-
- if (ST.hasShiftedZExtWFusion() && isShiftedZExtW(FirstMI, SecondMI))
-   return true;
-
- if (ST.hasLDADDFusion() && isLDADD(FirstMI, SecondMI))
-   return true;
-
- return false;
-}
-
-std::unique_ptr<ScheduleDAGMutation> llvm::createRISCVMacroFusionDAGMutation() {
- return createMacroFusionDAGMutation(shouldScheduleAdjacent);
-}
diff --git a/llvm/lib/Target/RISCV/RISCVMacroFusion.h b/llvm/lib/Target/RISCV/RISCVMacroFusion.h
deleted file mode 100644
index 7598db3f8fe1..000000000000
--- a/llvm/lib/Target/RISCV/RISCVMacroFusion.h
+++ /dev/null
@@ -1,28 +0,0 @@
-//===- RISCVMacroFusion.h - RISC-V Macro Fusion -----------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file This file contains the RISC-V definition of the DAG scheduling
-/// mutation to pair instructions back to back.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_RISCV_RISCVMACROFUSION_H
-#define LLVM_LIB_TARGET_RISCV_RISCVMACROFUSION_H
-
-#include "llvm/CodeGen/MachineScheduler.h"
-
-namespace llvm {
-
-/// Note that you have to add:
-///   DAG.addMutation(createRISCVMacroFusionDAGMutation());
-/// to RISCVPassConfig::createMachineScheduler() to have an effect.
-std::unique_ptr<ScheduleDAGMutation> createRISCVMacroFusionDAGMutation();
-
-} // namespace llvm
-
-#endif
diff --git a/llvm/lib/Target/RISCV/RISCVMacroFusion.td b/llvm/lib/Target/RISCV/RISCVMacroFusion.td
new file mode 100644
index 000000000000..875a93d09a2c
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVMacroFusion.td
@@ -0,0 +1,93 @@
+//==----- RISCVMacroFusion.td - Macro Fusion Definitions -----*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// ===---------------------------------------------------------------------===//
+// The following definitions describe the macro fusion predicates.
+
+// Fuse LUI followed by ADDI or ADDIW:
+// rd = imm[31:0] which decomposes to
+// lui rd, imm[31:12]
+// addi(w) rd, rd, imm[11:0]
+def TuneLUIADDIFusion
+ : SimpleFusion<"lui-addi-fusion", "HasLUIADDIFusion",
+                "Enable LUI+ADDI macro fusion",
+                CheckOpcode<[LUI]>,
+                CheckOpcode<[ADDI, ADDIW]>>;
+
+// Fuse AUIPC followed by ADDI:
+// auipc rd, imm20
+// addi rd, rd, imm12
+def TuneAUIPCADDIFusion
+ : SimpleFusion<"auipc-addi-fusion", "HasAUIPCADDIFusion",
+                "Enable AUIPC+ADDI macrofusion",
+                CheckOpcode<[AUIPC]>,
+                CheckOpcode<[ADDI]>>;
+
+// Fuse zero extension of halfword:
+// slli rd, rs1, 48
+// srli rd, rd, 48
+def TuneZExtHFusion
+ : SimpleFusion<"zexth-fusion", "HasZExtHFusion",
+                "Enable SLLI+SRLI to be fused to zero extension of halfword",
+                CheckAll<[
+                  CheckOpcode<[SLLI]>,
+                  CheckIsImmOperand<2>,
+                  CheckImmOperand<2, 48>
+                ]>,
+                CheckAll<[
+                  CheckOpcode<[SRLI]>,
+                  CheckIsImmOperand<2>,
+                  CheckImmOperand<2, 48>
+                ]>>;
+
+// Fuse zero extension of word:
+// slli rd, rs1, 32
+// srli rd, rd, 32
+def TuneZExtWFusion
+ : SimpleFusion<"zextw-fusion", "HasZExtWFusion",
+                "Enable SLLI+SRLI to be fused to zero extension of word",
+                CheckAll<[
+                  CheckOpcode<[SLLI]>,
+                  CheckIsImmOperand<2>,
+                  CheckImmOperand<2, 32>
+                ]>,
+                CheckAll<[
+                  CheckOpcode<[SRLI]>,
+                  CheckIsImmOperand<2>,
+                  CheckImmOperand<2, 32>
+                ]>>;
+
+// Fuse shifted zero extension of word:
+// slli rd, rs1, 32
+// srli rd, rd, x
+// where 0 <= x < 32
+def TuneShiftedZExtWFusion
+ : SimpleFusion<"shifted-zextw-fusion", "HasShiftedZExtWFusion",
+                "Enable SLLI+SRLI to be fused when computing (shifted) word zero extension",
+                CheckAll<[
+                  CheckOpcode<[SLLI]>,
+                  CheckIsImmOperand<2>,
+                  CheckImmOperand<2, 32>
+                ]>,
+                CheckAll<[
+                  CheckOpcode<[SRLI]>,
+                  CheckIsImmOperand<2>,
+                  CheckImmOperandRange<2, 0, 31>
+                ]>>;
+
+// Fuse load with add:
+// add rd, rs1, rs2
+// ld rd, 0(rd)
+def TuneLDADDFusion
+ : SimpleFusion<"ld-add-fusion", "HasLDADDFusion", "Enable LD+ADD macrofusion",
+                CheckOpcode<[ADD]>,
+                CheckAll<[
+                  CheckOpcode<[LD]>,
+                  CheckIsImmOperand<2>,
+                  CheckImmOperand<2, 0>
+                ]>>;
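For readers unfamiliar with the new TableGen-driven scheme: each SimpleFusion record above is expanded by TableGen into a C++ fusion predicate that replaces the hand-written functions deleted from RISCVMacroFusion.cpp. A hand-written approximation of what the ld-add-fusion check amounts to; the types and names here are illustrative, not the actual RISCVGenMacroFusion.inc output:

#include <cstdint>

// Simplified stand-ins for MachineInstr and opcode queries; the real
// generated predicate operates on llvm::MachineInstr. This shows shape only.
enum Opcode { ADD, LD, OTHER };
struct Inst {
  Opcode Opc;
  int64_t Imm;  // immediate operand 2, if present
  bool HasImm;
};

// First instruction must be ADD; second must be LD with a zero offset.
// (The register-dependency check that SimpleFusion also emits is omitted.)
bool isLDADDFusionPair(const Inst *First, const Inst &Second) {
  if (Second.Opc != LD || !Second.HasImm || Second.Imm != 0)
    return false;
  if (!First) // wildcard query: could Second ever be the tail of a pair?
    return true;
  return First->Opc == ADD;
}

int main() {
  Inst Add{ADD, 0, false}, Ld{LD, 0, true};
  return isLDADDFusionPair(&Add, Ld) ? 0 : 1;
}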
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index 7b64d3cee9c8..d3236bb07d56 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -16,8 +16,9 @@
 #include "GISel/RISCVRegisterBankInfo.h"
 #include "RISCV.h"
 #include "RISCVFrameLowering.h"
-#include "RISCVMacroFusion.h"
 #include "RISCVTargetMachine.h"
+#include "llvm/CodeGen/MacroFusion.h"
+#include "llvm/CodeGen/ScheduleDAGMutation.h"
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -29,6 +30,9 @@ using namespace llvm;
 #define GET_SUBTARGETINFO_CTOR
 #include "RISCVGenSubtargetInfo.inc"
+#define GET_RISCV_MACRO_FUSION_PRED_IMPL
+#include "RISCVGenMacroFusion.inc"
+
 namespace llvm::RISCVTuneInfoTable {
 #define GET_RISCVTuneInfoTable_IMPL
@@ -187,7 +191,7 @@ bool RISCVSubtarget::enableSubRegLiveness() const {
 void RISCVSubtarget::getPostRAMutations(
     std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
- Mutations.push_back(createRISCVMacroFusionDAGMutation());
+ Mutations.push_back(createMacroFusionDAGMutation(getMacroFusions()));
}
/// Enable use of alias analysis during code generation (during MI
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 2ba93764facd..8c55efa69a6a 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -27,6 +27,9 @@
 #include "llvm/Target/TargetMachine.h"
 #include <bitset>
+#define GET_RISCV_MACRO_FUSION_PRED_DECL
+#include "RISCVGenMacroFusion.inc"
+
 #define GET_SUBTARGETINFO_HEADER
 #include "RISCVGenSubtargetInfo.inc"
@@ -196,11 +199,6 @@ public:
   return UserReservedRegister[i];
 }
- bool hasMacroFusion() const {
-   return hasLUIADDIFusion() || hasAUIPCADDIFusion() || hasZExtHFusion() ||
-          hasZExtWFusion() || hasShiftedZExtWFusion() || hasLDADDFusion();
- }
-
 // Vector codegen related methods.
 bool hasVInstructions() const { return HasStdExtZve32x; }
 bool hasVInstructionsI64() const { return HasStdExtZve64x; }
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index b4b81b545a54..2285c99d7901 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -14,7 +14,6 @@
 #include "MCTargetDesc/RISCVBaseInfo.h"
 #include "RISCV.h"
 #include "RISCVMachineFunctionInfo.h"
-#include "RISCVMacroFusion.h"
 #include "RISCVTargetObjectFile.h"
 #include "RISCVTargetTransformInfo.h"
 #include "TargetInfo/RISCVTargetInfo.h"
@@ -26,6 +25,8 @@
 #include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
 #include "llvm/CodeGen/MIRParser/MIParser.h"
 #include "llvm/CodeGen/MIRYamlMapping.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/MacroFusion.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
@@ -361,9 +362,10 @@ public:
     DAG->addMutation(createLoadClusterDAGMutation(
         DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
   }
-   if (ST.hasMacroFusion()) {
+   const auto &MacroFusions = ST.getMacroFusions();
+   if (!MacroFusions.empty()) {
     DAG = DAG ? DAG : createGenericSchedLive(C);
-     DAG->addMutation(createRISCVMacroFusionDAGMutation());
+     DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));
   }
   return DAG;
 }
@@ -371,9 +373,10 @@ public:
 ScheduleDAGInstrs *
 createPostMachineScheduler(MachineSchedContext *C) const override {
   const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
-   if (ST.hasMacroFusion()) {
+   const auto &MacroFusions = ST.getMacroFusions();
+   if (!MacroFusions.empty()) {
     ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
-     DAG->addMutation(createRISCVMacroFusionDAGMutation());
+     DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));
     return DAG;
   }
   return nullptr;
 }
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index db19c8881c68..80c994a32ea9 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -840,8 +840,10 @@ void SystemZELFFrameLowering::inlineStackProbe(
 StackAllocMI->eraseFromParent();
 if (DoneMBB != nullptr) {
   // Compute the live-in lists for the new blocks.
-   recomputeLiveIns(*DoneMBB);
-   recomputeLiveIns(*LoopMBB);
+   bool anyChange = false;
+   do {
+     anyChange = recomputeLiveIns(*DoneMBB) || recomputeLiveIns(*LoopMBB);
+   } while (anyChange);
 }
}
@@ -1439,8 +1441,10 @@ void SystemZXPLINKFrameLowering::inlineStackProbe(
 StackAllocMI->eraseFromParent();
 // Compute the live-in lists for the new blocks.
- recomputeLiveIns(*NextMBB);
- recomputeLiveIns(*StackExtMBB);
+ bool anyChange = false;
+ do {
+   anyChange = recomputeLiveIns(*StackExtMBB) || recomputeLiveIns(*NextMBB);
+ } while (anyChange);
}
bool SystemZXPLINKFrameLowering::hasFP(const MachineFunction &MF) const {
diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp
index 9f0fd4d0938e..87ec8aa23080 100644
--- a/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -877,7 +877,6 @@ void X86AsmPrinter::emitStartOfAsmFile(Module &M) {
 OutStreamer->emitInt32(FeatureFlagsAnd); // data
 emitAlignment(WordSize == 4 ? Align(4) : Align(8)); // padding
-OutStreamer->endSection(Nt);
 OutStreamer->switchSection(Cur);
}
}
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index c0d358ead278..c2f76a3b8abb 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -885,8 +885,10 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
 }
 // Update Live In information
- recomputeLiveIns(*testMBB);
- recomputeLiveIns(*tailMBB);
+ bool anyChange = false;
+ do {
+   anyChange = recomputeLiveIns(*tailMBB) || recomputeLiveIns(*testMBB);
+ } while (anyChange);
}
void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
@@ -1378,10 +1380,11 @@ void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
     footMBB->addSuccessor(&MBB);
 }
- recomputeLiveIns(*headMBB);
- recomputeLiveIns(*bodyMBB);
- recomputeLiveIns(*footMBB);
- recomputeLiveIns(MBB);
+ bool anyChange = false;
+ do {
+   anyChange = recomputeLiveIns(*footMBB) || recomputeLiveIns(*bodyMBB) ||
+               recomputeLiveIns(*headMBB) || recomputeLiveIns(MBB);
+ } while (anyChange);
}
} else {
 MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index fe7d90fbcdf7..bb5e22c71427 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -12422,7 +12422,7 @@ multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
   : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
 let ExeDomain = VTI.ExeDomain in
 defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
-               (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
+               (ins VTI.RC:$src1, BcstVTI.ScalarMemOp:$src2, u8imm:$src3),
               OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1",
               "$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3",
               (OpNode (VTI.VT VTI.RC:$src1),
diff --git a/llvm/lib/Target/X86/X86InstrFoldTables.cpp b/llvm/lib/Target/X86/X86InstrFoldTables.cpp
index c9d0f66c6e46..63136af2295f 100644
--- a/llvm/lib/Target/X86/X86InstrFoldTables.cpp
+++ b/llvm/lib/Target/X86/X86InstrFoldTables.cpp
@@ -291,12 +291,15 @@ struct X86BroadcastFoldTable {
 static bool matchBroadcastSize(const X86FoldTableEntry &Entry,
                                unsigned BroadcastBits) {
 switch (Entry.Flags & TB_BCAST_MASK) {
- case TB_BCAST_SD:
- case TB_BCAST_Q:
-   return BroadcastBits == 64;
- case TB_BCAST_SS:
+ case TB_BCAST_W:
+ case TB_BCAST_SH:
+   return BroadcastBits == 16;
 case TB_BCAST_D:
+ case TB_BCAST_SS:
   return BroadcastBits == 32;
+ case TB_BCAST_Q:
+ case TB_BCAST_SD:
+   return BroadcastBits == 64;
 }
 return false;
}
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index d6f9aa6d6ace..9ac1f783b7f0 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -2354,33 +2354,26 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
 case X86::VBLENDPSrri:
   // If we're optimizing for size, try to use MOVSD/MOVSS.
   if (MI.getParent()->getParent()->getFunction().hasOptSize()) {
-     unsigned Mask;
-     switch (Opc) {
-     default:
-       llvm_unreachable("Unreachable!");
-     case X86::BLENDPDrri:
-       Opc = X86::MOVSDrr;
-       Mask = 0x03;
-       break;
-     case X86::BLENDPSrri:
-       Opc = X86::MOVSSrr;
-       Mask = 0x0F;
-       break;
-     case X86::VBLENDPDrri:
-       Opc = X86::VMOVSDrr;
-       Mask = 0x03;
-       break;
-     case X86::VBLENDPSrri:
-       Opc = X86::VMOVSSrr;
-       Mask = 0x0F;
-       break;
-     }
+     unsigned Mask = (Opc == X86::BLENDPDrri || Opc == X86::VBLENDPDrri) ? 0x03: 0x0F;
     if ((MI.getOperand(3).getImm() ^ Mask) == 1) {
+#define FROM_TO(FROM, TO) \
+ case X86::FROM: \
+   Opc = X86::TO; \
+   break;
+       switch (Opc) {
+       default:
+         llvm_unreachable("Unreachable!");
+         FROM_TO(BLENDPDrri, MOVSDrr)
+         FROM_TO(BLENDPSrri, MOVSSrr)
+         FROM_TO(VBLENDPDrri, VMOVSDrr)
+         FROM_TO(VBLENDPSrri, VMOVSSrr)
+       }
       WorkingMI = CloneIfNew(MI);
       WorkingMI->setDesc(get(Opc));
       WorkingMI->removeOperand(3);
       break;
     }
+#undef FROM_TO
   }
   [[fallthrough]];
 case X86::PBLENDWrri:
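A note on the `(Imm ^ Mask) == 1` test in that hunk: with all-lanes mask Mask, an immediate whose XOR with Mask is exactly 1 selects every lane from one source except lane 0, which after commuting the operands is precisely the merge that MOVSS/MOVSD perform. A quick arithmetic check with hypothetical immediates; this mirrors the reasoning only, not the LLVM API:

#include <cassert>

int main() {
  // BLENDPS semantics: immediate bit i set -> take element i from src2.
  unsigned Mask = 0x0F; // all four single-precision lanes
  unsigned Imm = 0x0E;  // lanes 1..3 from src2, lane 0 from src1
  // Imm ^ Mask == 1: everything but lane 0 comes from src2, so the commuted
  // instruction keeps lane 0 from the other operand, i.e. MOVSS.
  assert((Imm ^ Mask) == 1);
  // BLENDPD analogue: Mask = 0x03, Imm = 0x02 -> MOVSD after commuting.
  assert((0x02u ^ 0x03u) == 1);
  return 0;
}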
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index 3cbe974ff314..20f324604aa5 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -294,6 +294,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
   Features["gfx12-insts"] = true;
   Features["atomic-fadd-rtn-insts"] = true;
   Features["image-insts"] = true;
+   Features["fp8-conversion-insts"] = true;
   break;
 case GK_GFX1151:
 case GK_GFX1150:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index bb2a77daa60a..1254a050027a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -1032,7 +1032,8 @@ Instruction *InstCombinerImpl::visitLoadInst(LoadInst &LI) {
 // where there are several consecutive memory accesses to the same location,
 // separated by a few arithmetic operations.
 bool IsLoadCSE = false;
- if (Value *AvailableVal = FindAvailableLoadedValue(&LI, *AA, &IsLoadCSE)) {
+ BatchAAResults BatchAA(*AA);
+ if (Value *AvailableVal = FindAvailableLoadedValue(&LI, BatchAA, &IsLoadCSE)) {
   if (IsLoadCSE)
     combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI, false);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index a8a5f9831e15..79873a9b4cbb 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -802,6 +802,9 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
       return InsertNewInstWith(LShr, I->getIterator());
     } else if (Known.One[BitWidth-ShiftAmt-1]) { // New bits are known one.
       Known.One |= HighBits;
+       // SignBits may be out-of-sync with Known.countMinSignBits(). Mask out
+       // high bits of Known.Zero to avoid conflicts.
+       Known.Zero &= ~HighBits;
     }
   } else {
     computeKnownBits(I, Known, Depth, CxtI);
diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index 8f09569d0d9c..7b672e89b67a 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -1061,11 +1061,16 @@ void State::addInfoFor(BasicBlock &BB) {
       FactOrCheck::getCheck(DT.getNode(&BB), cast<CallInst>(&I)));
   break;
 // Enqueue the intrinsics to add extra info.
- case Intrinsic::abs:
 case Intrinsic::umin:
 case Intrinsic::umax:
 case Intrinsic::smin:
 case Intrinsic::smax:
+   // TODO: Check if it is possible to instead only add the min/max facts
+   // when simplifying uses of the min/max intrinsics.
+   if (!isGuaranteedNotToBePoison(&I))
+     break;
+   [[fallthrough]];
+ case Intrinsic::abs:
   WorkList.push_back(FactOrCheck::getInstFact(DT.getNode(&BB), &I));
   break;
 }
diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 8603c5cf9c02..87c01ead634f 100644
--- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -1260,8 +1260,11 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
 // the entry to its block.
 BasicBlock::iterator BBIt(LoadI);
 bool IsLoadCSE;
+ BatchAAResults BatchAA(*AA);
+ // The dominator tree is updated lazily and may not be valid at this point.
+ BatchAA.disableDominatorTree();
 if (Value *AvailableVal = FindAvailableLoadedValue(
-       LoadI, LoadBB, BBIt, DefMaxInstsToScan, AA, &IsLoadCSE)) {
+       LoadI, LoadBB, BBIt, DefMaxInstsToScan, &BatchAA, &IsLoadCSE)) {
   // If the value of the load is locally available within the block, just use
   // it. This frequently occurs for reg2mem'd allocas.
@@ -1322,9 +1325,9 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
   MemoryLocation Loc(LoadedPtr->DoPHITranslation(LoadBB, PredBB),
                      LocationSize::precise(DL.getTypeStoreSize(AccessTy)),
                      AATags);
-   PredAvailable = findAvailablePtrLoadStore(Loc, AccessTy, LoadI->isAtomic(),
-                                             PredBB, BBIt, DefMaxInstsToScan,
-                                             AA, &IsLoadCSE, &NumScanedInst);
+   PredAvailable = findAvailablePtrLoadStore(
+       Loc, AccessTy, LoadI->isAtomic(), PredBB, BBIt, DefMaxInstsToScan,
+       &BatchAA, &IsLoadCSE, &NumScanedInst);
   // If PredBB has a single predecessor, continue scanning through the
   // single predecessor.
@@ -1336,7 +1339,7 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
     BBIt = SinglePredBB->end();
     PredAvailable = findAvailablePtrLoadStore(
         Loc, AccessTy, LoadI->isAtomic(), SinglePredBB, BBIt,
-         (DefMaxInstsToScan - NumScanedInst), AA, &IsLoadCSE,
+         (DefMaxInstsToScan - NumScanedInst), &BatchAA, &IsLoadCSE,
         &NumScanedInst);
   }
 }
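Both loads-over-scan changes above wrap the AAResults in a BatchAAResults before a linear scan, so repeated alias queries over the same pointer pairs hit a cache instead of being recomputed; the JumpThreading variant additionally disables dominator-tree-based reasoning because the tree is updated lazily there. A tiny stand-alone sketch of the caching idea only, with hypothetical types rather than the LLVM classes:

#include <map>
#include <utility>

enum AliasResult { NoAlias, MayAlias, MustAlias };

// Pretend this is an expensive, stateless alias query.
AliasResult expensiveAlias(int PtrA, int PtrB) {
  return PtrA == PtrB ? MustAlias : MayAlias;
}

// Batch wrapper: valid only while the IR is unchanged, which is the same
// contract BatchAAResults imposes on its users.
class BatchAlias {
  std::map<std::pair<int, int>, AliasResult> Cache;
public:
  AliasResult alias(int A, int B) {
    auto Key = std::minmax(A, B); // symmetric queries share one entry
    auto It = Cache.find({Key.first, Key.second});
    if (It != Cache.end())
      return It->second;
    AliasResult R = expensiveAlias(A, B);
    Cache[{Key.first, Key.second}] = R;
    return R;
  }
};

int main() {
  BatchAlias AA;
  return AA.alias(1, 2) == AA.alias(2, 1) ? 0 : 1; // second call is cached
}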
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 6ca93e15719f..dd596c567cd4 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1957,6 +1957,8 @@ class GeneratedRTChecks {
 bool CostTooHigh = false;
 const bool AddBranchWeights;
+ Loop *OuterLoop = nullptr;
+
public:
 GeneratedRTChecks(ScalarEvolution &SE, DominatorTree *DT, LoopInfo *LI,
                   TargetTransformInfo *TTI, const DataLayout &DL,
@@ -2053,6 +2055,9 @@ public:
     DT->eraseNode(SCEVCheckBlock);
     LI->removeBlock(SCEVCheckBlock);
   }
+
+   // Outer loop is used as part of the later cost calculations.
+   OuterLoop = L->getParentLoop();
 }
 InstructionCost getCost() {
@@ -2076,16 +2081,61 @@ public:
     LLVM_DEBUG(dbgs() << " " << C << " for " << I << "\n");
     RTCheckCost += C;
   }
-   if (MemCheckBlock)
+   if (MemCheckBlock) {
+     InstructionCost MemCheckCost = 0;
     for (Instruction &I : *MemCheckBlock) {
       if (MemCheckBlock->getTerminator() == &I)
         continue;
       InstructionCost C = TTI->getInstructionCost(&I, TTI::TCK_RecipThroughput);
       LLVM_DEBUG(dbgs() << " " << C << " for " << I << "\n");
-       RTCheckCost += C;
+       MemCheckCost += C;
     }
+     // If the runtime memory checks are being created inside an outer loop
+     // we should find out if these checks are outer loop invariant. If so,
+     // the checks will likely be hoisted out and so the effective cost will
+     // reduce according to the outer loop trip count.
+     if (OuterLoop) {
+       ScalarEvolution *SE = MemCheckExp.getSE();
+       // TODO: If profitable, we could refine this further by analysing every
+       // individual memory check, since there could be a mixture of loop
+       // variant and invariant checks that mean the final condition is
+       // variant.
+       const SCEV *Cond = SE->getSCEV(MemRuntimeCheckCond);
+       if (SE->isLoopInvariant(Cond, OuterLoop)) {
+         // It seems reasonable to assume that we can reduce the effective
+         // cost of the checks even when we know nothing about the trip
+         // count. Assume that the outer loop executes at least twice.
+         unsigned BestTripCount = 2;
+
+         // If exact trip count is known use that.
+         if (unsigned SmallTC = SE->getSmallConstantTripCount(OuterLoop))
+           BestTripCount = SmallTC;
+         else if (LoopVectorizeWithBlockFrequency) {
+           // Else use profile data if available.
+           if (auto EstimatedTC = getLoopEstimatedTripCount(OuterLoop))
+             BestTripCount = *EstimatedTC;
+         }
+
+         InstructionCost NewMemCheckCost = MemCheckCost / BestTripCount;
+
+         // Let's ensure the cost is always at least 1.
+         NewMemCheckCost = std::max(*NewMemCheckCost.getValue(),
+                                    (InstructionCost::CostType)1);
+
+         LLVM_DEBUG(dbgs()
+                    << "We expect runtime memory checks to be hoisted "
+                    << "out of the outer loop. Cost reduced from "
+                    << MemCheckCost << " to " << NewMemCheckCost << '\n');
+
+         MemCheckCost = NewMemCheckCost;
+       }
+     }
+
+     RTCheckCost += MemCheckCost;
+   }
   if (SCEVCheckBlock || MemCheckBlock)
     LLVM_DEBUG(dbgs() << "Total cost of runtime checks: " << RTCheckCost
                       << "\n");
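The cost scaling in that hunk is plain integer arithmetic, so a worked instance with assumed numbers may help: if the memory checks cost 24 and the outer loop's trip count is known to be 8, the charged cost becomes max(24 / 8, 1) = 3; with an unknown trip count the divisor conservatively defaults to 2. In miniature:

#include <algorithm>
#include <cassert>

int main() {
  long MemCheckCost = 24;         // summed per-instruction check cost
  long BestTripCount = 8;         // falls back to 2 when nothing is known
  long NewCost = std::max(MemCheckCost / BestTripCount, 1L); // floor of 1
  assert(NewCost == 3);
  return 0;
}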
@@ -2144,8 +2194,8 @@ public:
   BranchInst::Create(LoopVectorPreHeader, SCEVCheckBlock);
   // Create new preheader for vector loop.
- if (auto *PL = LI->getLoopFor(LoopVectorPreHeader))
-   PL->addBasicBlockToLoop(SCEVCheckBlock, *LI);
+ if (OuterLoop)
+   OuterLoop->addBasicBlockToLoop(SCEVCheckBlock, *LI);
 SCEVCheckBlock->getTerminator()->eraseFromParent();
 SCEVCheckBlock->moveBefore(LoopVectorPreHeader);
@@ -2179,8 +2229,8 @@ public:
 DT->changeImmediateDominator(LoopVectorPreHeader, MemCheckBlock);
 MemCheckBlock->moveBefore(LoopVectorPreHeader);
- if (auto *PL = LI->getLoopFor(LoopVectorPreHeader))
-   PL->addBasicBlockToLoop(MemCheckBlock, *LI);
+ if (OuterLoop)
+   OuterLoop->addBasicBlockToLoop(MemCheckBlock, *LI);
 BranchInst &BI = *BranchInst::Create(Bypass, LoopVectorPreHeader, MemRuntimeCheckCond);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index bbeb5da2cfec..ae2fc522ba40 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -597,13 +597,15 @@ void VPWidenCallRecipe::execute(VPTransformState &State) {
 for (const auto &I : enumerate(operands())) {
   // Some intrinsics have a scalar argument - don't replace it with a
   // vector.
-   // Some vectorized function variants may also take a scalar argument,
-   // e.g. linear parameters for pointers.
   Value *Arg;
-   if ((VFTy && !VFTy->getParamType(I.index())->isVectorTy()) ||
-       (UseIntrinsic &&
-        isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index())))
+   if (UseIntrinsic &&
+       isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index()))
     Arg = State.get(I.value(), VPIteration(0, 0));
+   // Some vectorized function variants may also take a scalar argument,
+   // e.g. linear parameters for pointers. This needs to be the scalar value
+   // from the start of the respective part when interleaving.
+   else if (VFTy && !VFTy->getParamType(I.index())->isVectorTy())
+     Arg = State.get(I.value(), VPIteration(Part, 0));
   else
     Arg = State.get(I.value(), Part);
   if (UseIntrinsic &&
diff --git a/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp b/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
index 455183987b7b..50156d34528c 100644
--- a/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
+++ b/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
@@ -57,7 +57,8 @@ class MatcherTableEmitter {
 // We de-duplicate the predicates by code string, and use this map to track
 // all the patterns with "identical" predicates.
- StringMap<TinyPtrVector<TreePattern *>> NodePredicatesByCodeToRun;
+ MapVector<std::string, TinyPtrVector<TreePattern *>, StringMap<unsigned>>
+     NodePredicatesByCodeToRun;
 std::vector<std::string> PatternPredicates;
diff --git a/llvm/utils/TableGen/PredicateExpander.cpp b/llvm/utils/TableGen/PredicateExpander.cpp
index d3a73e02cd91..0b9b6389fe38 100644
--- a/llvm/utils/TableGen/PredicateExpander.cpp
+++ b/llvm/utils/TableGen/PredicateExpander.cpp
@@ -59,6 +59,30 @@ void PredicateExpander::expandCheckImmOperandSimple(raw_ostream &OS,
 OS << ")";
}
+void PredicateExpander::expandCheckImmOperandLT(raw_ostream &OS, int OpIndex,
+                                                int ImmVal,
+                                                StringRef FunctionMapper) {
+ if (!FunctionMapper.empty())
+   OS << FunctionMapper << "(";
+ OS << "MI" << (isByRef() ? "." : "->") << "getOperand(" << OpIndex
+    << ").getImm()";
+ if (!FunctionMapper.empty())
+   OS << ")";
+ OS << (shouldNegate() ? " >= " : " < ") << ImmVal;
+}
+
+void PredicateExpander::expandCheckImmOperandGT(raw_ostream &OS, int OpIndex,
+                                                int ImmVal,
+                                                StringRef FunctionMapper) {
+ if (!FunctionMapper.empty())
+   OS << FunctionMapper << "(";
: "->") << "getOperand(" << OpIndex + << ").getImm()"; + if (!FunctionMapper.empty()) + OS << ")"; + OS << (shouldNegate() ? " <= " : " > ") << ImmVal; +} + void PredicateExpander::expandCheckRegOperand(raw_ostream &OS, int OpIndex, const Record *Reg, StringRef FunctionMapper) { @@ -352,6 +376,16 @@ void PredicateExpander::expandPredicate(raw_ostream &OS, const Record *Rec) { Rec->getValueAsString("ImmVal"), Rec->getValueAsString("FunctionMapper")); + if (Rec->isSubClassOf("CheckImmOperandLT")) + return expandCheckImmOperandLT(OS, Rec->getValueAsInt("OpIndex"), + Rec->getValueAsInt("ImmVal"), + Rec->getValueAsString("FunctionMapper")); + + if (Rec->isSubClassOf("CheckImmOperandGT")) + return expandCheckImmOperandGT(OS, Rec->getValueAsInt("OpIndex"), + Rec->getValueAsInt("ImmVal"), + Rec->getValueAsString("FunctionMapper")); + if (Rec->isSubClassOf("CheckImmOperandSimple")) return expandCheckImmOperandSimple(OS, Rec->getValueAsInt("OpIndex"), Rec->getValueAsString("FunctionMapper")); diff --git a/llvm/utils/TableGen/PredicateExpander.h b/llvm/utils/TableGen/PredicateExpander.h index cfb0a3d51e67..a0dc63023978 100644 --- a/llvm/utils/TableGen/PredicateExpander.h +++ b/llvm/utils/TableGen/PredicateExpander.h @@ -61,6 +61,10 @@ public: StringRef FunctionMapperer); void expandCheckImmOperandSimple(raw_ostream &OS, int OpIndex, StringRef FunctionMapper); + void expandCheckImmOperandLT(raw_ostream &OS, int OpIndex, int ImmVal, + StringRef FunctionMapper); + void expandCheckImmOperandGT(raw_ostream &OS, int OpIndex, int ImmVal, + StringRef FunctionMapper); void expandCheckRegOperand(raw_ostream &OS, int OpIndex, const Record *Reg, StringRef FunctionMapper); void expandCheckRegOperandSimple(raw_ostream &OS, int OpIndex, diff --git a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp index 8a860d0945bb..7ea02ecba324 100644 --- a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp +++ b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp @@ -14,6 +14,7 @@ #include "CodeGenInstruction.h" #include "CodeGenTarget.h" #include "X86RecognizableInstr.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/X86FoldTablesUtils.h" #include "llvm/TableGen/Record.h" @@ -80,6 +81,7 @@ class X86FoldTablesEmitter { bool FoldStore = false; enum BcastType { BCAST_NONE, + BCAST_W, BCAST_D, BCAST_Q, BCAST_SS, @@ -114,6 +116,9 @@ class X86FoldTablesEmitter { switch (BroadcastKind) { case BCAST_NONE: break; + case BCAST_W: + Attrs += "TB_BCAST_W|"; + break; case BCAST_D: Attrs += "TB_BCAST_D|"; break; @@ -529,45 +534,22 @@ void X86FoldTablesEmitter::addBroadcastEntry( assert(Table.find(RegInst) == Table.end() && "Override entry unexpectedly"); X86FoldTableEntry Result = X86FoldTableEntry(RegInst, MemInst); - Record *RegRec = RegInst->TheDef; - StringRef RegInstName = RegRec->getName(); - StringRef MemInstName = MemInst->TheDef->getName(); - Record *Domain = RegRec->getValueAsDef("ExeDomain"); - bool IsSSEPackedInt = Domain->getName() == "SSEPackedInt"; - if ((RegInstName.contains("DZ") || RegInstName.contains("DWZ") || - RegInstName.contains("Dr") || RegInstName.contains("I32")) && - IsSSEPackedInt) { - assert((MemInstName.contains("DZ") || RegInstName.contains("DWZ") || - MemInstName.contains("Dr") || MemInstName.contains("I32")) && - "Unmatched names for broadcast"); - Result.BroadcastKind = X86FoldTableEntry::BCAST_D; - } else if ((RegInstName.contains("QZ") || RegInstName.contains("QBZ") || - RegInstName.contains("Qr") || 
RegInstName.contains("I64")) && - IsSSEPackedInt) { - assert((MemInstName.contains("QZ") || MemInstName.contains("QBZ") || - MemInstName.contains("Qr") || MemInstName.contains("I64")) && - "Unmatched names for broadcast"); - Result.BroadcastKind = X86FoldTableEntry::BCAST_Q; - } else if ((RegInstName.contains("PS") || RegInstName.contains("F32") || - RegInstName.contains("CPH")) && - !RegInstName.contains("PH2PS")) { - assert((MemInstName.contains("PS") || MemInstName.contains("F32") || - MemInstName.contains("CPH")) && - "Unmatched names for broadcast"); - Result.BroadcastKind = X86FoldTableEntry::BCAST_SS; - } else if ((RegInstName.contains("PD") || RegInstName.contains("F64")) && - !RegInstName.contains("PH2PD")) { - assert((MemInstName.contains("PD") || MemInstName.contains("F64")) && - "Unmatched names for broadcast"); - Result.BroadcastKind = X86FoldTableEntry::BCAST_SD; - } else if (RegInstName.contains("PH")) { - assert(MemInstName.contains("PH") && "Unmatched names for broadcast"); - Result.BroadcastKind = X86FoldTableEntry::BCAST_SH; - } else { - errs() << RegInstName << ", " << MemInstName << "\n"; - llvm_unreachable("Name is not canoicalized for broadcast or " - "ExeDomain is incorrect"); + DagInit *In = MemInst->TheDef->getValueAsDag("InOperandList"); + for (unsigned I = 0, E = In->getNumArgs(); I != E; ++I) { + Result.BroadcastKind = + StringSwitch<X86FoldTableEntry::BcastType>(In->getArg(I)->getAsString()) + .Case("i16mem", X86FoldTableEntry::BCAST_W) + .Case("i32mem", X86FoldTableEntry::BCAST_D) + .Case("i64mem", X86FoldTableEntry::BCAST_Q) + .Case("f16mem", X86FoldTableEntry::BCAST_SH) + .Case("f32mem", X86FoldTableEntry::BCAST_SS) + .Case("f64mem", X86FoldTableEntry::BCAST_SD) + .Default(X86FoldTableEntry::BCAST_NONE); + if (Result.BroadcastKind != X86FoldTableEntry::BCAST_NONE) + break; } + assert(Result.BroadcastKind != X86FoldTableEntry::BCAST_NONE && + "Unknown memory operand for broadcast"); Table[RegInst] = Result; } |