diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2024-08-25 10:56:24 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2024-08-25 10:56:24 +0000 |
commit | adf62863f35c84e4c5708f3dc5a1589ce958a238 (patch) | |
tree | 924e88089bbec100de08015b4e496261eb6f2d66 | |
parent | 9b9503334fa856ed4ed6823d35b6f52546296f77 (diff) |
Vendor import of llvm-project branch release/19.x llvmorg-19.1.0-rc3-0-g437434df21d8.vendor/llvm-project/llvmorg-19.1.0-rc3-0-g437434df21d8
112 files changed, 1238 insertions, 1448 deletions
diff --git a/clang/include/clang/AST/DeclBase.h b/clang/include/clang/AST/DeclBase.h index 40f01abf384e..2a4bd0f9c2fd 100644 --- a/clang/include/clang/AST/DeclBase.h +++ b/clang/include/clang/AST/DeclBase.h @@ -670,6 +670,13 @@ public: /// Whether this declaration comes from another module unit. bool isInAnotherModuleUnit() const; + /// Whether this declaration comes from the same module unit being compiled. + bool isInCurrentModuleUnit() const; + + /// Whether the definition of the declaration should be emitted in external + /// sources. + bool shouldEmitInExternalSource() const; + /// Whether this declaration comes from explicit global module. bool isFromExplicitGlobalModule() const; diff --git a/clang/include/clang/AST/ExprCXX.h b/clang/include/clang/AST/ExprCXX.h index c2feac525c1e..45cfd7bfb7f9 100644 --- a/clang/include/clang/AST/ExprCXX.h +++ b/clang/include/clang/AST/ExprCXX.h @@ -3229,7 +3229,7 @@ class UnresolvedLookupExpr final const DeclarationNameInfo &NameInfo, bool RequiresADL, const TemplateArgumentListInfo *TemplateArgs, UnresolvedSetIterator Begin, UnresolvedSetIterator End, - bool KnownDependent); + bool KnownDependent, bool KnownInstantiationDependent); UnresolvedLookupExpr(EmptyShell Empty, unsigned NumResults, bool HasTemplateKWAndArgsInfo); @@ -3248,7 +3248,7 @@ public: NestedNameSpecifierLoc QualifierLoc, const DeclarationNameInfo &NameInfo, bool RequiresADL, UnresolvedSetIterator Begin, UnresolvedSetIterator End, - bool KnownDependent); + bool KnownDependent, bool KnownInstantiationDependent); // After canonicalization, there may be dependent template arguments in // CanonicalConverted But none of Args is dependent. When any of @@ -3258,7 +3258,8 @@ public: NestedNameSpecifierLoc QualifierLoc, SourceLocation TemplateKWLoc, const DeclarationNameInfo &NameInfo, bool RequiresADL, const TemplateArgumentListInfo *Args, UnresolvedSetIterator Begin, - UnresolvedSetIterator End, bool KnownDependent); + UnresolvedSetIterator End, bool KnownDependent, + bool KnownInstantiationDependent); static UnresolvedLookupExpr *CreateEmpty(const ASTContext &Context, unsigned NumResults, diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td index f8d50d12bb93..12aab09f2855 100644 --- a/clang/include/clang/Basic/DiagnosticParseKinds.td +++ b/clang/include/clang/Basic/DiagnosticParseKinds.td @@ -1260,9 +1260,6 @@ def warn_pragma_intrinsic_builtin : Warning< def warn_pragma_unused_expected_var : Warning< "expected '#pragma unused' argument to be a variable name">, InGroup<IgnoredPragmas>; -// - #pragma mc_func -def err_pragma_mc_func_not_supported : - Error<"#pragma mc_func is not supported">; // - #pragma init_seg def warn_pragma_init_seg_unsupported_target : Warning< "'#pragma init_seg' is only supported when targeting a " diff --git a/clang/include/clang/Basic/PointerAuthOptions.h b/clang/include/clang/Basic/PointerAuthOptions.h index 417b4b00648c..a26af69e1fa2 100644 --- a/clang/include/clang/Basic/PointerAuthOptions.h +++ b/clang/include/clang/Basic/PointerAuthOptions.h @@ -159,6 +159,12 @@ public: }; struct PointerAuthOptions { + /// Should return addresses be authenticated? + bool ReturnAddresses = false; + + /// Do authentication failures cause a trap? + bool AuthTraps = false; + /// Do indirect goto label addresses need to be authenticated? 
bool IndirectGotos = false; diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 94c093d89115..fb11d743fd64 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2116,7 +2116,7 @@ def SVFCLAMP_BF : SInst<"svclamp[_{d}]", "dddd", "b", MergeNone, "aarch64_sve_ multiclass MinMaxIntr<string i, string zm, string mul, string t> { def SVS # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "csil", MergeNone, "aarch64_sve_s" # i # zm # "_" # mul, [IsStreaming], []>; def SVU # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "UcUsUiUl", MergeNone, "aarch64_sve_u" # i # zm # "_" # mul, [IsStreaming], []>; - def SVF # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "bhfd", MergeNone, "aarch64_sve_f" # i # zm # "_" # mul, [IsStreaming], []>; + def SVF # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "hfd", MergeNone, "aarch64_sve_f" # i # zm # "_" # mul, [IsStreaming], []>; } let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in { @@ -2134,11 +2134,11 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in { } multiclass SInstMinMaxByVector<string name> { - def NAME # _SINGLE_X2 : SInst<"sv" # name # "nm[_single_{d}_x2]", "22d", "bhfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x2", [IsStreaming], []>; - def NAME # _SINGLE_X4 : SInst<"sv" # name # "nm[_single_{d}_x4]", "44d", "bhfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x4", [IsStreaming], []>; + def NAME # _SINGLE_X2 : SInst<"sv" # name # "nm[_single_{d}_x2]", "22d", "hfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x2", [IsStreaming], []>; + def NAME # _SINGLE_X4 : SInst<"sv" # name # "nm[_single_{d}_x4]", "44d", "hfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x4", [IsStreaming], []>; - def NAME # _X2 : SInst<"sv" # name # "nm[_{d}_x2]", "222", "bhfd", MergeNone, "aarch64_sve_f" # name # "nm_x2", [IsStreaming], []>; - def NAME # _X4 : SInst<"sv" # name # "nm[_{d}_x4]", "444", "bhfd", MergeNone, "aarch64_sve_f" # name # "nm_x4", [IsStreaming], []>; + def NAME # _X2 : SInst<"sv" # name # "nm[_{d}_x2]", "222", "hfd", MergeNone, "aarch64_sve_f" # name # "nm_x2", [IsStreaming], []>; + def NAME # _X4 : SInst<"sv" # name # "nm[_{d}_x4]", "444", "hfd", MergeNone, "aarch64_sve_f" # name # "nm_x4", [IsStreaming], []>; } let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in { @@ -2172,9 +2172,25 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in { def SVFCLAMP_X4 : SInst<"svclamp[_single_{d}_x4]", "44dd", "hfd", MergeNone, "aarch64_sve_fclamp_single_x4", [IsStreaming], []>; } +multiclass BfSingleMultiVector<string name> { + def NAME # _SINGLE_X2 : SInst<"sv" # name # "[_single_{d}_x2]", "22d", "b", MergeNone, "aarch64_sve_f" # name # "_single_x2", [IsStreaming], []>; + def NAME # _SINGLE_X4 : SInst<"sv" # name # "[_single_{d}_x4]", "44d", "b", MergeNone, "aarch64_sve_f" # name # "_single_x4", [IsStreaming], []>; + + def NAME # _X2 : SInst<"sv" # name # "[_{d}_x2]", "222", "b", MergeNone, "aarch64_sve_f" # name # "_x2", [IsStreaming], []>; + def NAME # _X4 : SInst<"sv" # name # "[_{d}_x4]", "444", "b", MergeNone, "aarch64_sve_f" # name # "_x4", [IsStreaming], []>; +} + let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2,b16b16"in { def SVBFCLAMP_X2 : SInst<"svclamp[_single_{d}_x2]", "22dd", "b", MergeNone, "aarch64_sve_bfclamp_single_x2", [IsStreaming], []>; def SVBFCLAMP_X4 : SInst<"svclamp[_single_{d}_x4]", "44dd", "b", MergeNone, "aarch64_sve_bfclamp_single_x4", 
[IsStreaming], []>; + + // bfmin, bfmax (single, multi) + defm SVBFMIN : BfSingleMultiVector<"min">; + defm SVBFMAX : BfSingleMultiVector<"max">; + + // bfminnm, bfmaxnm (single, multi) + defm SVBFMINNM : BfSingleMultiVector<"minnm">; + defm SVBFMAXNM : BfSingleMultiVector<"maxnm">; } let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in { diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 359a698ea87d..15f9ee75492e 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -932,8 +932,9 @@ def O_flag : Flag<["-"], "O">, Visibility<[ClangOption, CC1Option, FC1Option]>, Alias<O>, AliasArgs<["1"]>; def Ofast : Joined<["-"], "Ofast">, Group<O_Group>, Visibility<[ClangOption, CC1Option, FlangOption]>, - HelpText<"Deprecated; use '-O3 -ffast-math' for the same behavior," - " or '-O3' to enable only conforming optimizations">; + HelpTextForVariants<[ClangOption, CC1Option], + "Deprecated; use '-O3 -ffast-math' for the same behavior," + " or '-O3' to enable only conforming optimizations">; def P : Flag<["-"], "P">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>, Group<Preprocessor_Group>, @@ -3106,7 +3107,7 @@ def fmodules_user_build_path : Separate<["-"], "fmodules-user-build-path">, Grou HelpText<"Specify the module user build path">, MarshallingInfoString<HeaderSearchOpts<"ModuleUserBuildPath">>; def fprebuilt_module_path : Joined<["-"], "fprebuilt-module-path=">, Group<i_Group>, - Flags<[]>, Visibility<[ClangOption, CC1Option]>, + Flags<[]>, Visibility<[ClangOption, CLOption, CC1Option]>, MetaVarName<"<directory>">, HelpText<"Specify the prebuilt module path">; defm prebuilt_implicit_modules : BoolFOption<"prebuilt-implicit-modules", @@ -3115,11 +3116,11 @@ defm prebuilt_implicit_modules : BoolFOption<"prebuilt-implicit-modules", NegFlag<SetFalse>, BothFlags<[], [ClangOption, CC1Option]>>; def fmodule_output_EQ : Joined<["-"], "fmodule-output=">, - Flags<[NoXarchOption]>, Visibility<[ClangOption, CC1Option]>, + Flags<[NoXarchOption]>, Visibility<[ClangOption, CLOption, CC1Option]>, MarshallingInfoString<FrontendOpts<"ModuleOutputPath">>, HelpText<"Save intermediate module file results when compiling a standard C++ module unit.">; def fmodule_output : Flag<["-"], "fmodule-output">, Flags<[NoXarchOption]>, - Visibility<[ClangOption, CC1Option]>, + Visibility<[ClangOption, CLOption, CC1Option]>, HelpText<"Save intermediate module file results when compiling a standard C++ module unit.">; defm skip_odr_check_in_gmf : BoolOption<"f", "skip-odr-check-in-gmf", @@ -3299,8 +3300,10 @@ def fretain_comments_from_system_headers : Flag<["-"], "fretain-comments-from-sy Visibility<[ClangOption, CC1Option]>, MarshallingInfoFlag<LangOpts<"RetainCommentsFromSystemHeaders">>; def fmodule_header : Flag <["-"], "fmodule-header">, Group<f_Group>, + Visibility<[ClangOption, CLOption]>, HelpText<"Build a C++20 Header Unit from a header">; def fmodule_header_EQ : Joined<["-"], "fmodule-header=">, Group<f_Group>, + Visibility<[ClangOption, CLOption]>, MetaVarName<"<kind>">, HelpText<"Build a C++20 Header Unit from a header that should be found in the user (fmodule-header=user) or system (fmodule-header=system) search path.">; @@ -5945,6 +5948,7 @@ def _output : Separate<["--"], "output">, Alias<o>; def _param : Separate<["--"], "param">, Group<CompileOnly_Group>; def _param_EQ : Joined<["--"], "param=">, Alias<_param>; def _precompile : Flag<["--"], "precompile">, Flags<[NoXarchOption]>, + Visibility<[ClangOption, 
CLOption]>, Group<Action_Group>, HelpText<"Only precompile the input">; def _prefix_EQ : Joined<["--"], "prefix=">, Alias<B>; def _prefix : Separate<["--"], "prefix">, Alias<B>; @@ -8086,13 +8090,6 @@ def source_date_epoch : Separate<["-"], "source-date-epoch">, } // let Visibility = [CC1Option] -defm err_pragma_mc_func_aix : BoolFOption<"err-pragma-mc-func-aix", - PreprocessorOpts<"ErrorOnPragmaMcfuncOnAIX">, DefaultFalse, - PosFlag<SetTrue, [], [ClangOption, CC1Option], - "Treat uses of #pragma mc_func as errors">, - NegFlag<SetFalse,[], [ClangOption, CC1Option], - "Ignore uses of #pragma mc_func">>; - //===----------------------------------------------------------------------===// // CUDA Options //===----------------------------------------------------------------------===// diff --git a/clang/include/clang/Lex/PreprocessorOptions.h b/clang/include/clang/Lex/PreprocessorOptions.h index 3f7dd9db18ba..c2e3d6833302 100644 --- a/clang/include/clang/Lex/PreprocessorOptions.h +++ b/clang/include/clang/Lex/PreprocessorOptions.h @@ -211,10 +211,6 @@ public: /// If set, the UNIX timestamp specified by SOURCE_DATE_EPOCH. std::optional<uint64_t> SourceDateEpoch; - /// If set, the preprocessor reports an error when processing #pragma mc_func - /// on AIX. - bool ErrorOnPragmaMcfuncOnAIX = false; - public: PreprocessorOptions() : PrecompiledPreambleBytes(0, false) {} @@ -252,7 +248,6 @@ public: PrecompiledPreambleBytes.first = 0; PrecompiledPreambleBytes.second = false; RetainExcludedConditionalBlocks = false; - ErrorOnPragmaMcfuncOnAIX = false; } }; diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index 35bb1a19d40f..f256d603ae62 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -221,7 +221,6 @@ class Parser : public CodeCompletionHandler { std::unique_ptr<PragmaHandler> MaxTokensHerePragmaHandler; std::unique_ptr<PragmaHandler> MaxTokensTotalPragmaHandler; std::unique_ptr<PragmaHandler> RISCVPragmaHandler; - std::unique_ptr<PragmaHandler> MCFuncPragmaHandler; std::unique_ptr<CommentHandler> CommentSemaHandler; diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 5dd0ba33f8a9..9b7e3af0e449 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -721,6 +721,9 @@ enum ASTRecordTypes { /// Record code for \#pragma clang unsafe_buffer_usage begin/end PP_UNSAFE_BUFFER_USAGE = 69, + + /// Record code for vtables to emit. + VTABLES_TO_EMIT = 70, }; /// Record types used within a source manager block. diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index 76e51ac7ab97..671520a3602b 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -790,6 +790,11 @@ private: /// the consumer eagerly. SmallVector<GlobalDeclID, 16> EagerlyDeserializedDecls; + /// The IDs of all vtables to emit. The referenced declarations are passed + /// to the consumers' HandleVTable eagerly after passing + /// EagerlyDeserializedDecls. + SmallVector<GlobalDeclID, 16> VTablesToEmit; + /// The IDs of all tentative definitions stored in the chain. 
/// /// Sema keeps track of all tentative definitions in a TU because it has to @@ -1500,6 +1505,7 @@ private: bool isConsumerInterestedIn(Decl *D); void PassInterestingDeclsToConsumer(); void PassInterestingDeclToConsumer(Decl *D); + void PassVTableToConsumer(CXXRecordDecl *RD); void finishPendingActions(); void diagnoseOdrViolations(); diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h index a0e475ec9f86..71a7c28047e3 100644 --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -500,6 +500,10 @@ private: std::vector<SourceRange> NonAffectingRanges; std::vector<SourceLocation::UIntTy> NonAffectingOffsetAdjustments; + /// A list of classes which need to emit the VTable in the corresponding + /// object file. + llvm::SmallVector<CXXRecordDecl *> PendingEmittingVTables; + /// Computes input files that didn't affect compilation of the current module, /// and initializes data structures necessary for leaving those files out /// during \c SourceManager serialization. @@ -857,6 +861,8 @@ public: return PredefinedDecls.count(D); } + void handleVTable(CXXRecordDecl *RD); + private: // ASTDeserializationListener implementation void ReaderInitialized(ASTReader *Reader) override; @@ -951,6 +957,7 @@ public: void InitializeSema(Sema &S) override { SemaPtr = &S; } void HandleTranslationUnit(ASTContext &Ctx) override; + void HandleVTable(CXXRecordDecl *RD) override { Writer.handleVTable(RD); } ASTMutationListener *GetASTMutationListener() override; ASTDeserializationListener *GetASTDeserializationListener() override; bool hasEmittedPCH() const { return Buffer->IsComplete; } diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 7af9ea7105bb..3da5e888f251 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -12405,8 +12405,7 @@ bool ASTContext::DeclMustBeEmitted(const Decl *D) { !isMSStaticDataMemberInlineDefinition(VD)) return false; - // Variables in other module units shouldn't be forced to be emitted. - if (VD->isInAnotherModuleUnit()) + if (VD->shouldEmitInExternalSource()) return false; // Variables that can be needed in other TUs are required. diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index 08ef09d353af..e95992b99f7e 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -8578,13 +8578,15 @@ ASTNodeImporter::VisitUnresolvedLookupExpr(UnresolvedLookupExpr *E) { return UnresolvedLookupExpr::Create( Importer.getToContext(), *ToNamingClassOrErr, *ToQualifierLocOrErr, *ToTemplateKeywordLocOrErr, ToNameInfo, E->requiresADL(), &ToTAInfo, - ToDecls.begin(), ToDecls.end(), KnownDependent); + ToDecls.begin(), ToDecls.end(), KnownDependent, + /*KnownInstantiationDependent=*/E->isInstantiationDependent()); } return UnresolvedLookupExpr::Create( Importer.getToContext(), *ToNamingClassOrErr, *ToQualifierLocOrErr, ToNameInfo, E->requiresADL(), ToDecls.begin(), ToDecls.end(), - /*KnownDependent=*/E->isTypeDependent()); + /*KnownDependent=*/E->isTypeDependent(), + /*KnownInstantiationDependent=*/E->isInstantiationDependent()); } ExpectedStmt diff --git a/clang/lib/AST/DeclBase.cpp b/clang/lib/AST/DeclBase.cpp index bc5a9206c0db..b59f118380ca 100644 --- a/clang/lib/AST/DeclBase.cpp +++ b/clang/lib/AST/DeclBase.cpp @@ -1125,20 +1125,36 @@ bool Decl::isInAnotherModuleUnit() const { if (!M) return false; + // FIXME or NOTE: maybe we need to be clear about the semantics + // of clang header modules. 
e.g., if this lives in a clang header + // module included by the current unit, should we return false + // here? + // + // This is clear for header units as the specification says the + // header units live in a synthesised translation unit. So we + // can return false here. M = M->getTopLevelModule(); - // FIXME: It is problematic if the header module lives in another module - // unit. Consider to fix this by techniques like - // ExternalASTSource::hasExternalDefinitions. - if (M->isHeaderLikeModule()) + if (!M->isNamedModule()) return false; - // A global module without parent implies that we're parsing the global - // module. So it can't be in another module unit. - if (M->isGlobalModule()) + return M != getASTContext().getCurrentNamedModule(); +} + +bool Decl::isInCurrentModuleUnit() const { + auto *M = getOwningModule(); + + if (!M || !M->isNamedModule()) return false; - assert(M->isNamedModule() && "New module kind?"); - return M != getASTContext().getCurrentNamedModule(); + return M == getASTContext().getCurrentNamedModule(); +} + +bool Decl::shouldEmitInExternalSource() const { + ExternalASTSource *Source = getASTContext().getExternalSource(); + if (!Source) + return false; + + return Source->hasExternalDefinitions(this) == ExternalASTSource::EK_Always; } bool Decl::isFromExplicitGlobalModule() const { diff --git a/clang/lib/AST/ExprCXX.cpp b/clang/lib/AST/ExprCXX.cpp index 8d2a1b5611cc..45e2badf2ddd 100644 --- a/clang/lib/AST/ExprCXX.cpp +++ b/clang/lib/AST/ExprCXX.cpp @@ -402,10 +402,11 @@ UnresolvedLookupExpr::UnresolvedLookupExpr( NestedNameSpecifierLoc QualifierLoc, SourceLocation TemplateKWLoc, const DeclarationNameInfo &NameInfo, bool RequiresADL, const TemplateArgumentListInfo *TemplateArgs, UnresolvedSetIterator Begin, - UnresolvedSetIterator End, bool KnownDependent) + UnresolvedSetIterator End, bool KnownDependent, + bool KnownInstantiationDependent) : OverloadExpr(UnresolvedLookupExprClass, Context, QualifierLoc, TemplateKWLoc, NameInfo, TemplateArgs, Begin, End, - KnownDependent, false, false), + KnownDependent, KnownInstantiationDependent, false), NamingClass(NamingClass) { UnresolvedLookupExprBits.RequiresADL = RequiresADL; } @@ -420,7 +421,7 @@ UnresolvedLookupExpr *UnresolvedLookupExpr::Create( const ASTContext &Context, CXXRecordDecl *NamingClass, NestedNameSpecifierLoc QualifierLoc, const DeclarationNameInfo &NameInfo, bool RequiresADL, UnresolvedSetIterator Begin, UnresolvedSetIterator End, - bool KnownDependent) { + bool KnownDependent, bool KnownInstantiationDependent) { unsigned NumResults = End - Begin; unsigned Size = totalSizeToAlloc<DeclAccessPair, ASTTemplateKWAndArgsInfo, TemplateArgumentLoc>(NumResults, 0, 0); @@ -428,7 +429,8 @@ UnresolvedLookupExpr *UnresolvedLookupExpr::Create( return new (Mem) UnresolvedLookupExpr( Context, NamingClass, QualifierLoc, /*TemplateKWLoc=*/SourceLocation(), NameInfo, RequiresADL, - /*TemplateArgs=*/nullptr, Begin, End, KnownDependent); + /*TemplateArgs=*/nullptr, Begin, End, KnownDependent, + KnownInstantiationDependent); } UnresolvedLookupExpr *UnresolvedLookupExpr::Create( @@ -436,7 +438,8 @@ UnresolvedLookupExpr *UnresolvedLookupExpr::Create( NestedNameSpecifierLoc QualifierLoc, SourceLocation TemplateKWLoc, const DeclarationNameInfo &NameInfo, bool RequiresADL, const TemplateArgumentListInfo *Args, UnresolvedSetIterator Begin, - UnresolvedSetIterator End, bool KnownDependent) { + UnresolvedSetIterator End, bool KnownDependent, + bool KnownInstantiationDependent) { unsigned NumResults = End - Begin; bool 
HasTemplateKWAndArgsInfo = Args || TemplateKWLoc.isValid(); unsigned NumTemplateArgs = Args ? Args->size() : 0; @@ -444,9 +447,9 @@ UnresolvedLookupExpr *UnresolvedLookupExpr::Create( TemplateArgumentLoc>( NumResults, HasTemplateKWAndArgsInfo, NumTemplateArgs); void *Mem = Context.Allocate(Size, alignof(UnresolvedLookupExpr)); - return new (Mem) UnresolvedLookupExpr(Context, NamingClass, QualifierLoc, - TemplateKWLoc, NameInfo, RequiresADL, - Args, Begin, End, KnownDependent); + return new (Mem) UnresolvedLookupExpr( + Context, NamingClass, QualifierLoc, TemplateKWLoc, NameInfo, RequiresADL, + Args, Begin, End, KnownDependent, KnownInstantiationDependent); } UnresolvedLookupExpr *UnresolvedLookupExpr::CreateEmpty( diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 234a9c16e39d..6e69e84a2344 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -2032,7 +2032,7 @@ static void getTrivialDefaultFunctionAttributes( } TargetInfo::BranchProtectionInfo BPI(LangOpts); - TargetCodeGenInfo::setBranchProtectionFnAttributes(BPI, FuncAttrs); + TargetCodeGenInfo::initBranchProtectionFnAttributes(BPI, FuncAttrs); } /// Merges `target-features` from \TargetOpts and \F, and sets the result in diff --git a/clang/lib/CodeGen/CGVTables.cpp b/clang/lib/CodeGen/CGVTables.cpp index 7f729d359b82..267bdf098297 100644 --- a/clang/lib/CodeGen/CGVTables.cpp +++ b/clang/lib/CodeGen/CGVTables.cpp @@ -1078,29 +1078,41 @@ llvm::GlobalVariable::LinkageTypes CodeGenModule::getVTableLinkage(const CXXRecordDecl *RD) { if (!RD->isExternallyVisible()) return llvm::GlobalVariable::InternalLinkage; - - // We're at the end of the translation unit, so the current key - // function is fully correct. - const CXXMethodDecl *keyFunction = Context.getCurrentKeyFunction(RD); - if (keyFunction && !RD->hasAttr<DLLImportAttr>()) { + + // In windows, the linkage of vtable is not related to modules. + bool IsInNamedModule = !getTarget().getCXXABI().isMicrosoft() && + RD->isInNamedModule(); + // If the CXXRecordDecl is not in a module unit, we need to get + // its key function. We're at the end of the translation unit, so the current + // key function is fully correct. + const CXXMethodDecl *keyFunction = + IsInNamedModule ? nullptr : Context.getCurrentKeyFunction(RD); + if (IsInNamedModule || (keyFunction && !RD->hasAttr<DLLImportAttr>())) { // If this class has a key function, use that to determine the // linkage of the vtable. const FunctionDecl *def = nullptr; - if (keyFunction->hasBody(def)) + if (keyFunction && keyFunction->hasBody(def)) keyFunction = cast<CXXMethodDecl>(def); - switch (keyFunction->getTemplateSpecializationKind()) { - case TSK_Undeclared: - case TSK_ExplicitSpecialization: + bool IsExternalDefinition = + IsInNamedModule ? RD->shouldEmitInExternalSource() : !def; + + TemplateSpecializationKind Kind = + IsInNamedModule ? 
RD->getTemplateSpecializationKind() + : keyFunction->getTemplateSpecializationKind(); + + switch (Kind) { + case TSK_Undeclared: + case TSK_ExplicitSpecialization: assert( - (def || CodeGenOpts.OptimizationLevel > 0 || + (IsInNamedModule || def || CodeGenOpts.OptimizationLevel > 0 || CodeGenOpts.getDebugInfo() != llvm::codegenoptions::NoDebugInfo) && - "Shouldn't query vtable linkage without key function, " - "optimizations, or debug info"); - if (!def && CodeGenOpts.OptimizationLevel > 0) + "Shouldn't query vtable linkage without the class in module units, " + "key function, optimizations, or debug info"); + if (IsExternalDefinition && CodeGenOpts.OptimizationLevel > 0) return llvm::GlobalVariable::AvailableExternallyLinkage; - if (keyFunction->isInlined()) + if (keyFunction && keyFunction->isInlined()) return !Context.getLangOpts().AppleKext ? llvm::GlobalVariable::LinkOnceODRLinkage : llvm::Function::InternalLinkage; @@ -1119,7 +1131,7 @@ CodeGenModule::getVTableLinkage(const CXXRecordDecl *RD) { case TSK_ExplicitInstantiationDeclaration: llvm_unreachable("Should not have been asked to emit this"); - } + } } // -fapple-kext mode does not support weak linkage, so we must use @@ -1213,22 +1225,20 @@ bool CodeGenVTables::isVTableExternal(const CXXRecordDecl *RD) { TSK == TSK_ExplicitInstantiationDefinition) return false; + // Otherwise, if the class is attached to a module, the tables are uniquely + // emitted in the object for the module unit in which it is defined. + if (RD->isInNamedModule()) + return RD->shouldEmitInExternalSource(); + // Otherwise, if the class doesn't have a key function (possibly // anymore), the vtable must be defined here. const CXXMethodDecl *keyFunction = CGM.getContext().getCurrentKeyFunction(RD); if (!keyFunction) return false; - const FunctionDecl *Def; // Otherwise, if we don't have a definition of the key function, the // vtable must be defined somewhere else. - if (!keyFunction->hasBody(Def)) - return true; - - assert(Def && "The body of the key function is not assigned to Def?"); - // If the non-inline key function comes from another module unit, the vtable - // must be defined there. - return Def->isInAnotherModuleUnit() && !Def->isInlineSpecified(); + return !keyFunction->hasBody(); } /// Given that we're currently at the end of the translation unit, and diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index af201554898f..2b2e23f1e5d7 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -880,8 +880,12 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, // Add pointer authentication attributes. 
const CodeGenOptions &CodeGenOpts = CGM.getCodeGenOpts(); + if (CodeGenOpts.PointerAuth.ReturnAddresses) + Fn->addFnAttr("ptrauth-returns"); if (CodeGenOpts.PointerAuth.FunctionPointers) Fn->addFnAttr("ptrauth-calls"); + if (CodeGenOpts.PointerAuth.AuthTraps) + Fn->addFnAttr("ptrauth-auth-traps"); if (CodeGenOpts.PointerAuth.IndirectGotos) Fn->addFnAttr("ptrauth-indirect-gotos"); diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index cd76f8406e7b..0be92fb2e275 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -2315,6 +2315,9 @@ bool ItaniumCXXABI::canSpeculativelyEmitVTable(const CXXRecordDecl *RD) const { if (!canSpeculativelyEmitVTableAsBaseClass(RD)) return false; + if (RD->shouldEmitInExternalSource()) + return false; + // For a complete-object vtable (or more specifically, for the VTT), we need // to be able to speculatively emit the vtables of all dynamic virtual bases. for (const auto &B : RD->vbases()) { diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index 38faa50cf19c..64a9a5554caf 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -209,13 +209,37 @@ llvm::Value *TargetCodeGenInfo::createEnqueuedBlockKernel( void TargetCodeGenInfo::setBranchProtectionFnAttributes( const TargetInfo::BranchProtectionInfo &BPI, llvm::Function &F) { - llvm::AttrBuilder FuncAttrs(F.getContext()); - setBranchProtectionFnAttributes(BPI, FuncAttrs); - F.addFnAttrs(FuncAttrs); + // Called on already created and initialized function where attributes already + // set from command line attributes but some might need to be removed as the + // actual BPI is different. + if (BPI.SignReturnAddr != LangOptions::SignReturnAddressScopeKind::None) { + F.addFnAttr("sign-return-address", BPI.getSignReturnAddrStr()); + F.addFnAttr("sign-return-address-key", BPI.getSignKeyStr()); + } else { + if (F.hasFnAttribute("sign-return-address")) + F.removeFnAttr("sign-return-address"); + if (F.hasFnAttribute("sign-return-address-key")) + F.removeFnAttr("sign-return-address-key"); + } + + auto AddRemoveAttributeAsSet = [&](bool Set, const StringRef &ModAttr) { + if (Set) + F.addFnAttr(ModAttr); + else if (F.hasFnAttribute(ModAttr)) + F.removeFnAttr(ModAttr); + }; + + AddRemoveAttributeAsSet(BPI.BranchTargetEnforcement, + "branch-target-enforcement"); + AddRemoveAttributeAsSet(BPI.BranchProtectionPAuthLR, + "branch-protection-pauth-lr"); + AddRemoveAttributeAsSet(BPI.GuardedControlStack, "guarded-control-stack"); } -void TargetCodeGenInfo::setBranchProtectionFnAttributes( +void TargetCodeGenInfo::initBranchProtectionFnAttributes( const TargetInfo::BranchProtectionInfo &BPI, llvm::AttrBuilder &FuncAttrs) { + // Only used for initializing attributes in the AttrBuilder, which will not + // contain any of these attributes so no need to remove anything. if (BPI.SignReturnAddr != LangOptions::SignReturnAddressScopeKind::None) { FuncAttrs.addAttribute("sign-return-address", BPI.getSignReturnAddrStr()); FuncAttrs.addAttribute("sign-return-address-key", BPI.getSignKeyStr()); diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h index 2f2138582ba1..156b4ff4353b 100644 --- a/clang/lib/CodeGen/TargetInfo.h +++ b/clang/lib/CodeGen/TargetInfo.h @@ -414,13 +414,16 @@ public: return nullptr; } + // Set the Branch Protection Attributes of the Function accordingly to the + // BPI. Remove attributes that contradict with current BPI. 
static void setBranchProtectionFnAttributes(const TargetInfo::BranchProtectionInfo &BPI, llvm::Function &F); + // Add the Branch Protection Attributes of the FuncAttrs. static void - setBranchProtectionFnAttributes(const TargetInfo::BranchProtectionInfo &BPI, - llvm::AttrBuilder &FuncAttrs); + initBranchProtectionFnAttributes(const TargetInfo::BranchProtectionInfo &BPI, + llvm::AttrBuilder &FuncAttrs); protected: static std::string qualifyWindowsLibrary(StringRef Lib); diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp index 1dec3cd40ebd..97381f673c28 100644 --- a/clang/lib/CodeGen/Targets/AArch64.cpp +++ b/clang/lib/CodeGen/Targets/AArch64.cpp @@ -840,12 +840,13 @@ static bool isStreamingCompatible(const FunctionDecl *F) { static void diagnoseIfNeedsFPReg(DiagnosticsEngine &Diags, const StringRef ABIName, const AArch64ABIInfo &ABIInfo, - const QualType &Ty, const NamedDecl *D) { + const QualType &Ty, const NamedDecl *D, + SourceLocation loc) { const Type *HABase = nullptr; uint64_t HAMembers = 0; if (Ty->isFloatingType() || Ty->isVectorType() || ABIInfo.isHomogeneousAggregate(Ty, HABase, HAMembers)) { - Diags.Report(D->getLocation(), diag::err_target_unsupported_type_for_abi) + Diags.Report(loc, diag::err_target_unsupported_type_for_abi) << D->getDeclName() << Ty << ABIName; } } @@ -860,10 +861,11 @@ void AArch64TargetCodeGenInfo::checkFunctionABI( if (!TI.hasFeature("fp") && !ABIInfo.isSoftFloat()) { diagnoseIfNeedsFPReg(CGM.getDiags(), TI.getABI(), ABIInfo, - FuncDecl->getReturnType(), FuncDecl); + FuncDecl->getReturnType(), FuncDecl, + FuncDecl->getLocation()); for (ParmVarDecl *PVD : FuncDecl->parameters()) { diagnoseIfNeedsFPReg(CGM.getDiags(), TI.getABI(), ABIInfo, PVD->getType(), - PVD); + PVD, FuncDecl->getLocation()); } } } @@ -908,11 +910,11 @@ void AArch64TargetCodeGenInfo::checkFunctionCallABISoftFloat( return; diagnoseIfNeedsFPReg(CGM.getDiags(), TI.getABI(), ABIInfo, ReturnType, - Caller); + Callee ? Callee : Caller, CallLoc); for (const CallArg &Arg : Args) diagnoseIfNeedsFPReg(CGM.getDiags(), TI.getABI(), ABIInfo, Arg.getType(), - Caller); + Callee ? 
Callee : Caller, CallLoc); } void AArch64TargetCodeGenInfo::checkFunctionCallABI(CodeGenModule &CGM, diff --git a/clang/lib/Driver/ToolChains/AIX.cpp b/clang/lib/Driver/ToolChains/AIX.cpp index fb780fb75651..b04502a57a9f 100644 --- a/clang/lib/Driver/ToolChains/AIX.cpp +++ b/clang/lib/Driver/ToolChains/AIX.cpp @@ -557,12 +557,6 @@ void AIX::addClangTargetOptions( if (!Args.getLastArgNoClaim(options::OPT_fsized_deallocation, options::OPT_fno_sized_deallocation)) CC1Args.push_back("-fno-sized-deallocation"); - - if (Args.hasFlag(options::OPT_ferr_pragma_mc_func_aix, - options::OPT_fno_err_pragma_mc_func_aix, false)) - CC1Args.push_back("-ferr-pragma-mc-func-aix"); - else - CC1Args.push_back("-fno-err-pragma-mc-func-aix"); } void AIX::addProfileRTLibs(const llvm::opt::ArgList &Args, diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 59453c484ae4..61d12b10dfb6 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -609,6 +609,10 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Args.MakeArgString( "--pxtas-path=" + Args.getLastArgValue(options::OPT_ptxas_path_EQ))); + if (Args.hasArg(options::OPT_cuda_path_EQ)) + CmdArgs.push_back(Args.MakeArgString( + "--cuda-path=" + Args.getLastArgValue(options::OPT_cuda_path_EQ))); + // Add paths specified in LIBRARY_PATH environment variable as -L options. addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH"); diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index c6f9d7beffb1..17d57b2f7eed 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -2923,22 +2923,45 @@ bool Darwin::isAlignedAllocationUnavailable() const { return TargetVersion < alignedAllocMinVersion(OS); } -static bool sdkSupportsBuiltinModules(const Darwin::DarwinPlatformKind &TargetPlatform, const std::optional<DarwinSDKInfo> &SDKInfo) { +static bool sdkSupportsBuiltinModules( + const Darwin::DarwinPlatformKind &TargetPlatform, + const Darwin::DarwinEnvironmentKind &TargetEnvironment, + const std::optional<DarwinSDKInfo> &SDKInfo) { + if (TargetEnvironment == Darwin::NativeEnvironment || + TargetEnvironment == Darwin::Simulator || + TargetEnvironment == Darwin::MacCatalyst) { + // Standard xnu/Mach/Darwin based environments + // depend on the SDK version. + } else { + // All other environments support builtin modules from the start. + return true; + } + if (!SDKInfo) + // If there is no SDK info, assume this is building against a + // pre-SDK version of macOS (i.e. before Mac OS X 10.4). Those + // don't support modules anyway, but the headers definitely + // don't support builtin modules either. It might also be some + // kind of degenerate build environment, err on the side of + // the old behavior which is to not use builtin modules. return false; VersionTuple SDKVersion = SDKInfo->getVersion(); switch (TargetPlatform) { + // Existing SDKs added support for builtin modules in the fall + // 2024 major releases. 
case Darwin::MacOS: - return SDKVersion >= VersionTuple(99U); + return SDKVersion >= VersionTuple(15U); case Darwin::IPhoneOS: - return SDKVersion >= VersionTuple(99U); + return SDKVersion >= VersionTuple(18U); case Darwin::TvOS: - return SDKVersion >= VersionTuple(99U); + return SDKVersion >= VersionTuple(18U); case Darwin::WatchOS: - return SDKVersion >= VersionTuple(99U); + return SDKVersion >= VersionTuple(11U); case Darwin::XROS: - return SDKVersion >= VersionTuple(99U); + return SDKVersion >= VersionTuple(2U); + + // New SDKs support builtin modules from the start. default: return true; } @@ -3030,7 +3053,7 @@ void Darwin::addClangTargetOptions( // i.e. when the builtin stdint.h is in the Darwin module too, the cycle // goes away. Note that -fbuiltin-headers-in-system-modules does nothing // to fix the same problem with C++ headers, and is generally fragile. - if (!sdkSupportsBuiltinModules(TargetPlatform, SDKInfo)) + if (!sdkSupportsBuiltinModules(TargetPlatform, TargetEnvironment, SDKInfo)) CC1Args.push_back("-fbuiltin-headers-in-system-modules"); if (!DriverArgs.hasArgNoClaim(options::OPT_fdefine_target_os_macros, diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index 52c2ee90b1b2..543f3965dfd4 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -2463,7 +2463,8 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( // lists should shrink over time. Please don't add more elements to *Triples. static const char *const AArch64LibDirs[] = {"/lib64", "/lib"}; static const char *const AArch64Triples[] = { - "aarch64-none-linux-gnu", "aarch64-redhat-linux", "aarch64-suse-linux"}; + "aarch64-none-linux-gnu", "aarch64-linux-gnu", "aarch64-redhat-linux", + "aarch64-suse-linux"}; static const char *const AArch64beLibDirs[] = {"/lib"}; static const char *const AArch64beTriples[] = {"aarch64_be-none-linux-gnu"}; diff --git a/clang/lib/Driver/Types.cpp b/clang/lib/Driver/Types.cpp index a7b6b9000e1d..2b9b391c19c9 100644 --- a/clang/lib/Driver/Types.cpp +++ b/clang/lib/Driver/Types.cpp @@ -242,7 +242,9 @@ bool types::isCXX(ID Id) { case TY_CXXHUHeader: case TY_PP_CXXHeaderUnit: case TY_ObjCXXHeader: case TY_PP_ObjCXXHeader: - case TY_CXXModule: case TY_PP_CXXModule: + case TY_CXXModule: + case TY_PP_CXXModule: + case TY_ModuleFile: case TY_PP_CLCXX: case TY_CUDA: case TY_PP_CUDA: case TY_CUDA_DEVICE: case TY_HIP: diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 63c8699fd62d..6b9253613788 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -2872,9 +2872,18 @@ private: return false; // Search for unexpected tokens. - for (auto *Prev = BeforeRParen; Prev != LParen; Prev = Prev->Previous) + for (auto *Prev = BeforeRParen; Prev != LParen; Prev = Prev->Previous) { + if (Prev->is(tok::r_paren)) { + Prev = Prev->MatchingParen; + if (!Prev) + return false; + if (Prev->is(TT_FunctionTypeLParen)) + break; + continue; + } if (!Prev->isOneOf(tok::kw_const, tok::identifier, tok::coloncolon)) return false; + } return true; } diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index d406a531a5c0..688c7c5b1e97 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -507,6 +507,9 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { if (!Line->InMacroBody && !Style.isTableGen()) { // Skip PPDirective lines and comments. 
while (NextTok->is(tok::hash)) { + NextTok = Tokens->getNextToken(); + if (NextTok->is(tok::pp_not_keyword)) + break; do { NextTok = Tokens->getNextToken(); } while (NextTok->NewlinesBefore == 0 && NextTok->isNot(tok::eof)); diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index f6b6c44a4cab..028fdb2cc6b9 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1504,6 +1504,8 @@ void CompilerInvocation::setDefaultPointerAuthOptions( Opts.CXXMemberFunctionPointers = PointerAuthSchema(Key::ASIA, false, Discrimination::Type); } + Opts.ReturnAddresses = LangOpts.PointerAuthReturns; + Opts.AuthTraps = LangOpts.PointerAuthAuthTraps; Opts.IndirectGotos = LangOpts.PointerAuthIndirectGotos; } @@ -1511,7 +1513,8 @@ static void parsePointerAuthOptions(PointerAuthOptions &Opts, const LangOptions &LangOpts, const llvm::Triple &Triple, DiagnosticsEngine &Diags) { - if (!LangOpts.PointerAuthCalls && !LangOpts.PointerAuthIndirectGotos) + if (!LangOpts.PointerAuthCalls && !LangOpts.PointerAuthReturns && + !LangOpts.PointerAuthAuthTraps && !LangOpts.PointerAuthIndirectGotos) return; CompilerInvocation::setDefaultPointerAuthOptions(Opts, LangOpts, Triple); diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index 920ddf7e5991..3ed7243deba8 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -763,6 +763,7 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts, Builder.defineMacro("__cpp_placeholder_variables", "202306L"); // C++26 features supported in earlier language modes. + Builder.defineMacro("__cpp_pack_indexing", "202311L"); Builder.defineMacro("__cpp_deleted_function", "202403L"); if (LangOpts.Char8) diff --git a/clang/lib/Headers/ptrauth.h b/clang/lib/Headers/ptrauth.h index 4724155b0dc7..154b599862a8 100644 --- a/clang/lib/Headers/ptrauth.h +++ b/clang/lib/Headers/ptrauth.h @@ -28,6 +28,12 @@ typedef enum { /* A process-specific key which can be used to sign data pointers. */ ptrauth_key_process_dependent_data = ptrauth_key_asdb, + /* The key used to sign return addresses on the stack. + The extra data is based on the storage address of the return address. + On AArch64, that is always the storage address of the return address + 8 + (or, in other words, the value of the stack pointer on function entry) */ + ptrauth_key_return_address = ptrauth_key_process_dependent_code, + /* The key used to sign C function pointers. The extra data is always 0. 
*/ ptrauth_key_function_pointer = ptrauth_key_process_independent_code, diff --git a/clang/lib/Parse/ParsePragma.cpp b/clang/lib/Parse/ParsePragma.cpp index aef4ddb75881..cc6f18b5b319 100644 --- a/clang/lib/Parse/ParsePragma.cpp +++ b/clang/lib/Parse/ParsePragma.cpp @@ -14,7 +14,6 @@ #include "clang/Basic/PragmaKinds.h" #include "clang/Basic/TargetInfo.h" #include "clang/Lex/Preprocessor.h" -#include "clang/Lex/PreprocessorOptions.h" #include "clang/Lex/Token.h" #include "clang/Parse/LoopHint.h" #include "clang/Parse/ParseDiagnostic.h" @@ -412,19 +411,6 @@ private: Sema &Actions; }; -struct PragmaMCFuncHandler : public PragmaHandler { - PragmaMCFuncHandler(bool ReportError) - : PragmaHandler("mc_func"), ReportError(ReportError) {} - void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, - Token &Tok) override { - if (ReportError) - PP.Diag(Tok, diag::err_pragma_mc_func_not_supported); - } - -private: - bool ReportError = false; -}; - void markAsReinjectedForRelexing(llvm::MutableArrayRef<clang::Token> Toks) { for (auto &T : Toks) T.setFlag(clang::Token::IsReinjected); @@ -582,12 +568,6 @@ void Parser::initializePragmaHandlers() { RISCVPragmaHandler = std::make_unique<PragmaRISCVHandler>(Actions); PP.AddPragmaHandler("clang", RISCVPragmaHandler.get()); } - - if (getTargetInfo().getTriple().isOSAIX()) { - MCFuncPragmaHandler = std::make_unique<PragmaMCFuncHandler>( - PP.getPreprocessorOpts().ErrorOnPragmaMcfuncOnAIX); - PP.AddPragmaHandler(MCFuncPragmaHandler.get()); - } } void Parser::resetPragmaHandlers() { @@ -722,11 +702,6 @@ void Parser::resetPragmaHandlers() { PP.RemovePragmaHandler("clang", RISCVPragmaHandler.get()); RISCVPragmaHandler.reset(); } - - if (getTargetInfo().getTriple().isOSAIX()) { - PP.RemovePragmaHandler(MCFuncPragmaHandler.get()); - MCFuncPragmaHandler.reset(); - } } /// Handle the annotation token produced for #pragma unused(...) diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp index 9e16b67284be..c34d32002b5a 100644 --- a/clang/lib/Sema/SemaConcept.cpp +++ b/clang/lib/Sema/SemaConcept.cpp @@ -531,6 +531,10 @@ static ExprResult calculateConstraintSatisfaction( std::optional<unsigned> EvaluateFoldExpandedConstraintSize(const CXXFoldExpr *FE) const { + + // We should ignore errors in the presence of packs of different size. 
+ Sema::SFINAETrap Trap(S); + Expr *Pattern = FE->getPattern(); SmallVector<UnexpandedParameterPack, 2> Unexpanded; diff --git a/clang/lib/Sema/SemaCoroutine.cpp b/clang/lib/Sema/SemaCoroutine.cpp index 81334c817b2a..4e180d648cd8 100644 --- a/clang/lib/Sema/SemaCoroutine.cpp +++ b/clang/lib/Sema/SemaCoroutine.cpp @@ -820,7 +820,8 @@ ExprResult Sema::BuildOperatorCoawaitLookupExpr(Scope *S, SourceLocation Loc) { Expr *CoawaitOp = UnresolvedLookupExpr::Create( Context, /*NamingClass*/ nullptr, NestedNameSpecifierLoc(), DeclarationNameInfo(OpName, Loc), /*RequiresADL*/ true, Functions.begin(), - Functions.end(), /*KnownDependent=*/false); + Functions.end(), /*KnownDependent=*/false, + /*KnownInstantiationDependent=*/false); assert(CoawaitOp); return CoawaitOp; } diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 01231f8e385e..d608dd92a4b4 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -1219,7 +1219,7 @@ Corrected: return NameClassification::OverloadSet(UnresolvedLookupExpr::Create( Context, Result.getNamingClass(), SS.getWithLocInContext(Context), Result.getLookupNameInfo(), ADL, Result.begin(), Result.end(), - /*KnownDependent=*/false)); + /*KnownDependent=*/false, /*KnownInstantiationDependent=*/false)); } ExprResult @@ -18073,6 +18073,15 @@ void Sema::ActOnTagFinishDefinition(Scope *S, Decl *TagD, if (NumInitMethods > 1 || !Def->hasInitMethod()) Diag(RD->getLocation(), diag::err_sycl_special_type_num_init_method); } + + // If we're defining a dynamic class in a module interface unit, we always + // need to produce the vtable for it, even if the vtable is not used in the + // current TU. + // + // The case where the current class is not dynamic is handled in + // MarkVTableUsed. + if (getCurrentModule() && getCurrentModule()->isInterfaceOrPartition()) + MarkVTableUsed(RD->getLocation(), RD, /*DefinitionRequired=*/true); } // Exit this scope of this tag's definition. diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 04b8d88cae21..4e4f91de8cd5 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -1289,7 +1289,7 @@ static bool checkTupleLikeDecomposition(Sema &S, S.Context, nullptr, NestedNameSpecifierLoc(), SourceLocation(), DeclarationNameInfo(GetDN, Loc), /*RequiresADL=*/true, &Args, UnresolvedSetIterator(), UnresolvedSetIterator(), - /*KnownDependent=*/false); + /*KnownDependent=*/false, /*KnownInstantiationDependent=*/false); Expr *Arg = E.get(); E = S.BuildCallExpr(nullptr, Get, Loc, Arg, Loc); @@ -7042,11 +7042,43 @@ void Sema::CheckCompletedCXXClass(Scope *S, CXXRecordDecl *Record) { } } + bool EffectivelyConstexprDestructor = true; + // Avoid triggering vtable instantiation due to a dtor that is not + // "effectively constexpr" for better compatibility. + // See https://github.com/llvm/llvm-project/issues/102293 for more info. 
+ if (isa<CXXDestructorDecl>(M)) { + auto Check = [](QualType T, auto &&Check) -> bool { + const CXXRecordDecl *RD = + T->getBaseElementTypeUnsafe()->getAsCXXRecordDecl(); + if (!RD || !RD->isCompleteDefinition()) + return true; + + if (!RD->hasConstexprDestructor()) + return false; + + QualType CanUnqualT = T.getCanonicalType().getUnqualifiedType(); + for (const CXXBaseSpecifier &B : RD->bases()) + if (B.getType().getCanonicalType().getUnqualifiedType() != + CanUnqualT && + !Check(B.getType(), Check)) + return false; + for (const FieldDecl *FD : RD->fields()) + if (FD->getType().getCanonicalType().getUnqualifiedType() != + CanUnqualT && + !Check(FD->getType(), Check)) + return false; + return true; + }; + EffectivelyConstexprDestructor = + Check(QualType(Record->getTypeForDecl(), 0), Check); + } + // Define defaulted constexpr virtual functions that override a base class // function right away. // FIXME: We can defer doing this until the vtable is marked as used. if (CSM != CXXSpecialMemberKind::Invalid && !M->isDeleted() && - M->isDefaulted() && M->isConstexpr() && M->size_overridden_methods()) + M->isDefaulted() && M->isConstexpr() && M->size_overridden_methods() && + EffectivelyConstexprDestructor) DefineDefaultedFunction(*this, M, M->getLocation()); if (!Incomplete) @@ -18485,11 +18517,15 @@ bool Sema::DefineUsedVTables() { bool DefineVTable = true; - // If this class has a key function, but that key function is - // defined in another translation unit, we don't need to emit the - // vtable even though we're using it. const CXXMethodDecl *KeyFunction = Context.getCurrentKeyFunction(Class); - if (KeyFunction && !KeyFunction->hasBody()) { + // V-tables for non-template classes with an owning module are always + // uniquely emitted in that module. + if (Class->isInCurrentModuleUnit()) { + DefineVTable = true; + } else if (KeyFunction && !KeyFunction->hasBody()) { + // If this class has a key function, but that key function is + // defined in another translation unit, we don't need to emit the + // vtable even though we're using it. // The key function is in another translation unit. DefineVTable = false; TemplateSpecializationKind TSK = @@ -18534,7 +18570,7 @@ bool Sema::DefineUsedVTables() { DefinedAnything = true; MarkVirtualMembersReferenced(Loc, Class); CXXRecordDecl *Canonical = Class->getCanonicalDecl(); - if (VTablesUsed[Canonical]) + if (VTablesUsed[Canonical] && !Class->shouldEmitInExternalSource()) Consumer.HandleVTable(Class); // Warn if we're emitting a weak vtable. 
The vtable will be weak if there is diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 74c0e0170590..edb8b79a2220 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -3188,7 +3188,7 @@ ExprResult Sema::BuildDeclarationNameExpr(const CXXScopeSpec &SS, UnresolvedLookupExpr *ULE = UnresolvedLookupExpr::Create( Context, R.getNamingClass(), SS.getWithLocInContext(Context), R.getLookupNameInfo(), NeedsADL, R.begin(), R.end(), - /*KnownDependent=*/false); + /*KnownDependent=*/false, /*KnownInstantiationDependent=*/false); return ULE; } diff --git a/clang/lib/Sema/SemaExprMember.cpp b/clang/lib/Sema/SemaExprMember.cpp index 2070f3b7bb3a..f1ba26f38520 100644 --- a/clang/lib/Sema/SemaExprMember.cpp +++ b/clang/lib/Sema/SemaExprMember.cpp @@ -331,7 +331,8 @@ ExprResult Sema::BuildPossibleImplicitMemberExpr( return UnresolvedLookupExpr::Create( Context, R.getNamingClass(), SS.getWithLocInContext(Context), TemplateKWLoc, R.getLookupNameInfo(), /*RequiresADL=*/false, - TemplateArgs, R.begin(), R.end(), /*KnownDependent=*/true); + TemplateArgs, R.begin(), R.end(), /*KnownDependent=*/true, + /*KnownInstantiationDependent=*/true); case IMA_Error_StaticOrExplicitContext: case IMA_Error_Unrelated: diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index dc2ba039afe7..eea4bdfa68b5 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -515,8 +515,8 @@ class InitListChecker { uint64_t ElsCount = 1; // Otherwise try to fill whole array with embed data. if (Entity.getKind() == InitializedEntity::EK_ArrayElement) { - ValueDecl *ArrDecl = Entity.getParent()->getDecl(); - auto *AType = SemaRef.Context.getAsArrayType(ArrDecl->getType()); + auto *AType = + SemaRef.Context.getAsArrayType(Entity.getParent()->getType()); assert(AType && "expected array type when initializing array"); ElsCount = Embed->getDataElementCount(); if (const auto *CAType = dyn_cast<ConstantArrayType>(AType)) diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 67e3c1d9067f..6cbc075302eb 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -17968,7 +17968,8 @@ buildDeclareReductionRef(Sema &SemaRef, SourceLocation Loc, SourceRange Range, return UnresolvedLookupExpr::Create( SemaRef.Context, /*NamingClass=*/nullptr, ReductionIdScopeSpec.getWithLocInContext(SemaRef.Context), ReductionId, - /*ADL=*/true, ResSet.begin(), ResSet.end(), /*KnownDependent=*/false); + /*ADL=*/true, ResSet.begin(), ResSet.end(), /*KnownDependent=*/false, + /*KnownInstantiationDependent=*/false); } // Lookup inside the classes. 
// C++ [over.match.oper]p3: @@ -20834,7 +20835,8 @@ static ExprResult buildUserDefinedMapperRef(Sema &SemaRef, Scope *S, return UnresolvedLookupExpr::Create( SemaRef.Context, /*NamingClass=*/nullptr, MapperIdScopeSpec.getWithLocInContext(SemaRef.Context), MapperId, - /*ADL=*/false, URS.begin(), URS.end(), /*KnownDependent=*/false); + /*ADL=*/false, URS.begin(), URS.end(), /*KnownDependent=*/false, + /*KnownInstantiationDependent=*/false); } SourceLocation Loc = MapperId.getLoc(); // [OpenMP 5.0], 2.19.7.3 declare mapper Directive, Restrictions diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 554a2df14bea..28fd3b06156b 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -14083,9 +14083,9 @@ ExprResult Sema::CreateUnresolvedLookupExpr(CXXRecordDecl *NamingClass, DeclarationNameInfo DNI, const UnresolvedSetImpl &Fns, bool PerformADL) { - return UnresolvedLookupExpr::Create(Context, NamingClass, NNSLoc, DNI, - PerformADL, Fns.begin(), Fns.end(), - /*KnownDependent=*/false); + return UnresolvedLookupExpr::Create( + Context, NamingClass, NNSLoc, DNI, PerformADL, Fns.begin(), Fns.end(), + /*KnownDependent=*/false, /*KnownInstantiationDependent=*/false); } ExprResult Sema::BuildCXXMemberCallExpr(Expr *E, NamedDecl *FoundDecl, diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index 87b1f98bbe5a..ca71542d886f 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -4436,7 +4436,8 @@ ExprResult Sema::BuildTemplateIdExpr(const CXXScopeSpec &SS, UnresolvedLookupExpr *ULE = UnresolvedLookupExpr::Create( Context, R.getNamingClass(), SS.getWithLocInContext(Context), TemplateKWLoc, R.getLookupNameInfo(), RequiresADL, TemplateArgs, - R.begin(), R.end(), KnownDependent); + R.begin(), R.end(), KnownDependent, + /*KnownInstantiationDependent=*/false); // Model the templates with UnresolvedTemplateTy. 
The expression should then // either be transformed in an instantiation or be diagnosed in diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 84e846356e43..51e6a4845bf6 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -10541,7 +10541,7 @@ TreeTransform<Derived>::TransformOMPReductionClause(OMPReductionClause *C) { SemaRef.Context, /*NamingClass=*/nullptr, ReductionIdScopeSpec.getWithLocInContext(SemaRef.Context), NameInfo, /*ADL=*/true, Decls.begin(), Decls.end(), - /*KnownDependent=*/false)); + /*KnownDependent=*/false, /*KnownInstantiationDependent=*/false)); } else UnresolvedReductions.push_back(nullptr); } @@ -10588,7 +10588,7 @@ OMPClause *TreeTransform<Derived>::TransformOMPTaskReductionClause( SemaRef.Context, /*NamingClass=*/nullptr, ReductionIdScopeSpec.getWithLocInContext(SemaRef.Context), NameInfo, /*ADL=*/true, Decls.begin(), Decls.end(), - /*KnownDependent=*/false)); + /*KnownDependent=*/false, /*KnownInstantiationDependent=*/false)); } else UnresolvedReductions.push_back(nullptr); } @@ -10634,7 +10634,7 @@ TreeTransform<Derived>::TransformOMPInReductionClause(OMPInReductionClause *C) { SemaRef.Context, /*NamingClass=*/nullptr, ReductionIdScopeSpec.getWithLocInContext(SemaRef.Context), NameInfo, /*ADL=*/true, Decls.begin(), Decls.end(), - /*KnownDependent=*/false)); + /*KnownDependent=*/false, /*KnownInstantiationDependent=*/false)); } else UnresolvedReductions.push_back(nullptr); } @@ -10816,7 +10816,7 @@ bool transformOMPMappableExprListClause( TT.getSema().Context, /*NamingClass=*/nullptr, MapperIdScopeSpec.getWithLocInContext(TT.getSema().Context), MapperIdInfo, /*ADL=*/true, Decls.begin(), Decls.end(), - /*KnownDependent=*/false)); + /*KnownDependent=*/false, /*KnownInstantiationDependent=*/false)); } else { UnresolvedMappers.push_back(nullptr); } diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 3cb96df12e4d..29aec144aec1 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -3921,6 +3921,13 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F, } break; + case VTABLES_TO_EMIT: + if (F.Kind == MK_MainFile || + getContext().getLangOpts().BuildingPCHWithObjectFile) + for (unsigned I = 0, N = Record.size(); I != N;) + VTablesToEmit.push_back(ReadDeclID(F, Record, I)); + break; + case IMPORTED_MODULES: if (!F.isModule()) { // If we aren't loading a module (which has its own exports), make @@ -8110,6 +8117,10 @@ void ASTReader::PassInterestingDeclToConsumer(Decl *D) { Consumer->HandleInterestingDecl(DeclGroupRef(D)); } +void ASTReader::PassVTableToConsumer(CXXRecordDecl *RD) { + Consumer->HandleVTable(RD); +} + void ASTReader::StartTranslationUnit(ASTConsumer *Consumer) { this->Consumer = Consumer; diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index 31ab6c651d59..c118f3818467 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -3684,6 +3684,54 @@ static void inheritDefaultTemplateArguments(ASTContext &Context, } } +// [basic.link]/p10: +// If two declarations of an entity are attached to different modules, +// the program is ill-formed; +static void checkMultipleDefinitionInNamedModules(ASTReader &Reader, Decl *D, + Decl *Previous) { + Module *M = Previous->getOwningModule(); + + // We only care about the case in named modules. 
+ if (!M || !M->isNamedModule()) + return; + + // If it is previous implcitly introduced, it is not meaningful to + // diagnose it. + if (Previous->isImplicit()) + return; + + // FIXME: Get rid of the enumeration of decl types once we have an appropriate + // abstract for decls of an entity. e.g., the namespace decl and using decl + // doesn't introduce an entity. + if (!isa<VarDecl, FunctionDecl, TagDecl, RedeclarableTemplateDecl>(Previous)) + return; + + // Skip implicit instantiations since it may give false positive diagnostic + // messages. + // FIXME: Maybe this shows the implicit instantiations may have incorrect + // module owner ships. But given we've finished the compilation of a module, + // how can we add new entities to that module? + if (auto *VTSD = dyn_cast<VarTemplateSpecializationDecl>(Previous); + VTSD && !VTSD->isExplicitSpecialization()) + return; + if (auto *CTSD = dyn_cast<ClassTemplateSpecializationDecl>(Previous); + CTSD && !CTSD->isExplicitSpecialization()) + return; + if (auto *Func = dyn_cast<FunctionDecl>(Previous)) + if (auto *FTSI = Func->getTemplateSpecializationInfo(); + FTSI && !FTSI->isExplicitSpecialization()) + return; + + // It is fine if they are in the same module. + if (Reader.getContext().isInSameModule(M, D->getOwningModule())) + return; + + Reader.Diag(Previous->getLocation(), + diag::err_multiple_decl_in_different_modules) + << cast<NamedDecl>(Previous) << M->Name; + Reader.Diag(D->getLocation(), diag::note_also_found); +} + void ASTDeclReader::attachPreviousDecl(ASTReader &Reader, Decl *D, Decl *Previous, Decl *Canon) { assert(D && Previous); @@ -3697,22 +3745,7 @@ void ASTDeclReader::attachPreviousDecl(ASTReader &Reader, Decl *D, #include "clang/AST/DeclNodes.inc" } - // [basic.link]/p10: - // If two declarations of an entity are attached to different modules, - // the program is ill-formed; - // - // FIXME: Get rid of the enumeration of decl types once we have an appropriate - // abstract for decls of an entity. e.g., the namespace decl and using decl - // doesn't introduce an entity. - if (Module *M = Previous->getOwningModule(); - M && M->isNamedModule() && - isa<VarDecl, FunctionDecl, TagDecl, RedeclarableTemplateDecl>(Previous) && - !Reader.getContext().isInSameModule(M, D->getOwningModule())) { - Reader.Diag(Previous->getLocation(), - diag::err_multiple_decl_in_different_modules) - << cast<NamedDecl>(Previous) << M->Name; - Reader.Diag(D->getLocation(), diag::note_also_found); - } + checkMultipleDefinitionInNamedModules(Reader, D, Previous); // If the declaration was visible in one module, a redeclaration of it in // another module remains visible even if it wouldn't be visible by itself. @@ -4209,6 +4242,13 @@ void ASTReader::PassInterestingDeclsToConsumer() { // If we add any new potential interesting decl in the last call, consume it. 
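The checkMultipleDefinitionInNamedModules helper factored out here enforces [basic.link]/p10 at AST-reading time. A minimal C++20 reproducer of the situation it diagnoses might look like the sketch below; the module and entity names are illustrative, and the exact wording is whatever err_multiple_decl_in_different_modules expands to:

    // a.cppm — declaration attached to named module 'a'
    export module a;
    export int shared_entity();

    // b.cppm — a corresponding declaration of the same entity, attached to 'b'
    export module b;
    export int shared_entity();

    // use.cpp — importing both modules brings both declarations into one
    // translation unit; the AST reader then reports that a declaration attached
    // to module 'a' was also found attached to a different module.
    import a;
    import b;
    int use() { return shared_entity(); }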
ConsumingPotentialInterestingDecls(); + + for (GlobalDeclID ID : VTablesToEmit) { + auto *RD = cast<CXXRecordDecl>(GetDecl(ID)); + assert(!RD->shouldEmitInExternalSource()); + PassVTableToConsumer(RD); + } + VTablesToEmit.clear(); } void ASTReader::loadDeclUpdateRecords(PendingUpdateRecord &Record) { diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index c78d8943d6d9..7c0636962459 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -927,6 +927,7 @@ void ASTWriter::WriteBlockInfoBlock() { RECORD(DECLS_TO_CHECK_FOR_DEFERRED_DIAGS); RECORD(PP_ASSUME_NONNULL_LOC); RECORD(PP_UNSAFE_BUFFER_USAGE); + RECORD(VTABLES_TO_EMIT); // SourceManager Block. BLOCK(SOURCE_MANAGER_BLOCK); @@ -3961,6 +3962,10 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP, Stream.EmitRecord(INTERESTING_IDENTIFIERS, InterestingIdents); } +void ASTWriter::handleVTable(CXXRecordDecl *RD) { + PendingEmittingVTables.push_back(RD); +} + //===----------------------------------------------------------------------===// // DeclContext's Name Lookup Table Serialization //===----------------------------------------------------------------------===// @@ -5163,6 +5168,13 @@ void ASTWriter::PrepareWritingSpecialDecls(Sema &SemaRef) { // Write all of the DeclsToCheckForDeferredDiags. for (auto *D : SemaRef.DeclsToCheckForDeferredDiags) GetDeclRef(D); + + // Write all classes that need to emit the vtable definitions if required. + if (isWritingStdCXXNamedModules()) + for (CXXRecordDecl *RD : PendingEmittingVTables) + GetDeclRef(RD); + else + PendingEmittingVTables.clear(); } void ASTWriter::WriteSpecialDeclRecords(Sema &SemaRef) { @@ -5317,6 +5329,17 @@ void ASTWriter::WriteSpecialDeclRecords(Sema &SemaRef) { } if (!DeleteExprsToAnalyze.empty()) Stream.EmitRecord(DELETE_EXPRS_TO_ANALYZE, DeleteExprsToAnalyze); + + RecordData VTablesToEmit; + for (CXXRecordDecl *RD : PendingEmittingVTables) { + if (!wasDeclEmitted(RD)) + continue; + + AddDeclRef(RD, VTablesToEmit); + } + + if (!VTablesToEmit.empty()) + Stream.EmitRecord(VTABLES_TO_EMIT, VTablesToEmit); } ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot, @@ -6559,10 +6582,12 @@ void ASTRecordWriter::AddCXXDefinitionData(const CXXRecordDecl *D) { // computed. Record->push_back(D->getODRHash()); - bool ModulesDebugInfo = - Writer->Context->getLangOpts().ModulesDebugInfo && !D->isDependentType(); - Record->push_back(ModulesDebugInfo); - if (ModulesDebugInfo) + bool ModulesCodegen = + !D->isDependentType() && + (Writer->Context->getLangOpts().ModulesDebugInfo || + D->isInNamedModule()); + Record->push_back(ModulesCodegen); + if (ModulesCodegen) Writer->AddDeclRef(D, Writer->ModularCodegenDecls); // IsLambda bit is already saved. diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp index 17c774038571..8a4ca54349e3 100644 --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -1529,8 +1529,14 @@ void ASTDeclWriter::VisitCXXRecordDecl(CXXRecordDecl *D) { if (D->isThisDeclarationADefinition()) Record.AddCXXDefinitionData(D); + if (D->isCompleteDefinition() && D->isInNamedModule()) + Writer.AddDeclRef(D, Writer.ModularCodegenDecls); + // Store (what we currently believe to be) the key function to avoid // deserializing every method so we can compute it. 
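Together with the VTABLES_TO_EMIT record read in ASTReader.cpp above, adding complete definitions from named modules to ModularCodegenDecls appears intended to make the module unit itself own the vtables of dynamic classes defined in its purview, instead of leaving every importer to re-derive that from the key-function heuristic. A rough sketch of the kind of code affected (module name and build layout are assumptions):

    // shapes.cppm — C++20 module interface unit
    export module shapes;

    export class Shape {
    public:
      virtual ~Shape() = default;
      virtual double area() const { return 0.0; }
    };

    // consumer.cpp
    // import shapes;
    // double f(const Shape &s) { return s.area(); }  // needs Shape's vtable;
    // with this change the vtables recorded while building 'shapes' are replayed
    // via HandleVTable when the module unit is compiled to an object file, so
    // Shape's vtable lands in that object rather than in every importer.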
+ // + // FIXME: Avoid adding the key function if the class is defined in + // module purview since in that case the key function is meaningless. if (D->isCompleteDefinition()) Record.AddDeclRef(Context.getCurrentKeyFunction(D)); diff --git a/clang/tools/clang-format/ClangFormat.cpp b/clang/tools/clang-format/ClangFormat.cpp index 6cba1267f3b0..c4b6209a71a8 100644 --- a/clang/tools/clang-format/ClangFormat.cpp +++ b/clang/tools/clang-format/ClangFormat.cpp @@ -210,6 +210,10 @@ static cl::opt<bool> FailOnIncompleteFormat( cl::desc("If set, fail with exit code 1 on incomplete format."), cl::init(false), cl::cat(ClangFormatCategory)); +static cl::opt<bool> ListIgnored("list-ignored", + cl::desc("List ignored files."), + cl::cat(ClangFormatCategory), cl::Hidden); + namespace clang { namespace format { @@ -715,7 +719,13 @@ int main(int argc, const char **argv) { unsigned FileNo = 1; bool Error = false; for (const auto &FileName : FileNames) { - if (isIgnored(FileName)) + const bool Ignored = isIgnored(FileName); + if (ListIgnored) { + if (Ignored) + outs() << FileName << '\n'; + continue; + } + if (Ignored) continue; if (Verbose) { errs() << "Formatting [" << FileNo++ << "/" << FileNames.size() << "] " diff --git a/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S b/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S index 926ad3b1b633..0318d9a6f1eb 100644 --- a/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S +++ b/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S @@ -252,7 +252,15 @@ DEFINE_COMPILERRT_FUNCTION_ALIAS(__arm_sc_memmove, __arm_sc_memcpy) #define zva_val x5 DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sc_memset) - dup v0.16B, valw +#ifdef __ARM_FEATURE_SVE + mov z0.b, valw +#else + bfi valw, valw, #8, #8 + bfi valw, valw, #16, #16 + bfi val, val, #32, #32 + fmov d0, val + fmov v0.d[1], val +#endif add dstend2, dstin, count cmp count, 96 diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp index 1d6a55bdb7f3..8d375ffcd079 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp @@ -33,11 +33,15 @@ // For mips64, syscall(__NR_stat) fills the buffer in the 'struct kernel_stat' // format. Struct kernel_stat is defined as 'struct stat' in asm/stat.h. To // access stat from asm/stat.h, without conflicting with definition in -// sys/stat.h, we use this trick. -# if SANITIZER_MIPS64 +// sys/stat.h, we use this trick. sparc64 is similar, using +// syscall(__NR_stat64) and struct kernel_stat64. 
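In the __arm_sc_memset hunk above, the non-SVE fallback replaces the Neon dup with two bfi instructions on the 32-bit register, one on the 64-bit register, and two fmovs, which together replicate the low byte of the fill value across all 16 bytes of v0. The scalar value those bfi steps build up is the usual byte-broadcast pattern; a plain C++ rendering of it (for illustration only, not part of the runtime):

    #include <cassert>
    #include <cstdint>

    // Replicate the low 8 bits of `val` into every byte of a 64-bit value,
    // mirroring: bfi valw, valw, #8, #8 / bfi valw, valw, #16, #16 /
    //            bfi val,  val,  #32, #32
    constexpr std::uint64_t broadcast_byte(std::uint32_t val) {
      std::uint64_t b = val & 0xffu;
      b |= b << 8;   // bytes 0-1
      b |= b << 16;  // bytes 0-3
      b |= b << 32;  // bytes 0-7; fmov d0 / fmov v0.d[1] then copy this value
                     // into both halves of v0
      return b;
    }

    static_assert(broadcast_byte(0xab) == 0xababababababababULL, "");

    int main() { assert(broadcast_byte(0x7f) == 0x7f7f7f7f7f7f7f7fULL); }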
+# if SANITIZER_MIPS64 || SANITIZER_SPARC64 # include <asm/unistd.h> # include <sys/types.h> # define stat kernel_stat +# if SANITIZER_SPARC64 +# define stat64 kernel_stat64 +# endif # if SANITIZER_GO # undef st_atime # undef st_mtime @@ -48,6 +52,7 @@ # endif # include <asm/stat.h> # undef stat +# undef stat64 # endif # include <dlfcn.h> @@ -285,8 +290,7 @@ uptr internal_ftruncate(fd_t fd, uptr size) { return res; } -# if (!SANITIZER_LINUX_USES_64BIT_SYSCALLS || SANITIZER_SPARC) && \ - SANITIZER_LINUX +# if !SANITIZER_LINUX_USES_64BIT_SYSCALLS && SANITIZER_LINUX static void stat64_to_stat(struct stat64 *in, struct stat *out) { internal_memset(out, 0, sizeof(*out)); out->st_dev = in->st_dev; @@ -327,7 +331,12 @@ static void statx_to_stat(struct statx *in, struct stat *out) { } # endif -# if SANITIZER_MIPS64 +# if SANITIZER_MIPS64 || SANITIZER_SPARC64 +# if SANITIZER_MIPS64 +typedef struct kernel_stat kstat_t; +# else +typedef struct kernel_stat64 kstat_t; +# endif // Undefine compatibility macros from <sys/stat.h> // so that they would not clash with the kernel_stat // st_[a|m|c]time fields @@ -345,7 +354,7 @@ static void statx_to_stat(struct statx *in, struct stat *out) { # undef st_mtime_nsec # undef st_ctime_nsec # endif -static void kernel_stat_to_stat(struct kernel_stat *in, struct stat *out) { +static void kernel_stat_to_stat(kstat_t *in, struct stat *out) { internal_memset(out, 0, sizeof(*out)); out->st_dev = in->st_dev; out->st_ino = in->st_ino; @@ -391,6 +400,12 @@ uptr internal_stat(const char *path, void *buf) { !SANITIZER_SPARC return internal_syscall(SYSCALL(newfstatat), AT_FDCWD, (uptr)path, (uptr)buf, 0); +# elif SANITIZER_SPARC64 + kstat_t buf64; + int res = internal_syscall(SYSCALL(fstatat64), AT_FDCWD, (uptr)path, + (uptr)&buf64, 0); + kernel_stat_to_stat(&buf64, (struct stat *)buf); + return res; # else struct stat64 buf64; int res = internal_syscall(SYSCALL(fstatat64), AT_FDCWD, (uptr)path, @@ -423,6 +438,12 @@ uptr internal_lstat(const char *path, void *buf) { !SANITIZER_SPARC return internal_syscall(SYSCALL(newfstatat), AT_FDCWD, (uptr)path, (uptr)buf, AT_SYMLINK_NOFOLLOW); +# elif SANITIZER_SPARC64 + kstat_t buf64; + int res = internal_syscall(SYSCALL(fstatat64), AT_FDCWD, (uptr)path, + (uptr)&buf64, AT_SYMLINK_NOFOLLOW); + kernel_stat_to_stat(&buf64, (struct stat *)buf); + return res; # else struct stat64 buf64; int res = internal_syscall(SYSCALL(fstatat64), AT_FDCWD, (uptr)path, @@ -442,10 +463,16 @@ uptr internal_fstat(fd_t fd, void *buf) { # if SANITIZER_FREEBSD || SANITIZER_LINUX_USES_64BIT_SYSCALLS # if SANITIZER_MIPS64 // For mips64, fstat syscall fills buffer in the format of kernel_stat - struct kernel_stat kbuf; + kstat_t kbuf; int res = internal_syscall(SYSCALL(fstat), fd, &kbuf); kernel_stat_to_stat(&kbuf, (struct stat *)buf); return res; +# elif SANITIZER_LINUX && SANITIZER_SPARC64 + // For sparc64, fstat64 syscall fills buffer in the format of kernel_stat64 + kstat_t kbuf; + int res = internal_syscall(SYSCALL(fstat64), fd, &kbuf); + kernel_stat_to_stat(&kbuf, (struct stat *)buf); + return res; # elif SANITIZER_LINUX && defined(__loongarch__) struct statx bufx; int res = internal_syscall(SYSCALL(statx), fd, "", AT_EMPTY_PATH, @@ -826,10 +853,16 @@ uptr internal_sigaltstack(const void *ss, void *oss) { return internal_syscall(SYSCALL(sigaltstack), (uptr)ss, (uptr)oss); } +extern "C" pid_t __fork(void); + int internal_fork() { # if SANITIZER_LINUX # if SANITIZER_S390 return internal_syscall(SYSCALL(clone), 0, SIGCHLD); +# elif SANITIZER_SPARC + // The 
clone syscall interface on SPARC differs massively from the rest, + // so fall back to __fork. + return __fork(); # else return internal_syscall(SYSCALL(clone), SIGCHLD, 0); # endif diff --git a/libcxx/include/__bit/rotate.h b/libcxx/include/__bit/rotate.h index d848056c3350..90e430e9d042 100644 --- a/libcxx/include/__bit/rotate.h +++ b/libcxx/include/__bit/rotate.h @@ -20,24 +20,37 @@ _LIBCPP_BEGIN_NAMESPACE_STD +// Writing two full functions for rotl and rotr makes it easier for the compiler +// to optimize the code. On x86 this function becomes the ROL instruction and +// the rotr function becomes the ROR instruction. template <class _Tp> -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp __rotr(_Tp __t, int __cnt) _NOEXCEPT { - static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__rotr requires an unsigned integer type"); - const unsigned int __dig = numeric_limits<_Tp>::digits; - if ((__cnt % __dig) == 0) - return __t; +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp __rotl(_Tp __x, int __s) _NOEXCEPT { + static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__rotl requires an unsigned integer type"); + const int __N = numeric_limits<_Tp>::digits; + int __r = __s % __N; + + if (__r == 0) + return __x; - if (__cnt < 0) { - __cnt *= -1; - return (__t << (__cnt % __dig)) | (__t >> (__dig - (__cnt % __dig))); // rotr with negative __cnt is similar to rotl - } + if (__r > 0) + return (__x << __r) | (__x >> (__N - __r)); - return (__t >> (__cnt % __dig)) | (__t << (__dig - (__cnt % __dig))); + return (__x >> -__r) | (__x << (__N + __r)); } template <class _Tp> -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp __rotl(_Tp __t, int __cnt) _NOEXCEPT { - return std::__rotr(__t, -__cnt); +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp __rotr(_Tp __x, int __s) _NOEXCEPT { + static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__rotr requires an unsigned integer type"); + const int __N = numeric_limits<_Tp>::digits; + int __r = __s % __N; + + if (__r == 0) + return __x; + + if (__r > 0) + return (__x >> __r) | (__x << (__N - __r)); + + return (__x << -__r) | (__x >> (__N + __r)); } #if _LIBCPP_STD_VER >= 20 diff --git a/libcxx/include/__math/hypot.h b/libcxx/include/__math/hypot.h index 61fd260c5940..b99216371101 100644 --- a/libcxx/include/__math/hypot.h +++ b/libcxx/include/__math/hypot.h @@ -9,19 +9,17 @@ #ifndef _LIBCPP___MATH_HYPOT_H #define _LIBCPP___MATH_HYPOT_H +#include <__algorithm/max.h> #include <__config> +#include <__math/abs.h> +#include <__math/exponential_functions.h> +#include <__math/roots.h> #include <__type_traits/enable_if.h> #include <__type_traits/is_arithmetic.h> #include <__type_traits/is_same.h> #include <__type_traits/promote.h> - -#if _LIBCPP_STD_VER >= 17 -# include <__algorithm/max.h> -# include <__math/abs.h> -# include <__math/roots.h> -# include <__utility/pair.h> -# include <limits> -#endif +#include <__utility/pair.h> +#include <limits> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -53,58 +51,32 @@ inline _LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2>::type hypot(_A1 __x, _ } #if _LIBCPP_STD_VER >= 17 -// Factors needed to determine if over-/underflow might happen for `std::hypot(x,y,z)`. 
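The rewritten three-argument __hypot that continues in the next hunk keeps the same idea as the removed __hypot_factors table: if any argument is large (or tiny) enough that squaring it would overflow (or underflow), scale all three arguments before squaring and undo the scaling afterwards, with the thresholds now computed from ldexp instead of being hard-coded per type. A small standalone demonstration of why the scaling is needed; it uses the standard std::hypot for comparison and scales by the maximum magnitude rather than the library's power-of-two factors:

    #include <cmath>
    #include <cstdio>

    int main() {
      double x = 1e308, y = 1e308, z = 1e308;

      // Naive formula: x*x already overflows to +inf, so the result is +inf
      // even though the true answer (~1.732e308) is representable.
      double naive = std::sqrt(x * x + y * y + z * z);

      // Scaled formula: divide by the largest magnitude first, multiply back
      // at the end, so the intermediate squares stay finite.
      double m = std::fmax(std::fabs(x), std::fmax(std::fabs(y), std::fabs(z)));
      double scaled =
          m * std::sqrt((x / m) * (x / m) + (y / m) * (y / m) + (z / m) * (z / m));

      std::printf("naive  = %g\n", naive);                // inf
      std::printf("scaled = %g\n", scaled);               // ~1.73205e308
      std::printf("hypot  = %g\n", std::hypot(x, y, z));  // ~1.73205e308
      return 0;
    }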
-// returns [overflow_threshold, overflow_scale] -template <class _Real> -_LIBCPP_HIDE_FROM_ABI std::pair<_Real, _Real> __hypot_factors() { - static_assert(std::numeric_limits<_Real>::is_iec559); - - if constexpr (std::is_same_v<_Real, float>) { - static_assert(-125 == std::numeric_limits<_Real>::min_exponent); - static_assert(+128 == std::numeric_limits<_Real>::max_exponent); - return {0x1.0p+62f, 0x1.0p-70f}; - } else if constexpr (std::is_same_v<_Real, double>) { - static_assert(-1021 == std::numeric_limits<_Real>::min_exponent); - static_assert(+1024 == std::numeric_limits<_Real>::max_exponent); - return {0x1.0p+510, 0x1.0p-600}; - } else { // long double - static_assert(std::is_same_v<_Real, long double>); - - // preprocessor guard necessary, otherwise literals (e.g. `0x1.0p+8'190l`) throw warnings even when shielded by `if - // constexpr` -# if __DBL_MAX_EXP__ == __LDBL_MAX_EXP__ - static_assert(sizeof(_Real) == sizeof(double)); - return static_cast<std::pair<_Real, _Real>>(__math::__hypot_factors<double>()); -# else - static_assert(sizeof(_Real) > sizeof(double)); - static_assert(-16381 == std::numeric_limits<_Real>::min_exponent); - static_assert(+16384 == std::numeric_limits<_Real>::max_exponent); - return {0x1.0p+8190l, 0x1.0p-9000l}; -# endif - } -} - // Computes the three-dimensional hypotenuse: `std::hypot(x,y,z)`. // The naive implementation might over-/underflow which is why this implementation is more involved: // If the square of an argument might run into issues, we scale the arguments appropriately. // See https://github.com/llvm/llvm-project/issues/92782 for a detailed discussion and summary. template <class _Real> _LIBCPP_HIDE_FROM_ABI _Real __hypot(_Real __x, _Real __y, _Real __z) { + // Factors needed to determine if over-/underflow might happen + constexpr int __exp = std::numeric_limits<_Real>::max_exponent / 2; + const _Real __overflow_threshold = __math::ldexp(_Real(1), __exp); + const _Real __overflow_scale = __math::ldexp(_Real(1), -(__exp + 20)); + + // Scale arguments depending on their size const _Real __max_abs = std::max(__math::fabs(__x), std::max(__math::fabs(__y), __math::fabs(__z))); - const auto [__overflow_threshold, __overflow_scale] = __math::__hypot_factors<_Real>(); _Real __scale; if (__max_abs > __overflow_threshold) { // x*x + y*y + z*z might overflow __scale = __overflow_scale; - __x *= __scale; - __y *= __scale; - __z *= __scale; } else if (__max_abs < 1 / __overflow_threshold) { // x*x + y*y + z*z might underflow __scale = 1 / __overflow_scale; - __x *= __scale; - __y *= __scale; - __z *= __scale; - } else + } else { __scale = 1; + } + __x *= __scale; + __y *= __scale; + __z *= __scale; + + // Compute hypot of scaled arguments and undo scaling return __math::sqrt(__x * __x + __y * __y + __z * __z) / __scale; } diff --git a/libcxx/include/__memory/inout_ptr.h b/libcxx/include/__memory/inout_ptr.h index 72e1a21ad686..e5f3ac5d027e 100644 --- a/libcxx/include/__memory/inout_ptr.h +++ b/libcxx/include/__memory/inout_ptr.h @@ -63,17 +63,17 @@ public: } } - using _SP = __pointer_of_or_t<_Smart, _Pointer>; + using _SmartPtr = __pointer_of_or_t<_Smart, _Pointer>; if constexpr (is_pointer_v<_Smart>) { - std::apply([&](auto&&... __args) { __s_ = _Smart(static_cast<_SP>(__p_), std::forward<_Args>(__args)...); }, + std::apply([&](auto&&... 
__args) { __s_ = _Smart(static_cast<_SmartPtr>(__p_), std::forward<_Args>(__args)...); }, std::move(__a_)); } else if constexpr (__resettable_smart_pointer_with_args<_Smart, _Pointer, _Args...>) { - std::apply([&](auto&&... __args) { __s_.reset(static_cast<_SP>(__p_), std::forward<_Args>(__args)...); }, + std::apply([&](auto&&... __args) { __s_.reset(static_cast<_SmartPtr>(__p_), std::forward<_Args>(__args)...); }, std::move(__a_)); } else { - static_assert(is_constructible_v<_Smart, _SP, _Args...>, + static_assert(is_constructible_v<_Smart, _SmartPtr, _Args...>, "The smart pointer must be constructible from arguments of types _Smart, _Pointer, _Args..."); - std::apply([&](auto&&... __args) { __s_ = _Smart(static_cast<_SP>(__p_), std::forward<_Args>(__args)...); }, + std::apply([&](auto&&... __args) { __s_ = _Smart(static_cast<_SmartPtr>(__p_), std::forward<_Args>(__args)...); }, std::move(__a_)); } } diff --git a/libcxx/include/__memory/out_ptr.h b/libcxx/include/__memory/out_ptr.h index 95aa2029c923..fd99110790cc 100644 --- a/libcxx/include/__memory/out_ptr.h +++ b/libcxx/include/__memory/out_ptr.h @@ -58,14 +58,14 @@ public: return; } - using _SP = __pointer_of_or_t<_Smart, _Pointer>; + using _SmartPtr = __pointer_of_or_t<_Smart, _Pointer>; if constexpr (__resettable_smart_pointer_with_args<_Smart, _Pointer, _Args...>) { - std::apply([&](auto&&... __args) { __s_.reset(static_cast<_SP>(__p_), std::forward<_Args>(__args)...); }, + std::apply([&](auto&&... __args) { __s_.reset(static_cast<_SmartPtr>(__p_), std::forward<_Args>(__args)...); }, std::move(__a_)); } else { - static_assert(is_constructible_v<_Smart, _SP, _Args...>, + static_assert(is_constructible_v<_Smart, _SmartPtr, _Args...>, "The smart pointer must be constructible from arguments of types _Smart, _Pointer, _Args..."); - std::apply([&](auto&&... __args) { __s_ = _Smart(static_cast<_SP>(__p_), std::forward<_Args>(__args)...); }, + std::apply([&](auto&&... 
__args) { __s_ = _Smart(static_cast<_SmartPtr>(__p_), std::forward<_Args>(__args)...); }, std::move(__a_)); } } diff --git a/libcxx/include/complex b/libcxx/include/complex index 22271acaf735..e6534025de57 100644 --- a/libcxx/include/complex +++ b/libcxx/include/complex @@ -421,7 +421,8 @@ public: _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR complex(float __re = 0.0f, float __im = 0.0f) : __re_(__re), __im_(__im) {} - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR complex(__from_builtin_tag, _Complex float __v) + template <class _Tag, __enable_if_t<_IsSame<_Tag, __from_builtin_tag>::value, int> = 0> + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR explicit complex(_Tag, _Complex float __v) : __re_(__real__ __v), __im_(__imag__ __v) {} _LIBCPP_HIDE_FROM_ABI explicit _LIBCPP_CONSTEXPR complex(const complex<double>& __c); @@ -517,7 +518,8 @@ public: _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR complex(double __re = 0.0, double __im = 0.0) : __re_(__re), __im_(__im) {} - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR complex(__from_builtin_tag, _Complex double __v) + template <class _Tag, __enable_if_t<_IsSame<_Tag, __from_builtin_tag>::value, int> = 0> + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR explicit complex(_Tag, _Complex double __v) : __re_(__real__ __v), __im_(__imag__ __v) {} _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR complex(const complex<float>& __c); @@ -617,7 +619,8 @@ public: _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR complex(long double __re = 0.0L, long double __im = 0.0L) : __re_(__re), __im_(__im) {} - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR complex(__from_builtin_tag, _Complex long double __v) + template <class _Tag, __enable_if_t<_IsSame<_Tag, __from_builtin_tag>::value, int> = 0> + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR explicit complex(_Tag, _Complex long double __v) : __re_(__real__ __v), __im_(__imag__ __v) {} _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR complex(const complex<float>& __c); diff --git a/libcxx/include/optional b/libcxx/include/optional index f9cbcbfa595d..41d7515a2b68 100644 --- a/libcxx/include/optional +++ b/libcxx/include/optional @@ -301,7 +301,7 @@ struct __optional_destruct_base<_Tp, false> { # if _LIBCPP_STD_VER >= 23 template <class _Fp, class... _Args> - _LIBCPP_HIDE_FROM_ABI constexpr __optional_destruct_base( + _LIBCPP_HIDE_FROM_ABI constexpr explicit __optional_destruct_base( __optional_construct_from_invoke_tag, _Fp&& __f, _Args&&... __args) : __val_(std::invoke(std::forward<_Fp>(__f), std::forward<_Args>(__args)...)), __engaged_(true) {} # endif @@ -707,8 +707,11 @@ public: } # if _LIBCPP_STD_VER >= 23 - template <class _Fp, class... _Args> - _LIBCPP_HIDE_FROM_ABI constexpr explicit optional(__optional_construct_from_invoke_tag, _Fp&& __f, _Args&&... __args) + template <class _Tag, + class _Fp, + class... _Args, + __enable_if_t<_IsSame<_Tag, __optional_construct_from_invoke_tag>::value, int> = 0> + _LIBCPP_HIDE_FROM_ABI constexpr explicit optional(_Tag, _Fp&& __f, _Args&&... __args) : __base(__optional_construct_from_invoke_tag{}, std::forward<_Fp>(__f), std::forward<_Args>(__args)...) 
{} # endif diff --git a/libcxx/include/span b/libcxx/include/span index 60d76d830f0f..da631cdc3f90 100644 --- a/libcxx/include/span +++ b/libcxx/include/span @@ -206,10 +206,10 @@ struct __is_std_span<span<_Tp, _Sz>> : true_type {}; template <class _Range, class _ElementType> concept __span_compatible_range = + !__is_std_span<remove_cvref_t<_Range>>::value && // ranges::contiguous_range<_Range> && // ranges::sized_range<_Range> && // (ranges::borrowed_range<_Range> || is_const_v<_ElementType>) && // - !__is_std_span<remove_cvref_t<_Range>>::value && // !__is_std_array<remove_cvref_t<_Range>>::value && // !is_array_v<remove_cvref_t<_Range>> && // is_convertible_v<remove_reference_t<ranges::range_reference_t<_Range>> (*)[], _ElementType (*)[]>; diff --git a/libunwind/src/Registers.hpp b/libunwind/src/Registers.hpp index d11ddb3426d5..861e6b5f6f2c 100644 --- a/libunwind/src/Registers.hpp +++ b/libunwind/src/Registers.hpp @@ -1815,6 +1815,13 @@ inline const char *Registers_ppc64::getRegisterName(int regNum) { /// process. class _LIBUNWIND_HIDDEN Registers_arm64; extern "C" void __libunwind_Registers_arm64_jumpto(Registers_arm64 *); + +#if defined(_LIBUNWIND_USE_GCS) +extern "C" void *__libunwind_cet_get_jump_target() { + return reinterpret_cast<void *>(&__libunwind_Registers_arm64_jumpto); +} +#endif + class _LIBUNWIND_HIDDEN Registers_arm64 { public: Registers_arm64(); diff --git a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp index 758557337899..06e654197351 100644 --- a/libunwind/src/UnwindCursor.hpp +++ b/libunwind/src/UnwindCursor.hpp @@ -471,7 +471,7 @@ public: } #endif -#if defined(_LIBUNWIND_USE_CET) +#if defined(_LIBUNWIND_USE_CET) || defined(_LIBUNWIND_USE_GCS) virtual void *get_registers() { _LIBUNWIND_ABORT("get_registers not implemented"); } @@ -954,7 +954,7 @@ public: virtual uintptr_t getDataRelBase(); #endif -#if defined(_LIBUNWIND_USE_CET) +#if defined(_LIBUNWIND_USE_CET) || defined(_LIBUNWIND_USE_GCS) virtual void *get_registers() { return &_registers; } #endif @@ -3005,7 +3005,7 @@ bool UnwindCursor<A, R>::isReadableAddr(const pint_t addr) const { } #endif -#if defined(_LIBUNWIND_USE_CET) +#if defined(_LIBUNWIND_USE_CET) || defined(_LIBUNWIND_USE_GCS) extern "C" void *__libunwind_cet_get_registers(unw_cursor_t *cursor) { AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; return co->get_registers(); diff --git a/libunwind/src/UnwindLevel1.c b/libunwind/src/UnwindLevel1.c index 48e7bc3b9e00..7e785f4d31e7 100644 --- a/libunwind/src/UnwindLevel1.c +++ b/libunwind/src/UnwindLevel1.c @@ -44,7 +44,7 @@ // _LIBUNWIND_POP_CET_SSP is used to adjust CET shadow stack pointer and we // directly jump to __libunwind_Registers_x86/x86_64_jumpto instead of using // a regular function call to avoid pushing to CET shadow stack again. 
-#if !defined(_LIBUNWIND_USE_CET) +#if !defined(_LIBUNWIND_USE_CET) && !defined(_LIBUNWIND_USE_GCS) #define __unw_phase2_resume(cursor, fn) \ do { \ (void)fn; \ @@ -72,6 +72,19 @@ __asm__ volatile("jmpq *%%rdx\n\t" :: "D"(cetRegContext), \ "d"(cetJumpAddress)); \ } while (0) +#elif defined(_LIBUNWIND_TARGET_AARCH64) +#define __cet_ss_step_size 8 +#define __unw_phase2_resume(cursor, fn) \ + do { \ + _LIBUNWIND_POP_CET_SSP((fn)); \ + void *cetRegContext = __libunwind_cet_get_registers((cursor)); \ + void *cetJumpAddress = __libunwind_cet_get_jump_target(); \ + __asm__ volatile("mov x0, %0\n\t" \ + "br %1\n\t" \ + : \ + : "r"(cetRegContext), "r"(cetJumpAddress) \ + : "x0"); \ + } while (0) #endif static _Unwind_Reason_Code @@ -170,6 +183,10 @@ unwind_phase1(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *except } extern int __unw_step_stage2(unw_cursor_t *); +#if defined(_LIBUNWIND_USE_GCS) +// Enable the GCS target feature to permit gcspop instructions to be used. +__attribute__((target("gcs"))) +#endif static _Unwind_Reason_Code unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *exception_object) { __unw_init_local(cursor, uc); @@ -180,8 +197,12 @@ unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *except // uc is initialized by __unw_getcontext in the parent frame. The first stack // frame walked is unwind_phase2. unsigned framesWalked = 1; -#ifdef _LIBUNWIND_USE_CET +#if defined(_LIBUNWIND_USE_CET) unsigned long shadowStackTop = _get_ssp(); +#elif defined(_LIBUNWIND_USE_GCS) + unsigned long shadowStackTop = 0; + if (__chkfeat(_CHKFEAT_GCS)) + shadowStackTop = (unsigned long)__gcspr(); #endif // Walk each frame until we reach where search phase said to stop. while (true) { @@ -238,7 +259,7 @@ unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *except // against return address stored in CET shadow stack, if the 2 addresses don't // match, it means return address in normal stack has been corrupted, we return // _URC_FATAL_PHASE2_ERROR. -#ifdef _LIBUNWIND_USE_CET +#if defined(_LIBUNWIND_USE_CET) || defined(_LIBUNWIND_USE_GCS) if (shadowStackTop != 0) { unw_word_t retInNormalStack; __unw_get_reg(cursor, UNW_REG_IP, &retInNormalStack); @@ -306,6 +327,10 @@ unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *except return _URC_FATAL_PHASE2_ERROR; } +#if defined(_LIBUNWIND_USE_GCS) +// Enable the GCS target feature to permit gcspop instructions to be used. +__attribute__((target("gcs"))) +#endif static _Unwind_Reason_Code unwind_phase2_forced(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *exception_object, diff --git a/libunwind/src/UnwindRegistersRestore.S b/libunwind/src/UnwindRegistersRestore.S index 67d9e0571189..9d34c7909ed3 100644 --- a/libunwind/src/UnwindRegistersRestore.S +++ b/libunwind/src/UnwindRegistersRestore.S @@ -629,6 +629,10 @@ Lnovec: #elif defined(__aarch64__) +#if defined(__ARM_FEATURE_GCS_DEFAULT) +.arch_extension gcs +#endif + // // extern "C" void __libunwind_Registers_arm64_jumpto(Registers_arm64 *); // @@ -680,6 +684,16 @@ DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_arm64_jumpto) ldr x16, [x0, #0x0F8] ldp x0, x1, [x0, #0x000] // restore x0,x1 mov sp,x16 // restore sp +#if defined(__ARM_FEATURE_GCS_DEFAULT) + // If GCS is enabled we need to push the address we're returning to onto the + // GCS stack. We can't just return using br, as there won't be a BTI landing + // pad instruction at the destination. 
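All of the GCS additions in libunwind follow the same pattern as the _LIBUNWIND_POP_CET_SSP macro added to cet_unwind.h: the code may be built with GCS support (__ARM_FEATURE_GCS_DEFAULT), but the feature can still be disabled at run time, so every gcspopm/gcspr/gcspushm use is guarded by __chkfeat. A minimal function-shaped sketch of that guard (the function name is made up; the intrinsics are the arm_acle.h ones the patch relies on, so this only compiles for AArch64 with GCS-aware headers):

    #if defined(__aarch64__) && defined(__ARM_FEATURE_GCS_DEFAULT)
    #include <arm_acle.h>

    // Discard `frames` Guarded Control Stack entries, one per frame the
    // unwinder is about to skip, but only when GCS is actually enabled.
    // target("gcs") permits the gcspopm encoding in a TU not built with +gcs.
    __attribute__((target("gcs")))
    static void pop_guarded_control_stack(unsigned frames) {
      if (__chkfeat(_CHKFEAT_GCS)) {  // nonzero when GCS is enabled, matching
        while (frames--)              // the usage in unwind_phase2 above
          __gcspopm();
      }
    }
    #endif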
+ mov x16, #1 + chkfeat x16 + cbnz x16, Lnogcs + gcspushm x30 +Lnogcs: +#endif ret x30 // jump to pc #elif defined(__arm__) && !defined(__APPLE__) diff --git a/libunwind/src/assembly.h b/libunwind/src/assembly.h index fb07d04071af..f8e83e138eff 100644 --- a/libunwind/src/assembly.h +++ b/libunwind/src/assembly.h @@ -82,7 +82,22 @@ #define PPC64_OPD2 #endif -#if defined(__aarch64__) && defined(__ARM_FEATURE_BTI_DEFAULT) +#if defined(__aarch64__) +#if defined(__ARM_FEATURE_GCS_DEFAULT) && defined(__ARM_FEATURE_BTI_DEFAULT) +// Set BTI, PAC, and GCS gnu property bits +#define GNU_PROPERTY 7 +// We indirectly branch to __libunwind_Registers_arm64_jumpto from +// __unw_phase2_resume, so we need to use bti jc. +#define AARCH64_BTI bti jc +#elif defined(__ARM_FEATURE_GCS_DEFAULT) +// Set GCS gnu property bit +#define GNU_PROPERTY 4 +#elif defined(__ARM_FEATURE_BTI_DEFAULT) +// Set BTI and PAC gnu property bits +#define GNU_PROPERTY 3 +#define AARCH64_BTI bti c +#endif +#ifdef GNU_PROPERTY .pushsection ".note.gnu.property", "a" SEPARATOR \ .balign 8 SEPARATOR \ .long 4 SEPARATOR \ @@ -91,12 +106,12 @@ .asciz "GNU" SEPARATOR \ .long 0xc0000000 SEPARATOR /* GNU_PROPERTY_AARCH64_FEATURE_1_AND */ \ .long 4 SEPARATOR \ - .long 3 SEPARATOR /* GNU_PROPERTY_AARCH64_FEATURE_1_BTI AND */ \ - /* GNU_PROPERTY_AARCH64_FEATURE_1_PAC */ \ + .long GNU_PROPERTY SEPARATOR \ .long 0 SEPARATOR \ .popsection SEPARATOR -#define AARCH64_BTI bti c -#else +#endif +#endif +#if !defined(AARCH64_BTI) #define AARCH64_BTI #endif diff --git a/libunwind/src/cet_unwind.h b/libunwind/src/cet_unwind.h index c364ed3e12fe..47d7616a7322 100644 --- a/libunwind/src/cet_unwind.h +++ b/libunwind/src/cet_unwind.h @@ -35,6 +35,28 @@ } while (0) #endif +// On AArch64 we use _LIBUNWIND_USE_GCS to indicate that GCS is supported. We +// need to guard any use of GCS instructions with __chkfeat though, as GCS may +// not be enabled. +#if defined(_LIBUNWIND_TARGET_AARCH64) && defined(__ARM_FEATURE_GCS_DEFAULT) +#include <arm_acle.h> + +// We can only use GCS if arm_acle.h defines the GCS intrinsics. +#ifdef _CHKFEAT_GCS +#define _LIBUNWIND_USE_GCS 1 +#endif + +#define _LIBUNWIND_POP_CET_SSP(x) \ + do { \ + if (__chkfeat(_CHKFEAT_GCS)) { \ + unsigned tmp = (x); \ + while (tmp--) \ + __gcspopm(); \ + } \ + } while (0) + +#endif + extern void *__libunwind_cet_get_registers(unw_cursor_t *); extern void *__libunwind_cet_get_jump_target(void); diff --git a/lld/ELF/Arch/ARM.cpp b/lld/ELF/Arch/ARM.cpp index 3e0efe540e1b..07a7535c4a23 100644 --- a/lld/ELF/Arch/ARM.cpp +++ b/lld/ELF/Arch/ARM.cpp @@ -228,10 +228,16 @@ static void writePltHeaderLong(uint8_t *buf) { write32(buf + 16, gotPlt - l1 - 8); } +// True if we should use Thumb PLTs, which currently require Thumb2, and are +// only used if the target does not have the ARM ISA. +static bool useThumbPLTs() { + return config->armHasThumb2ISA && !config->armHasArmISA; +} + // The default PLT header requires the .got.plt to be within 128 Mb of the // .plt in the positive direction. 
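The GNU_PROPERTY values chosen in the assembly.h hunk above (3, 4 and 7) are ORs of the GNU_PROPERTY_AARCH64_FEATURE_1_AND bits defined by the AArch64 ELF ABI; spelled out for reference (the enum names below are only for the illustration):

    #include <cstdint>

    enum : std::uint32_t {
      FEATURE_1_BTI = 1u << 0,  // Branch Target Identification
      FEATURE_1_PAC = 1u << 1,  // Pointer Authentication
      FEATURE_1_GCS = 1u << 2,  // Guarded Control Stack
    };

    static_assert((FEATURE_1_BTI | FEATURE_1_PAC) == 3, "BTI+PAC (old value)");
    static_assert(FEATURE_1_GCS == 4, "GCS only");
    static_assert((FEATURE_1_BTI | FEATURE_1_PAC | FEATURE_1_GCS) == 7,
                  "BTI+PAC+GCS");

    int main() {}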
void ARM::writePltHeader(uint8_t *buf) const { - if (config->armThumbPLTs) { + if (useThumbPLTs()) { // The instruction sequence for thumb: // // 0: b500 push {lr} @@ -289,7 +295,7 @@ void ARM::writePltHeader(uint8_t *buf) const { } void ARM::addPltHeaderSymbols(InputSection &isec) const { - if (config->armThumbPLTs) { + if (useThumbPLTs()) { addSyntheticLocal("$t", STT_NOTYPE, 0, 0, isec); addSyntheticLocal("$d", STT_NOTYPE, 12, 0, isec); } else { @@ -315,7 +321,7 @@ static void writePltLong(uint8_t *buf, uint64_t gotPltEntryAddr, void ARM::writePlt(uint8_t *buf, const Symbol &sym, uint64_t pltEntryAddr) const { - if (!config->armThumbPLTs) { + if (!useThumbPLTs()) { uint64_t offset = sym.getGotPltVA() - pltEntryAddr - 8; // The PLT entry is similar to the example given in Appendix A of ELF for @@ -367,7 +373,7 @@ void ARM::writePlt(uint8_t *buf, const Symbol &sym, } void ARM::addPltSymbols(InputSection &isec, uint64_t off) const { - if (config->armThumbPLTs) { + if (useThumbPLTs()) { addSyntheticLocal("$t", STT_NOTYPE, off, 0, isec); } else { addSyntheticLocal("$a", STT_NOTYPE, off, 0, isec); @@ -393,7 +399,7 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file, case R_ARM_JUMP24: // Source is ARM, all PLT entries are ARM so no interworking required. // Otherwise we need to interwork if STT_FUNC Symbol has bit 0 set (Thumb). - assert(!config->armThumbPLTs && + assert(!useThumbPLTs() && "If the source is ARM, we should not need Thumb PLTs"); if (s.isFunc() && expr == R_PC && (s.getVA() & 1)) return true; @@ -407,7 +413,8 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file, case R_ARM_THM_JUMP24: // Source is Thumb, when all PLT entries are ARM interworking is required. // Otherwise we need to interwork if STT_FUNC Symbol has bit 0 clear (ARM). - if ((expr == R_PLT_PC && !config->armThumbPLTs) || (s.isFunc() && (s.getVA() & 1) == 0)) + if ((expr == R_PLT_PC && !useThumbPLTs()) || + (s.isFunc() && (s.getVA() & 1) == 0)) return true; [[fallthrough]]; case R_ARM_THM_CALL: { @@ -675,7 +682,7 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { // PLT entries are always ARM state so we know we need to interwork. assert(rel.sym); // R_ARM_THM_CALL is always reached via relocate(). bool bit0Thumb = val & 1; - bool useThumb = bit0Thumb || config->armThumbPLTs; + bool useThumb = bit0Thumb || useThumbPLTs(); bool isBlx = (read16(loc + 2) & 0x1000) == 0; // lld 10.0 and before always used bit0Thumb when deciding to write a BLX // even when type not STT_FUNC. 
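The repeated (s.getVA() & 1) tests in this file rely on the AAELF32 convention that, for an STT_FUNC symbol, bit 0 of the value encodes the target instruction set: 1 means Thumb, 0 means ARM, and the real entry point is the value with bit 0 cleared. That bit is what decides whether a BL must become a BLX or go through an interworking thunk. A tiny illustration of the convention (not lld code):

    #include <cstdint>
    #include <cstdio>

    struct FuncSym { std::uint32_t va; };  // stand-in for an ELF symbol value

    static bool isThumb(FuncSym s) { return (s.va & 1) != 0; }
    static std::uint32_t entry(FuncSym s) { return s.va & ~1u; }

    int main() {
      FuncSym armFn{0x11000};    // bit 0 clear: ARM-state target
      FuncSym thumbFn{0x12001};  // bit 0 set: Thumb-state target, entry 0x12000
      std::printf("arm? %d thumb? %d entry %#x\n",
                  isThumb(armFn), isThumb(thumbFn), entry(thumbFn));
    }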
diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index 0173be396163..28726d48e428 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -217,7 +217,8 @@ struct Config { bool allowMultipleDefinition; bool fatLTOObjects; bool androidPackDynRelocs = false; - bool armThumbPLTs = false; + bool armHasArmISA = false; + bool armHasThumb2ISA = false; bool armHasBlx = false; bool armHasMovtMovw = false; bool armJ1J2BranchEncoding = false; diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 40e095a133d9..eb6734dfd458 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -991,6 +991,15 @@ processCallGraphRelocations(SmallVector<uint32_t, 32> &symbolIndices, for (size_t i = 0, e = objSections.size(); i < e; ++i) { const Elf_Shdr_Impl<ELFT> &sec = objSections[i]; if (sec.sh_info == inputObj->cgProfileSectionIndex) { + if (sec.sh_type == SHT_CREL) { + auto crels = + CHECK(obj.crels(sec), "could not retrieve cg profile rela section"); + for (const auto &rel : crels.first) + symbolIndices.push_back(rel.getSymbol(false)); + for (const auto &rel : crels.second) + symbolIndices.push_back(rel.getSymbol(false)); + break; + } if (sec.sh_type == SHT_RELA) { ArrayRef<typename ELFT::Rela> relas = CHECK(obj.relas(sec), "could not retrieve cg profile rela section"); diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index f1c0eb292361..48f5a9609ecf 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -203,10 +203,8 @@ static void updateSupportedARMFeatures(const ARMAttributeParser &attributes) { attributes.getAttributeValue(ARMBuildAttrs::ARM_ISA_use); std::optional<unsigned> thumb = attributes.getAttributeValue(ARMBuildAttrs::THUMB_ISA_use); - bool noArmISA = !armISA || *armISA == ARMBuildAttrs::Not_Allowed; - bool hasThumb2 = thumb && *thumb >= ARMBuildAttrs::AllowThumb32; - if (noArmISA && hasThumb2) - config->armThumbPLTs = true; + config->armHasArmISA |= armISA && *armISA >= ARMBuildAttrs::Allowed; + config->armHasThumb2ISA |= thumb && *thumb >= ARMBuildAttrs::AllowThumb32; } InputFile::InputFile(Kind k, MemoryBufferRef m) diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index e19b1e6c8efb..707768dee6d3 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -459,7 +459,8 @@ private: // InputSectionBase. 
class RelocationScanner { public: - template <class ELFT> void scanSection(InputSectionBase &s); + template <class ELFT> + void scanSection(InputSectionBase &s, bool isEH = false); private: InputSectionBase *sec; @@ -1617,10 +1618,11 @@ void RelocationScanner::scan(Relocs<RelTy> rels) { }); } -template <class ELFT> void RelocationScanner::scanSection(InputSectionBase &s) { +template <class ELFT> +void RelocationScanner::scanSection(InputSectionBase &s, bool isEH) { sec = &s; getter = OffsetGetter(s); - const RelsOrRelas<ELFT> rels = s.template relsOrRelas<ELFT>(); + const RelsOrRelas<ELFT> rels = s.template relsOrRelas<ELFT>(!isEH); if (rels.areRelocsCrel()) scan<ELFT>(rels.crels); else if (rels.areRelocsRel()) @@ -1658,7 +1660,7 @@ template <class ELFT> void elf::scanRelocations() { RelocationScanner scanner; for (Partition &part : partitions) { for (EhInputSection *sec : part.ehFrame->sections) - scanner.template scanSection<ELFT>(*sec); + scanner.template scanSection<ELFT>(*sec, /*isEH=*/true); if (part.armExidx && part.armExidx->isLive()) for (InputSection *sec : part.armExidx->exidxSections) if (sec->isLive()) diff --git a/lldb/include/lldb/API/SBSaveCoreOptions.h b/lldb/include/lldb/API/SBSaveCoreOptions.h index e77496bd3a4a..75506fd752e7 100644 --- a/lldb/include/lldb/API/SBSaveCoreOptions.h +++ b/lldb/include/lldb/API/SBSaveCoreOptions.h @@ -17,7 +17,7 @@ class LLDB_API SBSaveCoreOptions { public: SBSaveCoreOptions(); SBSaveCoreOptions(const lldb::SBSaveCoreOptions &rhs); - ~SBSaveCoreOptions() = default; + ~SBSaveCoreOptions(); const SBSaveCoreOptions &operator=(const lldb::SBSaveCoreOptions &rhs); diff --git a/lldb/include/lldb/Utility/AddressableBits.h b/lldb/include/lldb/Utility/AddressableBits.h index 0d27c3561ec2..8c7a1ec5f52c 100644 --- a/lldb/include/lldb/Utility/AddressableBits.h +++ b/lldb/include/lldb/Utility/AddressableBits.h @@ -12,6 +12,8 @@ #include "lldb/lldb-forward.h" #include "lldb/lldb-public.h" +#include <cstdint> + namespace lldb_private { /// \class AddressableBits AddressableBits.h "lldb/Core/AddressableBits.h" diff --git a/lldb/source/API/SBSaveCoreOptions.cpp b/lldb/source/API/SBSaveCoreOptions.cpp index 6c3f74596203..19ca83f932bc 100644 --- a/lldb/source/API/SBSaveCoreOptions.cpp +++ b/lldb/source/API/SBSaveCoreOptions.cpp @@ -29,6 +29,8 @@ SBSaveCoreOptions::SBSaveCoreOptions(const SBSaveCoreOptions &rhs) { m_opaque_up = clone(rhs.m_opaque_up); } +SBSaveCoreOptions::~SBSaveCoreOptions() = default; + const SBSaveCoreOptions & SBSaveCoreOptions::operator=(const SBSaveCoreOptions &rhs) { LLDB_INSTRUMENT_VA(this, rhs); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 85c59a605c67..ac769ad9fbd5 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -269,8 +269,15 @@ static void PrepareContextToReceiveMembers(TypeSystemClang &ast, } // We don't have a type definition and/or the import failed, but we need to - // add members to it. Start the definition to make that possible. - tag_decl_ctx->startDefinition(); + // add members to it. Start the definition to make that possible. If the type + // has no external storage we also have to complete the definition. Otherwise, + // that will happen when we are asked to complete the type + // (CompleteTypeFromDWARF). 
+ ast.StartTagDeclarationDefinition(type); + if (!tag_decl_ctx->hasExternalLexicalStorage()) { + ast.SetDeclIsForcefullyCompleted(tag_decl_ctx); + ast.CompleteTagDeclarationDefinition(type); + } } ParsedDWARFTypeAttributes::ParsedDWARFTypeAttributes(const DWARFDIE &die) { diff --git a/llvm/include/llvm/ADT/SmallVector.h b/llvm/include/llvm/ADT/SmallVector.h index 09676d792dfe..17444147b102 100644 --- a/llvm/include/llvm/ADT/SmallVector.h +++ b/llvm/include/llvm/ADT/SmallVector.h @@ -19,6 +19,7 @@ #include <algorithm> #include <cassert> #include <cstddef> +#include <cstdint> #include <cstdlib> #include <cstring> #include <functional> diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index afafb74bdcb0..95a74b91f7ac 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -199,9 +199,8 @@ public: /// Check whether the dependencies between the accesses are safe. /// /// Only checks sets with elements in \p CheckDeps. - bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps, - const DenseMap<Value *, SmallVector<const Value *, 16>> - &UnderlyingObjects); + bool areDepsSafe(const DepCandidates &AccessSets, + const MemAccessInfoList &CheckDeps); /// No memory dependence was encountered that would inhibit /// vectorization. @@ -351,11 +350,8 @@ private: /// element access it records this distance in \p MinDepDistBytes (if this /// distance is smaller than any other distance encountered so far). /// Otherwise, this function returns true signaling a possible dependence. - Dependence::DepType - isDependent(const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B, - unsigned BIdx, - const DenseMap<Value *, SmallVector<const Value *, 16>> - &UnderlyingObjects); + Dependence::DepType isDependent(const MemAccessInfo &A, unsigned AIdx, + const MemAccessInfo &B, unsigned BIdx); /// Check whether the data dependence could prevent store-load /// forwarding. @@ -392,11 +388,9 @@ private: /// determined, or a struct containing (Distance, Stride, TypeSize, AIsWrite, /// BIsWrite). std::variant<Dependence::DepType, DepDistanceStrideAndSizeInfo> - getDependenceDistanceStrideAndSize( - const MemAccessInfo &A, Instruction *AInst, const MemAccessInfo &B, - Instruction *BInst, - const DenseMap<Value *, SmallVector<const Value *, 16>> - &UnderlyingObjects); + getDependenceDistanceStrideAndSize(const MemAccessInfo &A, Instruction *AInst, + const MemAccessInfo &B, + Instruction *BInst); }; class RuntimePointerChecking; @@ -797,7 +791,8 @@ replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, Value *Ptr); /// If the pointer has a constant stride return it in units of the access type -/// size. Otherwise return std::nullopt. +/// size. If the pointer is loop-invariant, return 0. Otherwise return +/// std::nullopt. /// /// Ensure that it does not wrap in the address space, assuming the predicate /// associated with \p PSE is true. 
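The updated getPtrStride comment distinguishes three outcomes: a constant stride in units of the access size, 0 for a loop-invariant pointer, and std::nullopt when no constant stride can be established. In source terms the cases correspond roughly to the accesses below (illustrative only; how LAA classifies a given IR pattern still depends on SCEV):

    void f(float *a, float *q, float **table, int n) {
      float s = 0.0f;
      for (int i = 0; i < n; ++i) {
        s += a[i];       // &a[i]    : constant stride 1 (one element per iter)
        s += *q;         // q        : loop-invariant pointer   -> stride 0
        s += *table[i];  // table[i] : pointer reloaded each iteration,
                         //            no constant stride       -> std::nullopt
      }
      *q = s;
    }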
diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h index 0656c0d739fd..d8c9d0a432ad 100644 --- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h +++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h @@ -15,6 +15,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/Support/TypeSize.h" #include <vector> @@ -473,6 +474,11 @@ public: /// Return the frame base information to be encoded in the DWARF subprogram /// debug info. virtual DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const; + + /// This method is called at the end of prolog/epilog code insertion, so + /// targets can emit remarks based on the final frame layout. + virtual void emitRemarks(const MachineFunction &MF, + MachineOptimizationRemarkEmitter *ORE) const {}; }; } // End llvm namespace diff --git a/llvm/include/llvm/CodeGenData/CodeGenData.h b/llvm/include/llvm/CodeGenData/CodeGenData.h deleted file mode 100644 index 659008c78abd..000000000000 --- a/llvm/include/llvm/CodeGenData/CodeGenData.h +++ /dev/null @@ -1,204 +0,0 @@ -//===- CodeGenData.h --------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains support for codegen data that has stable summary which -// can be used to optimize the code in the subsequent codegen. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGENDATA_CODEGENDATA_H -#define LLVM_CODEGENDATA_CODEGENDATA_H - -#include "llvm/ADT/BitmaskEnum.h" -#include "llvm/Bitcode/BitcodeReader.h" -#include "llvm/CodeGenData/OutlinedHashTree.h" -#include "llvm/CodeGenData/OutlinedHashTreeRecord.h" -#include "llvm/IR/Module.h" -#include "llvm/Object/ObjectFile.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/TargetParser/Triple.h" -#include <mutex> - -namespace llvm { - -enum CGDataSectKind { -#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) Kind, -#include "llvm/CodeGenData/CodeGenData.inc" -}; - -std::string getCodeGenDataSectionName(CGDataSectKind CGSK, - Triple::ObjectFormatType OF, - bool AddSegmentInfo = true); - -enum class CGDataKind { - Unknown = 0x0, - // A function outlining info. 
- FunctionOutlinedHashTree = 0x1, - LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/FunctionOutlinedHashTree) -}; - -const std::error_category &cgdata_category(); - -enum class cgdata_error { - success = 0, - eof, - bad_magic, - bad_header, - empty_cgdata, - malformed, - unsupported_version, -}; - -inline std::error_code make_error_code(cgdata_error E) { - return std::error_code(static_cast<int>(E), cgdata_category()); -} - -class CGDataError : public ErrorInfo<CGDataError> { -public: - CGDataError(cgdata_error Err, const Twine &ErrStr = Twine()) - : Err(Err), Msg(ErrStr.str()) { - assert(Err != cgdata_error::success && "Not an error"); - } - - std::string message() const override; - - void log(raw_ostream &OS) const override { OS << message(); } - - std::error_code convertToErrorCode() const override { - return make_error_code(Err); - } - - cgdata_error get() const { return Err; } - const std::string &getMessage() const { return Msg; } - - /// Consume an Error and return the raw enum value contained within it, and - /// the optional error message. The Error must either be a success value, or - /// contain a single CGDataError. - static std::pair<cgdata_error, std::string> take(Error E) { - auto Err = cgdata_error::success; - std::string Msg; - handleAllErrors(std::move(E), [&Err, &Msg](const CGDataError &IPE) { - assert(Err == cgdata_error::success && "Multiple errors encountered"); - Err = IPE.get(); - Msg = IPE.getMessage(); - }); - return {Err, Msg}; - } - - static char ID; - -private: - cgdata_error Err; - std::string Msg; -}; - -enum CGDataMode { - None, - Read, - Write, -}; - -class CodeGenData { - /// Global outlined hash tree that has oulined hash sequences across modules. - std::unique_ptr<OutlinedHashTree> PublishedHashTree; - - /// This flag is set when -fcodegen-data-generate is passed. - /// Or, it can be mutated with -fcodegen-data-thinlto-two-rounds. - bool EmitCGData; - - /// This is a singleton instance which is thread-safe. Unlike profile data - /// which is largely function-based, codegen data describes the whole module. - /// Therefore, this can be initialized once, and can be used across modules - /// instead of constructing the same one for each codegen backend. - static std::unique_ptr<CodeGenData> Instance; - static std::once_flag OnceFlag; - - CodeGenData() = default; - -public: - ~CodeGenData() = default; - - static CodeGenData &getInstance(); - - /// Returns true if we have a valid outlined hash tree. - bool hasOutlinedHashTree() { - return PublishedHashTree && !PublishedHashTree->empty(); - } - - /// Returns the outlined hash tree. This can be globally used in a read-only - /// manner. - const OutlinedHashTree *getOutlinedHashTree() { - return PublishedHashTree.get(); - } - - /// Returns true if we should write codegen data. - bool emitCGData() { return EmitCGData; } - - /// Publish the (globally) merged or read outlined hash tree. - void publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) { - PublishedHashTree = std::move(HashTree); - // Ensure we disable emitCGData as we do not want to read and write both. 
- EmitCGData = false; - } -}; - -namespace cgdata { - -inline bool hasOutlinedHashTree() { - return CodeGenData::getInstance().hasOutlinedHashTree(); -} - -inline const OutlinedHashTree *getOutlinedHashTree() { - return CodeGenData::getInstance().getOutlinedHashTree(); -} - -inline bool emitCGData() { return CodeGenData::getInstance().emitCGData(); } - -inline void -publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) { - CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree)); -} - -void warn(Error E, StringRef Whence = ""); -void warn(Twine Message, std::string Whence = "", std::string Hint = ""); - -} // end namespace cgdata - -namespace IndexedCGData { - -// A signature for data validation, representing "\xffcgdata\x81" in -// little-endian order -const uint64_t Magic = 0x81617461646763ff; - -enum CGDataVersion { - // Version 1 is the first version. This version supports the outlined - // hash tree. - Version1 = 1, - CurrentVersion = CG_DATA_INDEX_VERSION -}; -const uint64_t Version = CGDataVersion::CurrentVersion; - -struct Header { - uint64_t Magic; - uint32_t Version; - uint32_t DataKind; - uint64_t OutlinedHashTreeOffset; - - // New fields should only be added at the end to ensure that the size - // computation is correct. The methods below need to be updated to ensure that - // the new field is read correctly. - - // Reads a header struct from the buffer. - static Expected<Header> readFromBuffer(const unsigned char *Curr); -}; - -} // end namespace IndexedCGData - -} // end namespace llvm - -#endif // LLVM_CODEGEN_PREPARE_H diff --git a/llvm/include/llvm/CodeGenData/CodeGenData.inc b/llvm/include/llvm/CodeGenData/CodeGenData.inc deleted file mode 100644 index 08ec14ea051a..000000000000 --- a/llvm/include/llvm/CodeGenData/CodeGenData.inc +++ /dev/null @@ -1,46 +0,0 @@ -/*===-- CodeGenData.inc ----------------------------------------*- C++ -*-=== *\ -|* -|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -|* See https://llvm.org/LICENSE.txt for license information. -|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -|* -\*===----------------------------------------------------------------------===*/ -/* - * This is the main file that defines all the data structure, signature, - * constant literals that are shared across compiler, host tools (reader/writer) - * to support codegen data. - * -\*===----------------------------------------------------------------------===*/ - -/* Helper macros. */ -#define CG_DATA_SIMPLE_QUOTE(x) #x -#define CG_DATA_QUOTE(x) CG_DATA_SIMPLE_QUOTE(x) - -#ifdef CG_DATA_SECT_ENTRY -#define CG_DATA_DEFINED -CG_DATA_SECT_ENTRY(CG_outline, CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON), - CG_DATA_OUTLINE_COFF, "__DATA,") - -#undef CG_DATA_SECT_ENTRY -#endif - -/* section name strings common to all targets other - than WIN32 */ -#define CG_DATA_OUTLINE_COMMON __llvm_outline -/* Since cg data sections are not allocated, we don't need to - * access them at runtime. - */ -#define CG_DATA_OUTLINE_COFF ".loutline" - -#ifdef _WIN32 -/* Runtime section names and name strings. */ -#define CG_DATA_SECT_NAME CG_DATA_OUTLINE_COFF - -#else -/* Runtime section names and name strings. */ -#define CG_DATA_SECT_NAME CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON) - -#endif - -/* Indexed codegen data format version (start from 1). 
*/ -#define CG_DATA_INDEX_VERSION 1 diff --git a/llvm/include/llvm/CodeGenData/CodeGenDataReader.h b/llvm/include/llvm/CodeGenData/CodeGenDataReader.h deleted file mode 100644 index df4ae3ed24e7..000000000000 --- a/llvm/include/llvm/CodeGenData/CodeGenDataReader.h +++ /dev/null @@ -1,154 +0,0 @@ -//===- CodeGenDataReader.h --------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains support for reading codegen data. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGENDATA_CODEGENDATAREADER_H -#define LLVM_CODEGENDATA_CODEGENDATAREADER_H - -#include "llvm/CodeGenData/CodeGenData.h" -#include "llvm/CodeGenData/OutlinedHashTreeRecord.h" -#include "llvm/Support/LineIterator.h" -#include "llvm/Support/VirtualFileSystem.h" - -namespace llvm { - -class CodeGenDataReader { - cgdata_error LastError = cgdata_error::success; - std::string LastErrorMsg; - -public: - CodeGenDataReader() = default; - virtual ~CodeGenDataReader() = default; - - /// Read the header. Required before reading first record. - virtual Error read() = 0; - /// Return the codegen data version. - virtual uint32_t getVersion() const = 0; - /// Return the codegen data kind. - virtual CGDataKind getDataKind() const = 0; - /// Return true if the data has an outlined hash tree. - virtual bool hasOutlinedHashTree() const = 0; - /// Return the outlined hash tree that is released from the reader. - std::unique_ptr<OutlinedHashTree> releaseOutlinedHashTree() { - return std::move(HashTreeRecord.HashTree); - } - - /// Factory method to create an appropriately typed reader for the given - /// codegen data file path and file system. - static Expected<std::unique_ptr<CodeGenDataReader>> - create(const Twine &Path, vfs::FileSystem &FS); - - /// Factory method to create an appropriately typed reader for the given - /// memory buffer. - static Expected<std::unique_ptr<CodeGenDataReader>> - create(std::unique_ptr<MemoryBuffer> Buffer); - - /// Extract the cgdata embedded in sections from the given object file and - /// merge them into the GlobalOutlineRecord. This is a static helper that - /// is used by `llvm-cgdata merge` or ThinLTO's two-codegen rounds. - static Error mergeFromObjectFile(const object::ObjectFile *Obj, - OutlinedHashTreeRecord &GlobalOutlineRecord); - -protected: - /// The outlined hash tree that has been read. When it's released by - /// releaseOutlinedHashTree(), it's no longer valid. - OutlinedHashTreeRecord HashTreeRecord; - - /// Set the current error and return same. - Error error(cgdata_error Err, const std::string &ErrMsg = "") { - LastError = Err; - LastErrorMsg = ErrMsg; - if (Err == cgdata_error::success) - return Error::success(); - return make_error<CGDataError>(Err, ErrMsg); - } - - Error error(Error &&E) { - handleAllErrors(std::move(E), [&](const CGDataError &IPE) { - LastError = IPE.get(); - LastErrorMsg = IPE.getMessage(); - }); - return make_error<CGDataError>(LastError, LastErrorMsg); - } - - /// Clear the current error and return a successful one. - Error success() { return error(cgdata_error::success); } -}; - -class IndexedCodeGenDataReader : public CodeGenDataReader { - /// The codegen data file contents. 
- std::unique_ptr<MemoryBuffer> DataBuffer; - /// The header - IndexedCGData::Header Header; - -public: - IndexedCodeGenDataReader(std::unique_ptr<MemoryBuffer> DataBuffer) - : DataBuffer(std::move(DataBuffer)) {} - IndexedCodeGenDataReader(const IndexedCodeGenDataReader &) = delete; - IndexedCodeGenDataReader & - operator=(const IndexedCodeGenDataReader &) = delete; - - /// Return true if the given buffer is in binary codegen data format. - static bool hasFormat(const MemoryBuffer &Buffer); - /// Read the contents including the header. - Error read() override; - /// Return the codegen data version. - uint32_t getVersion() const override { return Header.Version; } - /// Return the codegen data kind. - CGDataKind getDataKind() const override { - return static_cast<CGDataKind>(Header.DataKind); - } - /// Return true if the header indicates the data has an outlined hash tree. - /// This does not mean that the data is still available. - bool hasOutlinedHashTree() const override { - return Header.DataKind & - static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree); - } -}; - -/// This format is a simple text format that's suitable for test data. -/// The header is a custom format starting with `:` per line to indicate which -/// codegen data is recorded. `#` is used to indicate a comment. -/// The subsequent data is a YAML format per each codegen data in order. -/// Currently, it only has a function outlined hash tree. -class TextCodeGenDataReader : public CodeGenDataReader { - /// The codegen data file contents. - std::unique_ptr<MemoryBuffer> DataBuffer; - /// Iterator over the profile data. - line_iterator Line; - /// Describe the kind of the codegen data. - CGDataKind DataKind = CGDataKind::Unknown; - -public: - TextCodeGenDataReader(std::unique_ptr<MemoryBuffer> DataBuffer_) - : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {} - TextCodeGenDataReader(const TextCodeGenDataReader &) = delete; - TextCodeGenDataReader &operator=(const TextCodeGenDataReader &) = delete; - - /// Return true if the given buffer is in text codegen data format. - static bool hasFormat(const MemoryBuffer &Buffer); - /// Read the contents including the header. - Error read() override; - /// Text format does not have version, so return 0. - uint32_t getVersion() const override { return 0; } - /// Return the codegen data kind. - CGDataKind getDataKind() const override { return DataKind; } - /// Return true if the header indicates the data has an outlined hash tree. - /// This does not mean that the data is still available. - bool hasOutlinedHashTree() const override { - return static_cast<uint32_t>(DataKind) & - static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree); - } -}; - -} // end namespace llvm - -#endif // LLVM_CODEGENDATA_CODEGENDATAREADER_H diff --git a/llvm/include/llvm/CodeGenData/CodeGenDataWriter.h b/llvm/include/llvm/CodeGenData/CodeGenDataWriter.h deleted file mode 100644 index e17ffc3482ec..000000000000 --- a/llvm/include/llvm/CodeGenData/CodeGenDataWriter.h +++ /dev/null @@ -1,68 +0,0 @@ -//===- CodeGenDataWriter.h --------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains support for writing codegen data. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGENDATA_CODEGENDATAWRITER_H -#define LLVM_CODEGENDATA_CODEGENDATAWRITER_H - -#include "llvm/CodeGenData/CodeGenData.h" -#include "llvm/CodeGenData/OutlinedHashTreeRecord.h" -#include "llvm/Support/Error.h" - -namespace llvm { - -class CGDataOStream; - -class CodeGenDataWriter { - /// The outlined hash tree to be written. - OutlinedHashTreeRecord HashTreeRecord; - - /// A bit mask describing the kind of the codegen data. - CGDataKind DataKind = CGDataKind::Unknown; - -public: - CodeGenDataWriter() = default; - ~CodeGenDataWriter() = default; - - /// Add the outlined hash tree record. The input Record is released. - void addRecord(OutlinedHashTreeRecord &Record); - - /// Write the codegen data to \c OS - Error write(raw_fd_ostream &OS); - - /// Write the codegen data in text format to \c OS - Error writeText(raw_fd_ostream &OS); - - /// Return the attributes of the current CGData. - CGDataKind getCGDataKind() const { return DataKind; } - - /// Return true if the header indicates the data has an outlined hash tree. - bool hasOutlinedHashTree() const { - return static_cast<uint32_t>(DataKind) & - static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree); - } - -private: - /// The offset of the outlined hash tree in the file. - uint64_t OutlinedHashTreeOffset; - - /// Write the codegen data header to \c COS - Error writeHeader(CGDataOStream &COS); - - /// Write the codegen data header in text to \c OS - Error writeHeaderText(raw_fd_ostream &OS); - - Error writeImpl(CGDataOStream &COS); -}; - -} // end namespace llvm - -#endif // LLVM_CODEGENDATA_CODEGENDATAWRITER_H diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h index 22da54a1f03c..7b54c74fb1b9 100644 --- a/llvm/include/llvm/IR/Metadata.h +++ b/llvm/include/llvm/IR/Metadata.h @@ -846,8 +846,10 @@ struct AAMDNodes { AAMDNodes concat(const AAMDNodes &Other) const; /// Create a new AAMDNode for accessing \p AccessSize bytes of this AAMDNode. - /// If his AAMDNode has !tbaa.struct and \p AccessSize matches the size of the - /// field at offset 0, get the TBAA tag describing the accessed field. + /// If this AAMDNode has !tbaa.struct and \p AccessSize matches the size of + /// the field at offset 0, get the TBAA tag describing the accessed field. + /// If such an AAMDNode already embeds !tbaa, the existing one is retrieved. + /// Finally, !tbaa.struct is zeroed out. AAMDNodes adjustForAccess(unsigned AccessSize); AAMDNodes adjustForAccess(size_t Offset, Type *AccessTy, const DataLayout &DL); diff --git a/llvm/include/llvm/Support/MathExtras.h b/llvm/include/llvm/Support/MathExtras.h index 0d0fa826f7bb..e568e42afcf4 100644 --- a/llvm/include/llvm/Support/MathExtras.h +++ b/llvm/include/llvm/Support/MathExtras.h @@ -770,6 +770,14 @@ std::enable_if_t<std::is_signed_v<T>, T> MulOverflow(T X, T Y, T &Result) { #endif } +/// Type to force float point values onto the stack, so that x86 doesn't add +/// hidden precision, avoiding rounding differences on various platforms. 
+#if defined(__i386__) || defined(_M_IX86) +using stack_float_t = volatile float; +#else +using stack_float_t = float; +#endif + } // namespace llvm #endif diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 84214c47a10e..f3fc69c86cd1 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -728,11 +728,6 @@ public: MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; } - const DenseMap<Value *, SmallVector<const Value *, 16>> & - getUnderlyingObjects() { - return UnderlyingObjects; - } - private: typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> PtrAccessMap; @@ -1459,22 +1454,23 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR, } /// Check whether the access through \p Ptr has a constant stride. -std::optional<int64_t> llvm::getPtrStride(PredicatedScalarEvolution &PSE, - Type *AccessTy, Value *Ptr, - const Loop *Lp, - const DenseMap<Value *, const SCEV *> &StridesMap, - bool Assume, bool ShouldCheckWrap) { +std::optional<int64_t> +llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, + const Loop *Lp, + const DenseMap<Value *, const SCEV *> &StridesMap, + bool Assume, bool ShouldCheckWrap) { + const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr); + if (PSE.getSE()->isLoopInvariant(PtrScev, Lp)) + return {0}; + Type *Ty = Ptr->getType(); assert(Ty->isPointerTy() && "Unexpected non-ptr"); - if (isa<ScalableVectorType>(AccessTy)) { LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy << "\n"); return std::nullopt; } - const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr); - const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev); if (Assume && !AR) AR = PSE.getAsAddRec(Ptr); @@ -1899,24 +1895,12 @@ static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride, return ScaledDist % Stride; } -/// Returns true if any of the underlying objects has a loop varying address, -/// i.e. may change in \p L. 
-static bool -isLoopVariantIndirectAddress(ArrayRef<const Value *> UnderlyingObjects, - ScalarEvolution &SE, const Loop *L) { - return any_of(UnderlyingObjects, [&SE, L](const Value *UO) { - return !SE.isLoopInvariant(SE.getSCEV(const_cast<Value *>(UO)), L); - }); -} - std::variant<MemoryDepChecker::Dependence::DepType, MemoryDepChecker::DepDistanceStrideAndSizeInfo> MemoryDepChecker::getDependenceDistanceStrideAndSize( const AccessAnalysis::MemAccessInfo &A, Instruction *AInst, - const AccessAnalysis::MemAccessInfo &B, Instruction *BInst, - const DenseMap<Value *, SmallVector<const Value *, 16>> - &UnderlyingObjects) { - auto &DL = InnermostLoop->getHeader()->getDataLayout(); + const AccessAnalysis::MemAccessInfo &B, Instruction *BInst) { + const auto &DL = InnermostLoop->getHeader()->getDataLayout(); auto &SE = *PSE.getSE(); auto [APtr, AIsWrite] = A; auto [BPtr, BIsWrite] = B; @@ -1933,12 +1917,10 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize( BPtr->getType()->getPointerAddressSpace()) return MemoryDepChecker::Dependence::Unknown; - int64_t StrideAPtr = - getPtrStride(PSE, ATy, APtr, InnermostLoop, SymbolicStrides, true) - .value_or(0); - int64_t StrideBPtr = - getPtrStride(PSE, BTy, BPtr, InnermostLoop, SymbolicStrides, true) - .value_or(0); + std::optional<int64_t> StrideAPtr = + getPtrStride(PSE, ATy, APtr, InnermostLoop, SymbolicStrides, true, true); + std::optional<int64_t> StrideBPtr = + getPtrStride(PSE, BTy, BPtr, InnermostLoop, SymbolicStrides, true, true); const SCEV *Src = PSE.getSCEV(APtr); const SCEV *Sink = PSE.getSCEV(BPtr); @@ -1946,26 +1928,19 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize( // If the induction step is negative we have to invert source and sink of the // dependence when measuring the distance between them. We should not swap // AIsWrite with BIsWrite, as their uses expect them in program order. - if (StrideAPtr < 0) { + if (StrideAPtr && *StrideAPtr < 0) { std::swap(Src, Sink); std::swap(AInst, BInst); + std::swap(StrideAPtr, StrideBPtr); } const SCEV *Dist = SE.getMinusSCEV(Sink, Src); LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink - << "(Induction step: " << StrideAPtr << ")\n"); + << "\n"); LLVM_DEBUG(dbgs() << "LAA: Distance for " << *AInst << " to " << *BInst << ": " << *Dist << "\n"); - // Needs accesses where the addresses of the accessed underlying objects do - // not change within the loop. - if (isLoopVariantIndirectAddress(UnderlyingObjects.find(APtr)->second, SE, - InnermostLoop) || - isLoopVariantIndirectAddress(UnderlyingObjects.find(BPtr)->second, SE, - InnermostLoop)) - return MemoryDepChecker::Dependence::IndirectUnsafe; - // Check if we can prove that Sink only accesses memory after Src's end or // vice versa. At the moment this is limited to cases where either source or // sink are loop invariant to avoid compile-time increases. This is not @@ -1987,12 +1962,33 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize( } } - // Need accesses with constant strides and the same direction. We don't want - // to vectorize "A[B[i]] += ..." and similar code or pointer arithmetic that - // could wrap in the address space. - if (!StrideAPtr || !StrideBPtr || (StrideAPtr > 0 && StrideBPtr < 0) || - (StrideAPtr < 0 && StrideBPtr > 0)) { + // Need accesses with constant strides and the same direction for further + // dependence analysis. We don't want to vectorize "A[B[i]] += ..." and + // similar code or pointer arithmetic that could wrap in the address space. 
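
As a rough, self-contained sketch of the stride-based classification that the rewritten hunk below performs (the enum values and the helper name classifyByStride are illustrative stand-ins, not LLVM API; NeedsDistanceCheck stands for "fall through to the distance-based checks"):

    #include <cstdint>
    #include <optional>

    enum class DepType { IndirectUnsafe, Unknown, NeedsDistanceCheck };

    // Mirrors the intent of the patch: no computable stride on either side
    // means the dependence cannot be analyzed or runtime-checked; a zero
    // stride (loop-invariant address) or strides in opposite directions can
    // still be disambiguated with a runtime check, so they map to Unknown;
    // otherwise the distance-based checks run.
    DepType classifyByStride(std::optional<int64_t> StrideA,
                             std::optional<int64_t> StrideB) {
      if (!StrideA || !StrideB)
        return DepType::IndirectUnsafe;
      if (*StrideA == 0 || *StrideB == 0)
        return DepType::Unknown;
      if ((*StrideA > 0 && *StrideB < 0) || (*StrideA < 0 && *StrideB > 0))
        return DepType::Unknown;
      return DepType::NeedsDistanceCheck;
    }
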
+
+  // If either Src or Sink are not strided (i.e. not a non-wrapping AddRec) and
+  // not loop-invariant (stride will be 0 in that case), we cannot analyze the
+  // dependence further and also cannot generate runtime checks.
+  if (!StrideAPtr || !StrideBPtr) {
     LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
+    return MemoryDepChecker::Dependence::IndirectUnsafe;
+  }
+
+  int64_t StrideAPtrInt = *StrideAPtr;
+  int64_t StrideBPtrInt = *StrideBPtr;
+  LLVM_DEBUG(dbgs() << "LAA: Src induction step: " << StrideAPtrInt
+                    << " Sink induction step: " << StrideBPtrInt << "\n");
+  // At least Src or Sink are loop invariant and the other is strided or
+  // invariant. We can generate a runtime check to disambiguate the accesses.
+  if (StrideAPtrInt == 0 || StrideBPtrInt == 0)
+    return MemoryDepChecker::Dependence::Unknown;
+
+  // Both Src and Sink have a constant stride, check if they are in the same
+  // direction.
+  if ((StrideAPtrInt > 0 && StrideBPtrInt < 0) ||
+      (StrideAPtrInt < 0 && StrideBPtrInt > 0)) {
+    LLVM_DEBUG(
+        dbgs() << "Pointer access with strides in different directions\n");
     return MemoryDepChecker::Dependence::Unknown;
   }
@@ -2001,22 +1997,20 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
       DL.getTypeStoreSizeInBits(ATy) == DL.getTypeStoreSizeInBits(BTy);
   if (!HasSameSize)
     TypeByteSize = 0;
-  return DepDistanceStrideAndSizeInfo(Dist, std::abs(StrideAPtr),
-                                      std::abs(StrideBPtr), TypeByteSize,
+  return DepDistanceStrideAndSizeInfo(Dist, std::abs(StrideAPtrInt),
+                                      std::abs(StrideBPtrInt), TypeByteSize,
                                       AIsWrite, BIsWrite);
 }
 
-MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
-    const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B,
-    unsigned BIdx,
-    const DenseMap<Value *, SmallVector<const Value *, 16>>
-        &UnderlyingObjects) {
+MemoryDepChecker::Dependence::DepType
+MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
+                              const MemAccessInfo &B, unsigned BIdx) {
   assert(AIdx < BIdx && "Must pass arguments in program order");
 
   // Get the dependence distance, stride, type size and what access writes for
   // the dependence between A and B.
-  auto Res = getDependenceDistanceStrideAndSize(
-      A, InstMap[AIdx], B, InstMap[BIdx], UnderlyingObjects);
+  auto Res =
+      getDependenceDistanceStrideAndSize(A, InstMap[AIdx], B, InstMap[BIdx]);
   if (std::holds_alternative<Dependence::DepType>(Res))
     return std::get<Dependence::DepType>(Res);
 
@@ -2250,10 +2244,8 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
   return Dependence::BackwardVectorizable;
 }
 
-bool MemoryDepChecker::areDepsSafe(
-    DepCandidates &AccessSets, MemAccessInfoList &CheckDeps,
-    const DenseMap<Value *, SmallVector<const Value *, 16>>
-        &UnderlyingObjects) {
+bool MemoryDepChecker::areDepsSafe(const DepCandidates &AccessSets,
+                                   const MemAccessInfoList &CheckDeps) {
   MinDepDistBytes = -1;
   SmallPtrSet<MemAccessInfo, 8> Visited;
@@ -2296,8 +2288,8 @@ bool MemoryDepChecker::areDepsSafe(
         if (*I1 > *I2)
           std::swap(A, B);
 
-        Dependence::DepType Type = isDependent(*A.first, A.second, *B.first,
-                                               B.second, UnderlyingObjects);
+        Dependence::DepType Type =
+            isDependent(*A.first, A.second, *B.first, B.second);
         mergeInStatus(Dependence::isSafeForVectorization(Type));
 
         // Gather dependences unless we accumulated MaxDependences
@@ -2652,8 +2644,7 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
   if (Accesses.isDependencyCheckNeeded()) {
     LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
     DepsAreSafe = DepChecker->areDepsSafe(DependentAccesses,
-                                          Accesses.getDependenciesToCheck(),
-                                          Accesses.getUnderlyingObjects());
+                                          Accesses.getDependenciesToCheck());
 
     if (!DepsAreSafe && DepChecker->shouldRetryWithRuntimeCheck()) {
       LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
diff --git a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index e1cb63a9ab8f..0d7eb7da8d6b 100644
--- a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -822,16 +822,16 @@ MDNode *AAMDNodes::extendToTBAA(MDNode *MD, ssize_t Len) {
 AAMDNodes AAMDNodes::adjustForAccess(unsigned AccessSize) {
   AAMDNodes New = *this;
   MDNode *M = New.TBAAStruct;
-  if (M && M->getNumOperands() >= 3 && M->getOperand(0) &&
+  if (!New.TBAA && M && M->getNumOperands() >= 3 && M->getOperand(0) &&
       mdconst::hasa<ConstantInt>(M->getOperand(0)) &&
       mdconst::extract<ConstantInt>(M->getOperand(0))->isZero() &&
       M->getOperand(1) && mdconst::hasa<ConstantInt>(M->getOperand(1)) &&
       mdconst::extract<ConstantInt>(M->getOperand(1))->getValue() ==
           AccessSize &&
-      M->getOperand(2) && isa<MDNode>(M->getOperand(2))) {
-    New.TBAAStruct = nullptr;
+      M->getOperand(2) && isa<MDNode>(M->getOperand(2)))
     New.TBAA = cast<MDNode>(M->getOperand(2));
-  }
+
+  New.TBAAStruct = nullptr;
   return New;
 }
diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp
index 1d767a3484bc..9d8c9119f771 100644
--- a/llvm/lib/CodeGen/CalcSpillWeights.cpp
+++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -22,6 +22,7 @@
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cassert>
 #include <tuple>
@@ -257,7 +258,9 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
     return -1.0f;
   }
 
-  float Weight = 1.0f;
+  // Force Weight onto the stack so that x86 doesn't add hidden precision,
+  // similar to HWeight below.
+  stack_float_t Weight = 1.0f;
   if (IsSpillable) {
     // Get loop info for mi.
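
For background on the stack_float_t type used in the change above (it is introduced in the MathExtras.h hunk earlier in this diff): on 32-bit x86 the x87 unit evaluates at 80-bit precision unless a value is spilled to a 32-bit slot, which the volatile qualifier forces. A minimal standalone illustration of the effect, with made-up names and values rather than LLVM code:

    #include <cstdio>

    // Same shape as the definition added in this diff: on i386, 'volatile
    // float' forces a round-trip through a 32-bit memory slot, discarding
    // any extra x87 precision before a later comparison.
    #if defined(__i386__) || defined(_M_IX86)
    using stack_float_t = volatile float;
    #else
    using stack_float_t = float;
    #endif

    int main() {
      stack_float_t Weight = 1.0f;
      Weight = Weight + 1e-9f; // far below one ulp of 1.0f, lost on rounding
      // With a plain 'float' kept in an 80-bit x87 register this comparison
      // could see the unrounded sum; after the forced spill it is reliably
      // false and the program prints "rounded".
      std::printf("%s\n", Weight > 1.0f ? "extra precision" : "rounded");
      return 0;
    }
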
if (MI->getParent() != MBB) { @@ -284,11 +287,9 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start, Register HintReg = copyHint(MI, LI.reg(), TRI, MRI); if (!HintReg) continue; - // Force hweight onto the stack so that x86 doesn't add hidden precision, + // Force HWeight onto the stack so that x86 doesn't add hidden precision, // making the comparison incorrectly pass (i.e., 1 > 1 == true??). - // - // FIXME: we probably shouldn't use floats at all. - volatile float HWeight = Hint[HintReg] += Weight; + stack_float_t HWeight = Hint[HintReg] += Weight; if (HintReg.isVirtual() || MRI.isAllocatable(HintReg)) CopyHints.insert(CopyHint(HintReg, HWeight)); } diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 68a8a273a1b4..eb010afd41b6 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -3889,6 +3889,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { F.getSubprogram(), &F.getEntryBlock()); R << "unable to translate in big endian mode"; reportTranslationError(*MF, *TPC, *ORE, R); + return false; } // Release the per-function state when we return, whether we succeeded or not. diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp index cd5d877e53d8..f4490873cfdc 100644 --- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -341,6 +341,9 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) { << ore::NV("Function", MF.getFunction().getName()) << "'"; }); + // Emit any remarks implemented for the target, based on final frame layout. + TFI->emitRemarks(MF, ORE); + delete RS; SaveBlocks.clear(); RestoreBlocks.clear(); diff --git a/llvm/lib/CodeGenData/CodeGenData.cpp b/llvm/lib/CodeGenData/CodeGenData.cpp deleted file mode 100644 index 49b744744095..000000000000 --- a/llvm/lib/CodeGenData/CodeGenData.cpp +++ /dev/null @@ -1,196 +0,0 @@ -//===-- CodeGenData.cpp ---------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains support for codegen data that has stable summary which -// can be used to optimize the code in the subsequent codegen. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Bitcode/BitcodeWriter.h" -#include "llvm/CodeGenData/CodeGenDataReader.h" -#include "llvm/CodeGenData/OutlinedHashTreeRecord.h" -#include "llvm/Object/ObjectFile.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/WithColor.h" - -#define DEBUG_TYPE "cg-data" - -using namespace llvm; -using namespace cgdata; - -static std::string getCGDataErrString(cgdata_error Err, - const std::string &ErrMsg = "") { - std::string Msg; - raw_string_ostream OS(Msg); - - switch (Err) { - case cgdata_error::success: - OS << "success"; - break; - case cgdata_error::eof: - OS << "end of File"; - break; - case cgdata_error::bad_magic: - OS << "invalid codegen data (bad magic)"; - break; - case cgdata_error::bad_header: - OS << "invalid codegen data (file header is corrupt)"; - break; - case cgdata_error::empty_cgdata: - OS << "empty codegen data"; - break; - case cgdata_error::malformed: - OS << "malformed codegen data"; - break; - case cgdata_error::unsupported_version: - OS << "unsupported codegen data version"; - break; - } - - // If optional error message is not empty, append it to the message. - if (!ErrMsg.empty()) - OS << ": " << ErrMsg; - - return OS.str(); -} - -namespace { - -// FIXME: This class is only here to support the transition to llvm::Error. It -// will be removed once this transition is complete. Clients should prefer to -// deal with the Error value directly, rather than converting to error_code. -class CGDataErrorCategoryType : public std::error_category { - const char *name() const noexcept override { return "llvm.cgdata"; } - - std::string message(int IE) const override { - return getCGDataErrString(static_cast<cgdata_error>(IE)); - } -}; - -} // end anonymous namespace - -const std::error_category &llvm::cgdata_category() { - static CGDataErrorCategoryType ErrorCategory; - return ErrorCategory; -} - -std::string CGDataError::message() const { - return getCGDataErrString(Err, Msg); -} - -char CGDataError::ID = 0; - -namespace { - -const char *CodeGenDataSectNameCommon[] = { -#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \ - SectNameCommon, -#include "llvm/CodeGenData/CodeGenData.inc" -}; - -const char *CodeGenDataSectNameCoff[] = { -#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \ - SectNameCoff, -#include "llvm/CodeGenData/CodeGenData.inc" -}; - -const char *CodeGenDataSectNamePrefix[] = { -#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) Prefix, -#include "llvm/CodeGenData/CodeGenData.inc" -}; - -} // namespace - -namespace llvm { - -std::string getCodeGenDataSectionName(CGDataSectKind CGSK, - Triple::ObjectFormatType OF, - bool AddSegmentInfo) { - std::string SectName; - - if (OF == Triple::MachO && AddSegmentInfo) - SectName = CodeGenDataSectNamePrefix[CGSK]; - - if (OF == Triple::COFF) - SectName += CodeGenDataSectNameCoff[CGSK]; - else - SectName += CodeGenDataSectNameCommon[CGSK]; - - return SectName; -} - -std::unique_ptr<CodeGenData> CodeGenData::Instance = nullptr; -std::once_flag CodeGenData::OnceFlag; - -CodeGenData &CodeGenData::getInstance() { - std::call_once(CodeGenData::OnceFlag, []() { - Instance = std::unique_ptr<CodeGenData>(new CodeGenData()); - - // TODO: Initialize writer or reader mode for the client optimization. 
- }); - return *(Instance.get()); -} - -namespace IndexedCGData { - -Expected<Header> Header::readFromBuffer(const unsigned char *Curr) { - using namespace support; - - static_assert(std::is_standard_layout_v<llvm::IndexedCGData::Header>, - "The header should be standard layout type since we use offset " - "of fields to read."); - Header H; - H.Magic = endian::readNext<uint64_t, endianness::little, unaligned>(Curr); - if (H.Magic != IndexedCGData::Magic) - return make_error<CGDataError>(cgdata_error::bad_magic); - H.Version = endian::readNext<uint32_t, endianness::little, unaligned>(Curr); - if (H.Version > IndexedCGData::CGDataVersion::CurrentVersion) - return make_error<CGDataError>(cgdata_error::unsupported_version); - H.DataKind = endian::readNext<uint32_t, endianness::little, unaligned>(Curr); - - switch (H.Version) { - // When a new field is added to the header add a case statement here to - // compute the size as offset of the new field + size of the new field. This - // relies on the field being added to the end of the list. - static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version1, - "Please update the size computation below if a new field has " - "been added to the header, if not add a case statement to " - "fall through to the latest version."); - case 1ull: - H.OutlinedHashTreeOffset = - endian::readNext<uint64_t, endianness::little, unaligned>(Curr); - } - - return H; -} - -} // end namespace IndexedCGData - -namespace cgdata { - -void warn(Twine Message, std::string Whence, std::string Hint) { - WithColor::warning(); - if (!Whence.empty()) - errs() << Whence << ": "; - errs() << Message << "\n"; - if (!Hint.empty()) - WithColor::note() << Hint << "\n"; -} - -void warn(Error E, StringRef Whence) { - if (E.isA<CGDataError>()) { - handleAllErrors(std::move(E), [&](const CGDataError &IPE) { - warn(IPE.message(), Whence.str(), ""); - }); - } -} - -} // end namespace cgdata - -} // end namespace llvm diff --git a/llvm/lib/CodeGenData/CodeGenDataReader.cpp b/llvm/lib/CodeGenData/CodeGenDataReader.cpp deleted file mode 100644 index bcd61047079f..000000000000 --- a/llvm/lib/CodeGenData/CodeGenDataReader.cpp +++ /dev/null @@ -1,175 +0,0 @@ -//===- CodeGenDataReader.cpp ----------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains support for reading codegen data. -// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGenData/CodeGenDataReader.h" -#include "llvm/CodeGenData/OutlinedHashTreeRecord.h" -#include "llvm/Object/ObjectFile.h" -#include "llvm/Support/MemoryBuffer.h" - -#define DEBUG_TYPE "cg-data-reader" - -using namespace llvm; - -namespace llvm { - -static Expected<std::unique_ptr<MemoryBuffer>> -setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) { - auto BufferOrErr = Filename.str() == "-" ? 
MemoryBuffer::getSTDIN() - : FS.getBufferForFile(Filename); - if (std::error_code EC = BufferOrErr.getError()) - return errorCodeToError(EC); - return std::move(BufferOrErr.get()); -} - -Error CodeGenDataReader::mergeFromObjectFile( - const object::ObjectFile *Obj, - OutlinedHashTreeRecord &GlobalOutlineRecord) { - Triple TT = Obj->makeTriple(); - auto CGOutLineName = - getCodeGenDataSectionName(CG_outline, TT.getObjectFormat(), false); - - for (auto &Section : Obj->sections()) { - Expected<StringRef> NameOrErr = Section.getName(); - if (!NameOrErr) - return NameOrErr.takeError(); - Expected<StringRef> ContentsOrErr = Section.getContents(); - if (!ContentsOrErr) - return ContentsOrErr.takeError(); - auto *Data = reinterpret_cast<const unsigned char *>(ContentsOrErr->data()); - auto *EndData = Data + ContentsOrErr->size(); - - if (*NameOrErr == CGOutLineName) { - // In case dealing with an executable that has concatenated cgdata, - // we want to merge them into a single cgdata. - // Although it's not a typical workflow, we support this scenario. - while (Data != EndData) { - OutlinedHashTreeRecord LocalOutlineRecord; - LocalOutlineRecord.deserialize(Data); - GlobalOutlineRecord.merge(LocalOutlineRecord); - } - } - // TODO: Add support for other cgdata sections. - } - - return Error::success(); -} - -Error IndexedCodeGenDataReader::read() { - using namespace support; - - // The smallest header with the version 1 is 24 bytes - const unsigned MinHeaderSize = 24; - if (DataBuffer->getBufferSize() < MinHeaderSize) - return error(cgdata_error::bad_header); - - auto *Start = - reinterpret_cast<const unsigned char *>(DataBuffer->getBufferStart()); - auto *End = - reinterpret_cast<const unsigned char *>(DataBuffer->getBufferEnd()); - if (auto E = IndexedCGData::Header::readFromBuffer(Start).moveInto(Header)) - return E; - - if (hasOutlinedHashTree()) { - const unsigned char *Ptr = Start + Header.OutlinedHashTreeOffset; - if (Ptr >= End) - return error(cgdata_error::eof); - HashTreeRecord.deserialize(Ptr); - } - - return success(); -} - -Expected<std::unique_ptr<CodeGenDataReader>> -CodeGenDataReader::create(const Twine &Path, vfs::FileSystem &FS) { - // Set up the buffer to read. - auto BufferOrError = setupMemoryBuffer(Path, FS); - if (Error E = BufferOrError.takeError()) - return std::move(E); - return CodeGenDataReader::create(std::move(BufferOrError.get())); -} - -Expected<std::unique_ptr<CodeGenDataReader>> -CodeGenDataReader::create(std::unique_ptr<MemoryBuffer> Buffer) { - if (Buffer->getBufferSize() == 0) - return make_error<CGDataError>(cgdata_error::empty_cgdata); - - std::unique_ptr<CodeGenDataReader> Reader; - // Create the reader. - if (IndexedCodeGenDataReader::hasFormat(*Buffer)) - Reader = std::make_unique<IndexedCodeGenDataReader>(std::move(Buffer)); - else if (TextCodeGenDataReader::hasFormat(*Buffer)) - Reader = std::make_unique<TextCodeGenDataReader>(std::move(Buffer)); - else - return make_error<CGDataError>(cgdata_error::malformed); - - // Initialize the reader and return the result. - if (Error E = Reader->read()) - return std::move(E); - - return std::move(Reader); -} - -bool IndexedCodeGenDataReader::hasFormat(const MemoryBuffer &DataBuffer) { - using namespace support; - if (DataBuffer.getBufferSize() < sizeof(IndexedCGData::Magic)) - return false; - - uint64_t Magic = endian::read<uint64_t, llvm::endianness::little, aligned>( - DataBuffer.getBufferStart()); - // Verify that it's magical. 
- return Magic == IndexedCGData::Magic; -} - -bool TextCodeGenDataReader::hasFormat(const MemoryBuffer &Buffer) { - // Verify that this really looks like plain ASCII text by checking a - // 'reasonable' number of characters (up to the magic size). - StringRef Prefix = Buffer.getBuffer().take_front(sizeof(uint64_t)); - return llvm::all_of(Prefix, [](char c) { return isPrint(c) || isSpace(c); }); -} -Error TextCodeGenDataReader::read() { - using namespace support; - - // Parse the custom header line by line. - for (; !Line.is_at_eof(); ++Line) { - // Skip empty or whitespace-only lines - if (Line->trim().empty()) - continue; - - if (!Line->starts_with(":")) - break; - StringRef Str = Line->drop_front().rtrim(); - if (Str.equals_insensitive("outlined_hash_tree")) - DataKind |= CGDataKind::FunctionOutlinedHashTree; - else - return error(cgdata_error::bad_header); - } - - // We treat an empty header (that is a comment # only) as a valid header. - if (Line.is_at_eof()) { - if (DataKind == CGDataKind::Unknown) - return Error::success(); - return error(cgdata_error::bad_header); - } - - // The YAML docs follow after the header. - const char *Pos = Line->data(); - size_t Size = reinterpret_cast<size_t>(DataBuffer->getBufferEnd()) - - reinterpret_cast<size_t>(Pos); - yaml::Input YOS(StringRef(Pos, Size)); - if (hasOutlinedHashTree()) - HashTreeRecord.deserializeYAML(YOS); - - // TODO: Add more yaml cgdata in order - - return Error::success(); -} -} // end namespace llvm diff --git a/llvm/lib/CodeGenData/CodeGenDataWriter.cpp b/llvm/lib/CodeGenData/CodeGenDataWriter.cpp deleted file mode 100644 index 3c91a1b30345..000000000000 --- a/llvm/lib/CodeGenData/CodeGenDataWriter.cpp +++ /dev/null @@ -1,162 +0,0 @@ -//===- CodeGenDataWriter.cpp ----------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains support for writing codegen data. -// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGenData/CodeGenDataWriter.h" -#include "llvm/Support/Endian.h" -#include "llvm/Support/EndianStream.h" - -#define DEBUG_TYPE "cg-data-writer" - -using namespace llvm; - -namespace llvm { - -/// A struct to define how the data stream should be patched. -struct CGDataPatchItem { - uint64_t Pos; // Where to patch. - uint64_t *D; // Pointer to an array of source data. - int N; // Number of elements in \c D array. -}; - -// A wrapper class to abstract writer stream with support of bytes -// back patching. -class CGDataOStream { -public: - CGDataOStream(raw_fd_ostream &FD) - : IsFDOStream(true), OS(FD), LE(FD, llvm::endianness::little) {} - CGDataOStream(raw_string_ostream &STR) - : IsFDOStream(false), OS(STR), LE(STR, llvm::endianness::little) {} - - uint64_t tell() { return OS.tell(); } - void write(uint64_t V) { LE.write<uint64_t>(V); } - void write32(uint32_t V) { LE.write<uint32_t>(V); } - void write8(uint8_t V) { LE.write<uint8_t>(V); } - - // \c patch can only be called when all data is written and flushed. - // For raw_string_ostream, the patch is done on the target string - // directly and it won't be reflected in the stream's internal buffer. 
- void patch(ArrayRef<CGDataPatchItem> P) { - using namespace support; - - if (IsFDOStream) { - raw_fd_ostream &FDOStream = static_cast<raw_fd_ostream &>(OS); - const uint64_t LastPos = FDOStream.tell(); - for (const auto &K : P) { - FDOStream.seek(K.Pos); - for (int I = 0; I < K.N; I++) - write(K.D[I]); - } - // Reset the stream to the last position after patching so that users - // don't accidentally overwrite data. This makes it consistent with - // the string stream below which replaces the data directly. - FDOStream.seek(LastPos); - } else { - raw_string_ostream &SOStream = static_cast<raw_string_ostream &>(OS); - std::string &Data = SOStream.str(); // with flush - for (const auto &K : P) { - for (int I = 0; I < K.N; I++) { - uint64_t Bytes = - endian::byte_swap<uint64_t, llvm::endianness::little>(K.D[I]); - Data.replace(K.Pos + I * sizeof(uint64_t), sizeof(uint64_t), - (const char *)&Bytes, sizeof(uint64_t)); - } - } - } - } - - // If \c OS is an instance of \c raw_fd_ostream, this field will be - // true. Otherwise, \c OS will be an raw_string_ostream. - bool IsFDOStream; - raw_ostream &OS; - support::endian::Writer LE; -}; - -} // end namespace llvm - -void CodeGenDataWriter::addRecord(OutlinedHashTreeRecord &Record) { - assert(Record.HashTree && "empty hash tree in the record"); - HashTreeRecord.HashTree = std::move(Record.HashTree); - - DataKind |= CGDataKind::FunctionOutlinedHashTree; -} - -Error CodeGenDataWriter::write(raw_fd_ostream &OS) { - CGDataOStream COS(OS); - return writeImpl(COS); -} - -Error CodeGenDataWriter::writeHeader(CGDataOStream &COS) { - using namespace support; - IndexedCGData::Header Header; - Header.Magic = IndexedCGData::Magic; - Header.Version = IndexedCGData::Version; - - // Set the CGDataKind depending on the kind. - Header.DataKind = 0; - if (static_cast<bool>(DataKind & CGDataKind::FunctionOutlinedHashTree)) - Header.DataKind |= - static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree); - - Header.OutlinedHashTreeOffset = 0; - - // Only write up to the CGDataKind. We need to remember the offset of the - // remaining fields to allow back-patching later. - COS.write(Header.Magic); - COS.write32(Header.Version); - COS.write32(Header.DataKind); - - // Save the location of Header.OutlinedHashTreeOffset field in \c COS. - OutlinedHashTreeOffset = COS.tell(); - - // Reserve the space for OutlinedHashTreeOffset field. - COS.write(0); - - return Error::success(); -} - -Error CodeGenDataWriter::writeImpl(CGDataOStream &COS) { - if (Error E = writeHeader(COS)) - return E; - - uint64_t OutlinedHashTreeFieldStart = COS.tell(); - if (hasOutlinedHashTree()) - HashTreeRecord.serialize(COS.OS); - - // Back patch the offsets. 
- CGDataPatchItem PatchItems[] = { - {OutlinedHashTreeOffset, &OutlinedHashTreeFieldStart, 1}}; - COS.patch(PatchItems); - - return Error::success(); -} - -Error CodeGenDataWriter::writeHeaderText(raw_fd_ostream &OS) { - if (hasOutlinedHashTree()) - OS << "# Outlined stable hash tree\n:outlined_hash_tree\n"; - - // TODO: Add more data types in this header - - return Error::success(); -} - -Error CodeGenDataWriter::writeText(raw_fd_ostream &OS) { - if (Error E = writeHeaderText(OS)) - return E; - - yaml::Output YOS(OS); - if (hasOutlinedHashTree()) - HashTreeRecord.serializeYAML(YOS); - - // TODO: Write more yaml cgdata in order - - return Error::success(); -} diff --git a/llvm/lib/IR/LegacyPassManager.cpp b/llvm/lib/IR/LegacyPassManager.cpp index 9c44eff7953a..01aaedcf7d54 100644 --- a/llvm/lib/IR/LegacyPassManager.cpp +++ b/llvm/lib/IR/LegacyPassManager.cpp @@ -12,7 +12,6 @@ #include "llvm/IR/LegacyPassManager.h" #include "llvm/ADT/MapVector.h" -#include "llvm/Demangle/Demangle.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/LLVMContext.h" @@ -1416,8 +1415,7 @@ bool FPPassManager::runOnFunction(Function &F) { // Store name outside of loop to avoid redundant calls. const StringRef Name = F.getName(); - llvm::TimeTraceScope FunctionScope( - "OptFunction", [&F]() { return demangle(F.getName().str()); }); + llvm::TimeTraceScope FunctionScope("OptFunction", Name); for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { FunctionPass *FP = getContainedPass(Index); diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp index fc7b82d522bf..4eff2deef9ab 100644 --- a/llvm/lib/Passes/StandardInstrumentations.cpp +++ b/llvm/lib/Passes/StandardInstrumentations.cpp @@ -23,7 +23,6 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineVerifier.h" -#include "llvm/Demangle/Demangle.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" @@ -236,12 +235,12 @@ void printIR(raw_ostream &OS, const MachineFunction *MF) { MF->print(OS); } -std::string getIRName(Any IR, bool demangled = false) { +std::string getIRName(Any IR) { if (unwrapIR<Module>(IR)) return "[module]"; if (const auto *F = unwrapIR<Function>(IR)) - return demangled ? demangle(F->getName()) : F->getName().str(); + return F->getName().str(); if (const auto *C = unwrapIR<LazyCallGraph::SCC>(IR)) return C->getName(); @@ -251,7 +250,7 @@ std::string getIRName(Any IR, bool demangled = false) { L->getHeader()->getParent()->getName().str(); if (const auto *MF = unwrapIR<MachineFunction>(IR)) - return demangled ? 
demangle(MF->getName()) : MF->getName().str(); + return MF->getName().str(); llvm_unreachable("Unknown wrapped IR type"); } @@ -1589,7 +1588,7 @@ void TimeProfilingPassesHandler::registerCallbacks( } void TimeProfilingPassesHandler::runBeforePass(StringRef PassID, Any IR) { - timeTraceProfilerBegin(PassID, getIRName(IR, true)); + timeTraceProfilerBegin(PassID, getIRName(IR)); } void TimeProfilingPassesHandler::runAfterPass() { timeTraceProfilerEnd(); } diff --git a/llvm/lib/Support/regcomp.c b/llvm/lib/Support/regcomp.c index 990aef32a396..daa41eb4912e 100644 --- a/llvm/lib/Support/regcomp.c +++ b/llvm/lib/Support/regcomp.c @@ -278,7 +278,7 @@ static char nuls[10]; /* place to point scanner in event of error */ #else #define DUPMAX 255 #endif -#define INFINITY (DUPMAX + 1) +#define REGINFINITY (DUPMAX + 1) #ifndef NDEBUG static int never = 0; /* for use in asserts; shuts lint up */ @@ -582,7 +582,7 @@ p_ere_exp(struct parse *p) count2 = p_count(p); REQUIRE(count <= count2, REG_BADBR); } else /* single number with comma */ - count2 = INFINITY; + count2 = REGINFINITY; } else /* just a single number */ count2 = count; repeat(p, pos, count, count2); @@ -753,7 +753,7 @@ p_simp_re(struct parse *p, count2 = p_count(p); REQUIRE(count <= count2, REG_BADBR); } else /* single number with comma */ - count2 = INFINITY; + count2 = REGINFINITY; } else /* just a single number */ count2 = count; repeat(p, pos, count, count2); @@ -1115,7 +1115,7 @@ repeat(struct parse *p, # define N 2 # define INF 3 # define REP(f, t) ((f)*8 + (t)) -# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) +# define MAP(n) (((n) <= 1) ? (n) : ((n) == REGINFINITY) ? INF : N) sopno copy; if (p->error != 0) /* head off possible runaway recursion */ diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index 3c9b07ad45bf..c64454cc253c 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -1292,6 +1292,13 @@ void AArch64AsmPrinter::emitGlobalAlias(const Module &M, StringRef ExpStr = cast<MDString>(Node->getOperand(0))->getString(); MCSymbol *ExpSym = MMI->getContext().getOrCreateSymbol(ExpStr); MCSymbol *Sym = MMI->getContext().getOrCreateSymbol(GA.getName()); + + OutStreamer->beginCOFFSymbolDef(ExpSym); + OutStreamer->emitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_EXTERNAL); + OutStreamer->emitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION + << COFF::SCT_COMPLEX_TYPE_SHIFT); + OutStreamer->endCOFFSymbolDef(); + OutStreamer->beginCOFFSymbolDef(Sym); OutStreamer->emitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_EXTERNAL); OutStreamer->emitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION diff --git a/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp b/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp index 2bc14f9821e6..161cf24dd403 100644 --- a/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp +++ b/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp @@ -108,6 +108,10 @@ static bool atomicReadDroppedOnZero(unsigned Opcode) { case AArch64::LDUMINW: case AArch64::LDUMINX: case AArch64::LDUMINLB: case AArch64::LDUMINLH: case AArch64::LDUMINLW: case AArch64::LDUMINLX: + case AArch64::SWPB: case AArch64::SWPH: + case AArch64::SWPW: case AArch64::SWPX: + case AArch64::SWPLB: case AArch64::SWPLH: + case AArch64::SWPLW: case AArch64::SWPLX: return true; } return false; diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td 
index a1ae0873fc19..a4f8f8c2d962 100644 --- a/llvm/lib/Target/AArch64/AArch64Features.td +++ b/llvm/lib/Target/AArch64/AArch64Features.td @@ -438,6 +438,15 @@ def FeatureSVE2p1: ExtensionWithMArch<"sve2p1", "SVE2p1", "FEAT_SVE2p1", def FeatureB16B16 : ExtensionWithMArch<"b16b16", "B16B16", "FEAT_SVE_B16B16", "Enable SVE2.1 or SME2.1 non-widening BFloat16 to BFloat16 instructions", [FeatureBF16]>; +// FeatureSVEB16B16 and FeatureSMEB16B16 act as aliases for {FeatureB16B16}, and +// {FeatureB16B16, FeatureSME2} respectively. This allows LLVM-20 interfacing programs +// that use '+sve-b16b16' and '+sme-b16b16' to compile in LLVM-19. +def FeatureSVEB16B16 : ExtensionWithMArch<"sve-b16b16", "SVEB16B16", "FEAT_SVE_B16B16", + "Enable SVE2 non-widening and SME2 Z-targeting non-widening BFloat16 instructions", [FeatureB16B16]>; + +def FeatureSMEB16B16 : ExtensionWithMArch<"sme-b16b16", "SMEB16B16", "FEAT_SME_B16B16", + "Enable SME2.1 ZA-targeting non-widening BFloat16 instructions", [FeatureSME2, FeatureB16B16]>; + def FeatureSMEF16F16 : ExtensionWithMArch<"sme-f16f16", "SMEF16F16", "FEAT_SME_F16F16", "Enable SME non-widening Float16 instructions", [FeatureSME2]>; @@ -778,27 +787,26 @@ def HasV8_2aOps : Architecture64<8, 2, "a", "v8.2a", [HasV8_1aOps, FeaturePsUAO, FeaturePAN_RWV, FeatureRAS, FeatureCCPP], !listconcat(HasV8_1aOps.DefaultExts, [FeatureRAS])>; def HasV8_3aOps : Architecture64<8, 3, "a", "v8.3a", - [HasV8_2aOps, FeatureRCPC, FeaturePAuth, FeatureJS, FeatureCCIDX, - FeatureComplxNum], + [HasV8_2aOps, FeatureRCPC, FeaturePAuth, FeatureJS, FeatureComplxNum], !listconcat(HasV8_2aOps.DefaultExts, [FeatureComplxNum, FeatureJS, - FeaturePAuth, FeatureRCPC])>; + FeaturePAuth, FeatureRCPC, FeatureCCIDX])>; def HasV8_4aOps : Architecture64<8, 4, "a", "v8.4a", [HasV8_3aOps, FeatureDotProd, FeatureNV, FeatureMPAM, FeatureDIT, FeatureTRACEV8_4, FeatureAM, FeatureSEL2, FeatureTLB_RMI, FeatureFlagM, FeatureRCPC_IMMO, FeatureLSE2], - !listconcat(HasV8_3aOps.DefaultExts, [FeatureDotProd])>; + !listconcat(HasV8_3aOps.DefaultExts, [FeatureDotProd, FeatureDIT, FeatureFlagM])>; def HasV8_5aOps : Architecture64<8, 5, "a", "v8.5a", [HasV8_4aOps, FeatureAltFPCmp, FeatureFRInt3264, FeatureSpecRestrict, - FeatureSSBS, FeatureSB, FeaturePredRes, FeatureCacheDeepPersist, + FeatureSB, FeaturePredRes, FeatureCacheDeepPersist, FeatureBranchTargetId], - !listconcat(HasV8_4aOps.DefaultExts, [])>; + !listconcat(HasV8_4aOps.DefaultExts, [FeaturePredRes, FeatureSSBS, FeatureBranchTargetId, FeatureSB])>; def HasV8_6aOps : Architecture64<8, 6, "a", "v8.6a", [HasV8_5aOps, FeatureAMVS, FeatureBF16, FeatureFineGrainedTraps, FeatureEnhancedCounterVirtualization, FeatureMatMulInt8], !listconcat(HasV8_5aOps.DefaultExts, [FeatureBF16, FeatureMatMulInt8])>; def HasV8_7aOps : Architecture64<8, 7, "a", "v8.7a", [HasV8_6aOps, FeatureXS, FeatureWFxT, FeatureHCX], - !listconcat(HasV8_6aOps.DefaultExts, [])>; + !listconcat(HasV8_6aOps.DefaultExts, [FeatureWFxT])>; def HasV8_8aOps : Architecture64<8, 8, "a", "v8.8a", [HasV8_7aOps, FeatureHBC, FeatureMOPS, FeatureNMI], !listconcat(HasV8_7aOps.DefaultExts, [FeatureMOPS, FeatureHBC])>; @@ -816,7 +824,7 @@ def HasV9_1aOps : Architecture64<9, 1, "a", "v9.1a", !listconcat(HasV9_0aOps.DefaultExts, [FeatureBF16, FeatureMatMulInt8, FeatureRME])>; def HasV9_2aOps : Architecture64<9, 2, "a", "v9.2a", [HasV8_7aOps, HasV9_1aOps], - !listconcat(HasV9_1aOps.DefaultExts, [FeatureMEC])>; + !listconcat(HasV9_1aOps.DefaultExts, [FeatureMEC, FeatureWFxT])>; def HasV9_3aOps : Architecture64<9, 3, "a", 
"v9.3a", [HasV8_8aOps, HasV9_2aOps], !listconcat(HasV9_2aOps.DefaultExts, [FeatureMOPS, FeatureHBC])>; @@ -833,7 +841,7 @@ def HasV8_0rOps : Architecture64<8, 0, "r", "v8r", //v8.2 FeatureRAS, FeaturePsUAO, FeatureCCPP, FeaturePAN_RWV, //v8.3 - FeatureCCIDX, FeaturePAuth, FeatureRCPC, + FeaturePAuth, FeatureRCPC, //v8.4 FeatureTRACEV8_4, FeatureTLB_RMI, FeatureFlagM, FeatureDIT, FeatureSEL2, FeatureRCPC_IMMO, @@ -844,7 +852,7 @@ def HasV8_0rOps : Architecture64<8, 0, "r", "v8r", // For v8-R, we do not enable crypto and align with GCC that enables a more // minimal set of optional architecture extensions. !listconcat( - !listremove(HasV8_5aOps.DefaultExts, [FeatureLSE]), + !listremove(HasV8_5aOps.DefaultExts, [FeatureBranchTargetId, FeaturePredRes]), [FeatureSSBS, FeatureFullFP16, FeatureFP16FML, FeatureSB] )>; diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index bd530903bb66..ba46ededc63a 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -240,6 +240,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" @@ -275,6 +276,10 @@ cl::opt<bool> EnableHomogeneousPrologEpilog( // Stack hazard padding size. 0 = disabled. static cl::opt<unsigned> StackHazardSize("aarch64-stack-hazard-size", cl::init(0), cl::Hidden); +// Stack hazard size for analysis remarks. StackHazardSize takes precedence. +static cl::opt<unsigned> + StackHazardRemarkSize("aarch64-stack-hazard-remark-size", cl::init(0), + cl::Hidden); // Whether to insert padding into non-streaming functions (for testing). static cl::opt<bool> StackHazardInNonStreaming("aarch64-stack-hazard-in-non-streaming", @@ -2615,9 +2620,16 @@ AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF, const auto &MFI = MF.getFrameInfo(); int64_t ObjectOffset = MFI.getObjectOffset(FI); + StackOffset SVEStackSize = getSVEStackSize(MF); + + // For VLA-area objects, just emit an offset at the end of the stack frame. + // Whilst not quite correct, these objects do live at the end of the frame and + // so it is more useful for analysis for the offset to reflect this. + if (MFI.isVariableSizedObjectIndex(FI)) { + return StackOffset::getFixed(-((int64_t)MFI.getStackSize())) - SVEStackSize; + } // This is correct in the absence of any SVE stack objects. - StackOffset SVEStackSize = getSVEStackSize(MF); if (!SVEStackSize) return StackOffset::getFixed(ObjectOffset - getOffsetOfLocalArea()); @@ -3528,13 +3540,9 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( return true; } -// Return the FrameID for a Load/Store instruction by looking at the MMO. -static std::optional<int> getLdStFrameID(const MachineInstr &MI, - const MachineFrameInfo &MFI) { - if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1) - return std::nullopt; - - MachineMemOperand *MMO = *MI.memoperands_begin(); +// Return the FrameID for a MMO. +static std::optional<int> getMMOFrameID(MachineMemOperand *MMO, + const MachineFrameInfo &MFI) { auto *PSV = dyn_cast_or_null<FixedStackPseudoSourceValue>(MMO->getPseudoValue()); if (PSV) @@ -3552,6 +3560,15 @@ static std::optional<int> getLdStFrameID(const MachineInstr &MI, return std::nullopt; } +// Return the FrameID for a Load/Store instruction by looking at the first MMO. 
+static std::optional<int> getLdStFrameID(const MachineInstr &MI, + const MachineFrameInfo &MFI) { + if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1) + return std::nullopt; + + return getMMOFrameID(*MI.memoperands_begin(), MFI); +} + // Check if a Hazard slot is needed for the current function, and if so create // one for it. The index is stored in AArch64FunctionInfo->StackHazardSlotIndex, // which can be used to determine if any hazard padding is needed. @@ -5029,3 +5046,174 @@ void AArch64FrameLowering::inlineStackProbe(MachineFunction &MF, MI->eraseFromParent(); } } + +struct StackAccess { + enum AccessType { + NotAccessed = 0, // Stack object not accessed by load/store instructions. + GPR = 1 << 0, // A general purpose register. + PPR = 1 << 1, // A predicate register. + FPR = 1 << 2, // A floating point/Neon/SVE register. + }; + + int Idx; + StackOffset Offset; + int64_t Size; + unsigned AccessTypes; + + StackAccess() : Idx(0), Offset(), Size(0), AccessTypes(NotAccessed) {} + + bool operator<(const StackAccess &Rhs) const { + return std::make_tuple(start(), Idx) < + std::make_tuple(Rhs.start(), Rhs.Idx); + } + + bool isCPU() const { + // Predicate register load and store instructions execute on the CPU. + return AccessTypes & (AccessType::GPR | AccessType::PPR); + } + bool isSME() const { return AccessTypes & AccessType::FPR; } + bool isMixed() const { return isCPU() && isSME(); } + + int64_t start() const { return Offset.getFixed() + Offset.getScalable(); } + int64_t end() const { return start() + Size; } + + std::string getTypeString() const { + switch (AccessTypes) { + case AccessType::FPR: + return "FPR"; + case AccessType::PPR: + return "PPR"; + case AccessType::GPR: + return "GPR"; + case AccessType::NotAccessed: + return "NA"; + default: + return "Mixed"; + } + } + + void print(raw_ostream &OS) const { + OS << getTypeString() << " stack object at [SP" + << (Offset.getFixed() < 0 ? "" : "+") << Offset.getFixed(); + if (Offset.getScalable()) + OS << (Offset.getScalable() < 0 ? "" : "+") << Offset.getScalable() + << " * vscale"; + OS << "]"; + } +}; + +static inline raw_ostream &operator<<(raw_ostream &OS, const StackAccess &SA) { + SA.print(OS); + return OS; +} + +void AArch64FrameLowering::emitRemarks( + const MachineFunction &MF, MachineOptimizationRemarkEmitter *ORE) const { + + SMEAttrs Attrs(MF.getFunction()); + if (Attrs.hasNonStreamingInterfaceAndBody()) + return; + + const uint64_t HazardSize = + (StackHazardSize) ? StackHazardSize : StackHazardRemarkSize; + + if (HazardSize == 0) + return; + + const MachineFrameInfo &MFI = MF.getFrameInfo(); + // Bail if function has no stack objects. + if (!MFI.hasStackObjects()) + return; + + std::vector<StackAccess> StackAccesses(MFI.getNumObjects()); + + size_t NumFPLdSt = 0; + size_t NumNonFPLdSt = 0; + + // Collect stack accesses via Load/Store instructions. 
+ for (const MachineBasicBlock &MBB : MF) { + for (const MachineInstr &MI : MBB) { + if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1) + continue; + for (MachineMemOperand *MMO : MI.memoperands()) { + std::optional<int> FI = getMMOFrameID(MMO, MFI); + if (FI && !MFI.isDeadObjectIndex(*FI)) { + int FrameIdx = *FI; + + size_t ArrIdx = FrameIdx + MFI.getNumFixedObjects(); + if (StackAccesses[ArrIdx].AccessTypes == StackAccess::NotAccessed) { + StackAccesses[ArrIdx].Idx = FrameIdx; + StackAccesses[ArrIdx].Offset = + getFrameIndexReferenceFromSP(MF, FrameIdx); + StackAccesses[ArrIdx].Size = MFI.getObjectSize(FrameIdx); + } + + unsigned RegTy = StackAccess::AccessType::GPR; + if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector) { + if (AArch64::PPRRegClass.contains(MI.getOperand(0).getReg())) + RegTy = StackAccess::PPR; + else + RegTy = StackAccess::FPR; + } else if (AArch64InstrInfo::isFpOrNEON(MI)) { + RegTy = StackAccess::FPR; + } + + StackAccesses[ArrIdx].AccessTypes |= RegTy; + + if (RegTy == StackAccess::FPR) + ++NumFPLdSt; + else + ++NumNonFPLdSt; + } + } + } + } + + if (NumFPLdSt == 0 || NumNonFPLdSt == 0) + return; + + llvm::sort(StackAccesses); + StackAccesses.erase(llvm::remove_if(StackAccesses, + [](const StackAccess &S) { + return S.AccessTypes == + StackAccess::NotAccessed; + }), + StackAccesses.end()); + + SmallVector<const StackAccess *> MixedObjects; + SmallVector<std::pair<const StackAccess *, const StackAccess *>> HazardPairs; + + if (StackAccesses.front().isMixed()) + MixedObjects.push_back(&StackAccesses.front()); + + for (auto It = StackAccesses.begin(), End = std::prev(StackAccesses.end()); + It != End; ++It) { + const auto &First = *It; + const auto &Second = *(It + 1); + + if (Second.isMixed()) + MixedObjects.push_back(&Second); + + if ((First.isSME() && Second.isCPU()) || + (First.isCPU() && Second.isSME())) { + uint64_t Distance = static_cast<uint64_t>(Second.start() - First.end()); + if (Distance < HazardSize) + HazardPairs.emplace_back(&First, &Second); + } + } + + auto EmitRemark = [&](llvm::StringRef Str) { + ORE->emit([&]() { + auto R = MachineOptimizationRemarkAnalysis( + "sme", "StackHazard", MF.getFunction().getSubprogram(), &MF.front()); + return R << formatv("stack hazard in '{0}': ", MF.getName()).str() << Str; + }); + }; + + for (const auto &P : HazardPairs) + EmitRemark(formatv("{0} is too close to {1}", *P.first, *P.second).str()); + + for (const auto *Obj : MixedObjects) + EmitRemark( + formatv("{0} accessed by both GP and FP instructions", *Obj).str()); +} diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h index 0ebab1700e9c..c19731249620 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h @@ -13,8 +13,9 @@ #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H #define LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H -#include "llvm/Support/TypeSize.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/Support/TypeSize.h" namespace llvm { @@ -178,6 +179,9 @@ private: inlineStackProbeLoopExactMultiple(MachineBasicBlock::iterator MBBI, int64_t NegProbeSize, Register TargetReg) const; + + void emitRemarks(const MachineFunction &MF, + MachineOptimizationRemarkEmitter *ORE) const override; }; } // End llvm namespace diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 6d413a09407a..62078822c89b 
100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -17719,6 +17719,9 @@ static SDValue performVecReduceAddCombineWithUADDLP(SDNode *N, // and generate vecreduce.add(concat_vector(DOT, DOT2, ..)). static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *ST) { + if (!ST->isNeonAvailable()) + return SDValue(); + if (!ST->hasDotProd()) return performVecReduceAddCombineWithUADDLP(N, DAG); diff --git a/llvm/lib/Target/AArch64/AArch64Processors.td b/llvm/lib/Target/AArch64/AArch64Processors.td index 71384a23c49a..6df87fc6a815 100644 --- a/llvm/lib/Target/AArch64/AArch64Processors.td +++ b/llvm/lib/Target/AArch64/AArch64Processors.td @@ -688,6 +688,7 @@ def ProcessorFeatures { FeatureMatMulInt8, FeatureBF16, FeatureAM, FeatureMTE, FeatureETE, FeatureSVE2BitPerm, FeatureFP16FML, + FeatureCCIDX, FeatureSB, FeaturePAuth, FeatureSSBS, FeatureSVE, FeatureSVE2, FeatureComplxNum, FeatureCRC, FeatureDotProd, FeatureFPARMv8,FeatureFullFP16, FeatureJS, FeatureLSE, @@ -695,6 +696,7 @@ def ProcessorFeatures { list<SubtargetFeature> A520 = [HasV9_2aOps, FeaturePerfMon, FeatureAM, FeatureMTE, FeatureETE, FeatureSVE2BitPerm, FeatureFP16FML, + FeatureCCIDX, FeatureSB, FeatureSSBS, FeaturePAuth, FeatureFlagM, FeaturePredRes, FeatureSVE, FeatureSVE2, FeatureBF16, FeatureComplxNum, FeatureCRC, FeatureFPARMv8, FeatureFullFP16, FeatureMatMulInt8, FeatureJS, @@ -703,6 +705,7 @@ def ProcessorFeatures { list<SubtargetFeature> A520AE = [HasV9_2aOps, FeaturePerfMon, FeatureAM, FeatureMTE, FeatureETE, FeatureSVE2BitPerm, FeatureFP16FML, + FeatureCCIDX, FeatureSB, FeatureSSBS, FeaturePAuth, FeatureFlagM, FeaturePredRes, FeatureSVE, FeatureSVE2, FeatureBF16, FeatureComplxNum, FeatureCRC, FeatureFPARMv8, FeatureFullFP16, FeatureMatMulInt8, FeatureJS, @@ -734,12 +737,14 @@ def ProcessorFeatures { FeaturePerfMon, FeatureRCPC, FeatureSPE, FeatureSSBS, FeatureCRC, FeatureLSE, FeatureRAS, FeatureRDM]; list<SubtargetFeature> A710 = [HasV9_0aOps, FeatureNEON, FeaturePerfMon, + FeatureCCIDX, FeatureSSBS, FeatureETE, FeatureMTE, FeatureFP16FML, FeatureSVE2BitPerm, FeatureBF16, FeatureMatMulInt8, FeaturePAuth, FeatureFlagM, FeatureSB, FeatureSVE, FeatureSVE2, FeatureComplxNum, FeatureCRC, FeatureDotProd, FeatureFPARMv8, FeatureFullFP16, FeatureJS, FeatureLSE, FeatureRAS, FeatureRCPC, FeatureRDM]; list<SubtargetFeature> A715 = [HasV9_0aOps, FeatureNEON, FeatureMTE, + FeatureCCIDX, FeatureFP16FML, FeatureSVE, FeatureTRBE, FeatureSVE2BitPerm, FeatureBF16, FeatureETE, FeaturePerfMon, FeatureMatMulInt8, FeatureSPE, @@ -749,6 +754,7 @@ def ProcessorFeatures { FeatureJS, FeatureLSE, FeatureRAS, FeatureRCPC, FeatureRDM]; list<SubtargetFeature> A720 = [HasV9_2aOps, FeatureMTE, FeatureFP16FML, + FeatureCCIDX, FeatureTRBE, FeatureSVE2BitPerm, FeatureETE, FeaturePerfMon, FeatureSPE, FeatureSPE_EEF, FeatureSB, FeatureSSBS, FeaturePAuth, FeatureFlagM, FeaturePredRes, @@ -757,6 +763,7 @@ def ProcessorFeatures { FeatureJS, FeatureLSE, FeatureNEON, FeatureRAS, FeatureRCPC, FeatureRDM]; list<SubtargetFeature> A720AE = [HasV9_2aOps, FeatureMTE, FeatureFP16FML, + FeatureCCIDX, FeatureTRBE, FeatureSVE2BitPerm, FeatureETE, FeaturePerfMon, FeatureSPE, FeatureSPE_EEF, FeatureSB, FeatureSSBS, FeaturePAuth, FeatureFlagM, FeaturePredRes, @@ -765,6 +772,7 @@ def ProcessorFeatures { FeatureJS, FeatureLSE, FeatureNEON, FeatureRAS, FeatureRCPC, FeatureRDM]; list<SubtargetFeature> A725 = [HasV9_2aOps, FeatureMTE, FeatureFP16FML, + FeatureCCIDX, FeatureETE, 
FeaturePerfMon, FeatureSPE, FeatureSVE2BitPerm, FeatureSPE_EEF, FeatureTRBE, FeatureFlagM, FeaturePredRes, FeatureSB, FeatureSSBS, @@ -800,6 +808,7 @@ def ProcessorFeatures { FeatureMatMulInt8, FeatureBF16, FeatureAM, FeatureMTE, FeatureETE, FeatureSVE2BitPerm, FeatureFP16FML, + FeatureCCIDX, FeaturePAuth, FeatureSSBS, FeatureSB, FeatureSVE, FeatureSVE2, FeatureFlagM, FeatureComplxNum, FeatureCRC, FeatureDotProd, FeatureFPARMv8, FeatureFullFP16, FeatureJS, FeatureLSE, FeatureRAS, FeatureRCPC, FeatureRDM]; @@ -808,6 +817,7 @@ def ProcessorFeatures { FeatureSPE, FeatureBF16, FeatureMatMulInt8, FeatureMTE, FeatureSVE2BitPerm, FeatureFullFP16, FeatureFP16FML, + FeatureCCIDX, FeatureSB, FeaturePAuth, FeaturePredRes, FeatureFlagM, FeatureSSBS, FeatureSVE2, FeatureComplxNum, FeatureCRC, FeatureFPARMv8, FeatureJS, FeatureLSE, FeatureRAS, FeatureRCPC, FeatureRDM, FeatureDotProd]; @@ -815,11 +825,13 @@ def ProcessorFeatures { FeaturePerfMon, FeatureETE, FeatureTRBE, FeatureSPE, FeatureMTE, FeatureSVE2BitPerm, FeatureFP16FML, FeatureSPE_EEF, + FeatureCCIDX, FeatureSB, FeatureSSBS, FeaturePAuth, FeatureFlagM, FeaturePredRes, FeatureSVE, FeatureSVE2, FeatureComplxNum, FeatureCRC, FeatureDotProd, FeatureFPARMv8, FeatureFullFP16, FeatureMatMulInt8, FeatureJS, FeatureLSE, FeatureNEON, FeatureRAS, FeatureRCPC, FeatureRDM, FeatureBF16]; list<SubtargetFeature> X925 = [HasV9_2aOps, FeatureMTE, FeatureFP16FML, + FeatureCCIDX, FeatureETE, FeaturePerfMon, FeatureSPE, FeatureSVE2BitPerm, FeatureSPE_EEF, FeatureTRBE, FeatureFlagM, FeaturePredRes, FeatureSB, FeatureSSBS, @@ -863,23 +875,26 @@ def ProcessorFeatures { list<SubtargetFeature> AppleA15 = [HasV8_6aOps, FeatureSHA2, FeatureAES, FeatureFPARMv8, FeatureNEON, FeaturePerfMon, FeatureSHA3, FeatureFullFP16, FeatureFP16FML, - FeatureComplxNum, FeatureCRC, FeatureJS, FeatureLSE, - FeaturePAuth, FeatureRAS, FeatureRCPC, FeatureRDM, - FeatureBF16, FeatureDotProd, FeatureMatMulInt8]; + FeatureComplxNum, FeatureCRC, FeatureJS, + FeatureLSE, FeaturePAuth, + FeatureRAS, FeatureRCPC, FeatureRDM, + FeatureBF16, FeatureDotProd, FeatureMatMulInt8, FeatureSSBS]; list<SubtargetFeature> AppleA16 = [HasV8_6aOps, FeatureSHA2, FeatureAES, FeatureFPARMv8, FeatureNEON, FeaturePerfMon, FeatureSHA3, FeatureFullFP16, FeatureFP16FML, FeatureHCX, - FeatureComplxNum, FeatureCRC, FeatureJS, FeatureLSE, - FeaturePAuth, FeatureRAS, FeatureRCPC, FeatureRDM, - FeatureBF16, FeatureDotProd, FeatureMatMulInt8]; + FeatureComplxNum, FeatureCRC, FeatureJS, + FeatureLSE, FeaturePAuth, + FeatureRAS, FeatureRCPC, FeatureRDM, + FeatureBF16, FeatureDotProd, FeatureMatMulInt8, FeatureSSBS]; list<SubtargetFeature> AppleA17 = [HasV8_6aOps, FeatureSHA2, FeatureAES, FeatureFPARMv8, FeatureNEON, FeaturePerfMon, FeatureSHA3, FeatureFullFP16, FeatureFP16FML, FeatureHCX, - FeatureComplxNum, FeatureCRC, FeatureJS, FeatureLSE, - FeaturePAuth, FeatureRAS, FeatureRCPC, FeatureRDM, - FeatureBF16, FeatureDotProd, FeatureMatMulInt8]; + FeatureComplxNum, FeatureCRC, FeatureJS, + FeatureLSE, FeaturePAuth, + FeatureRAS, FeatureRCPC, FeatureRDM, + FeatureBF16, FeatureDotProd, FeatureMatMulInt8, FeatureSSBS]; list<SubtargetFeature> AppleM4 = [HasV9_2aOps, FeatureSHA2, FeatureFPARMv8, FeatureNEON, FeaturePerfMon, FeatureSHA3, FeatureFullFP16, FeatureFP16FML, @@ -909,6 +924,7 @@ def ProcessorFeatures { FeatureMatMulInt8, FeatureMTE, FeatureSVE2, FeatureSVE2BitPerm, FeatureTRBE, FeaturePerfMon, + FeatureCCIDX, FeatureDotProd, FeatureFullFP16, FeatureSB, FeatureSSBS, FeatureSVE, FeatureComplxNum, FeatureCRC, FeatureFPARMv8, 
FeatureJS, FeatureLSE, FeatureNEON, FeaturePAuth, FeatureRAS, FeatureRCPC, FeatureRDM]; @@ -916,6 +932,7 @@ def ProcessorFeatures { FeatureFullFP16, FeatureMTE, FeaturePerfMon, FeatureRandGen, FeatureSPE, FeatureSPE_EEF, FeatureSVE2BitPerm, + FeatureCCIDX, FeatureSSBS, FeatureSB, FeaturePredRes, FeaturePAuth, FeatureFlagM, FeatureSVE, FeatureSVE2, FeatureBF16, FeatureComplxNum, FeatureCRC, FeatureDotProd, FeatureFPARMv8, FeatureMatMulInt8, @@ -926,6 +943,7 @@ def ProcessorFeatures { FeatureFullFP16, FeatureMatMulInt8, FeatureNEON, FeaturePerfMon, FeatureRandGen, FeatureSPE, FeatureSSBS, FeatureSVE, + FeatureCCIDX, FeatureSHA3, FeatureSM4, FeatureDotProd, FeatureComplxNum, FeatureCRC, FeatureJS, FeatureLSE, FeaturePAuth, FeatureRAS, FeatureRCPC, FeatureRDM]; @@ -934,6 +952,7 @@ def ProcessorFeatures { FeatureFullFP16, FeatureMatMulInt8, FeatureNEON, FeaturePerfMon, FeatureRandGen, FeatureSPE, FeatureSSBS, FeatureSVE, + FeatureCCIDX, FeatureSHA3, FeatureSM4, FeatureDotProd, FeatureComplxNum, FeatureCRC, FeatureJS, FeatureLSE, FeaturePAuth, FeatureRAS, FeatureRCPC, FeatureRDM]; @@ -941,12 +960,14 @@ def ProcessorFeatures { FeaturePerfMon, FeatureETE, FeatureMatMulInt8, FeatureNEON, FeatureSVE2BitPerm, FeatureFP16FML, FeatureMTE, FeatureRandGen, + FeatureCCIDX, FeatureSVE, FeatureSVE2, FeatureSSBS, FeatureFullFP16, FeatureDotProd, FeatureComplxNum, FeatureCRC, FeatureFPARMv8, FeatureJS, FeatureLSE, FeaturePAuth, FeatureRAS, FeatureRCPC, FeatureRDM]; list<SubtargetFeature> NeoverseV3 = [HasV9_2aOps, FeatureETE, FeatureFP16FML, FeatureFullFP16, FeatureLS64, FeatureMTE, FeaturePerfMon, FeatureRandGen, FeatureSPE, + FeatureCCIDX, FeatureSPE_EEF, FeatureSVE2BitPerm, FeatureBRBE, FeatureSSBS, FeatureSB, FeaturePredRes, FeaturePAuth, FeatureFlagM, FeatureSVE, FeatureSVE2, FeatureBF16, FeatureComplxNum, FeatureCRC, @@ -957,12 +978,14 @@ def ProcessorFeatures { FeaturePerfMon, FeatureRandGen, FeatureSPE, FeatureSPE_EEF, FeatureSVE2BitPerm, FeatureBRBE, FeatureSSBS, FeatureSB, FeaturePredRes, FeaturePAuth, FeatureFlagM, + FeatureCCIDX, FeatureSVE, FeatureSVE2, FeatureBF16, FeatureComplxNum, FeatureCRC, FeatureDotProd, FeatureFPARMv8, FeatureMatMulInt8, FeatureJS, FeatureLSE, FeatureNEON, FeatureRAS, FeatureRCPC, FeatureRDM, FeatureRME]; list<SubtargetFeature> Saphira = [HasV8_4aOps, FeatureSHA2, FeatureAES, FeatureFPARMv8, FeatureNEON, FeatureSPE, FeaturePerfMon, FeatureCRC, + FeatureCCIDX, FeatureLSE, FeatureRDM, FeatureRAS, FeatureRCPC]; list<SubtargetFeature> ThunderX = [HasV8_0aOps, FeatureCRC, FeatureSHA2, FeatureAES, FeatureFPARMv8, FeaturePerfMon, FeatureNEON]; @@ -971,6 +994,7 @@ def ProcessorFeatures { FeatureRDM]; list<SubtargetFeature> ThunderX3T110 = [HasV8_3aOps, FeatureCRC, FeatureSHA2, FeatureAES, FeatureFPARMv8, FeatureNEON, FeatureLSE, + FeatureCCIDX, FeaturePAuth, FeaturePerfMon, FeatureComplxNum, FeatureJS, FeatureRAS, FeatureRCPC, FeatureRDM]; list<SubtargetFeature> TSV110 = [HasV8_2aOps, FeatureSHA2, FeatureAES, FeatureFPARMv8, @@ -983,6 +1007,7 @@ def ProcessorFeatures { FeatureSHA2, FeatureSHA3, FeatureAES, FeatureFullFP16, FeatureBF16, FeatureComplxNum, FeatureCRC, FeatureDotProd, FeatureFPARMv8, FeatureMatMulInt8, FeatureJS, + FeatureCCIDX, FeatureLSE, FeaturePAuth, FeatureRAS, FeatureRCPC, FeatureRDM]; list<SubtargetFeature> Ampere1A = [HasV8_6aOps, FeatureNEON, FeaturePerfMon, FeatureMTE, FeatureSSBS, FeatureRandGen, @@ -991,6 +1016,7 @@ def ProcessorFeatures { FeatureFullFP16, FeatureBF16, FeatureComplxNum, FeatureCRC, FeatureDotProd, FeatureFPARMv8, FeatureMatMulInt8, 
FeatureJS, FeatureLSE, FeaturePAuth, FeatureRAS, FeatureRCPC, + FeatureCCIDX, FeatureRDM]; list<SubtargetFeature> Ampere1B = [HasV8_7aOps, FeatureNEON, FeaturePerfMon, FeatureMTE, FeatureSSBS, FeatureRandGen, @@ -999,6 +1025,7 @@ def ProcessorFeatures { FeatureWFxT, FeatureFullFP16, FeatureBF16, FeatureComplxNum, FeatureCRC, FeatureDotProd, FeatureFPARMv8, FeatureMatMulInt8, FeatureJS, FeatureLSE, FeaturePAuth, FeatureRAS, FeatureRCPC, + FeatureCCIDX, FeatureRDM]; list<SubtargetFeature> Oryon = [HasV8_6aOps, FeatureNEON, FeaturePerfMon, @@ -1007,6 +1034,7 @@ def ProcessorFeatures { FeatureSHA3, FeatureAES, FeatureSPE, FeatureBF16, FeatureComplxNum, FeatureCRC, FeatureDotProd, FeatureFPARMv8, FeatureMatMulInt8, + FeatureSSBS, FeatureCCIDX, FeatureJS, FeatureLSE, FeatureRAS, FeatureRCPC, FeatureRDM]; // ETE and TRBE are future architecture extensions. We temporarily enable them diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 45148449dfb8..39fba6a257bb 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -254,7 +254,8 @@ bool AArch64TTIImpl::areInlineCompatible(const Function *Caller, return false; if (CallerAttrs.requiresLazySave(CalleeAttrs) || - CallerAttrs.requiresSMChange(CalleeAttrs)) { + CallerAttrs.requiresSMChange(CalleeAttrs) || + CallerAttrs.requiresPreservingZT0(CalleeAttrs)) { if (hasPossibleIncompatibleOps(Callee)) return false; } @@ -540,7 +541,15 @@ static InstructionCost getHistogramCost(const IntrinsicCostAttributes &ICA) { InstructionCost AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) { + // The code-generator is currently not able to handle scalable vectors + // of <vscale x 1 x eltty> yet, so return an invalid cost to avoid selecting + // it. This change will be removed when code-generation for these types is + // sufficiently reliable. auto *RetTy = ICA.getReturnType(); + if (auto *VTy = dyn_cast<ScalableVectorType>(RetTy)) + if (VTy->getElementCount() == ElementCount::getScalable(1)) + return InstructionCost::getInvalid(); + switch (ICA.getID()) { case Intrinsic::experimental_vector_histogram_add: if (!ST->hasSVE2()) @@ -2295,6 +2304,11 @@ std::optional<Value *> AArch64TTIImpl::simplifyDemandedVectorEltsIntrinsic( return std::nullopt; } +bool AArch64TTIImpl::enableScalableVectorization() const { + return ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() && + EnableScalableAutovecInStreamingMode); +} + TypeSize AArch64TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const { switch (K) { @@ -3018,6 +3032,14 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost( ArrayRef<const Value *> Args, const Instruction *CxtI) { + // The code-generator is currently not able to handle scalable vectors + // of <vscale x 1 x eltty> yet, so return an invalid cost to avoid selecting + // it. This change will be removed when code-generation for these types is + // sufficiently reliable. + if (auto *VTy = dyn_cast<ScalableVectorType>(Ty)) + if (VTy->getElementCount() == ElementCount::getScalable(1)) + return InstructionCost::getInvalid(); + // TODO: Handle more cost kinds. 
if (CostKind != TTI::TCK_RecipThroughput) return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, @@ -3792,6 +3814,14 @@ InstructionCost AArch64TTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) { + // The code-generator is currently not able to handle scalable vectors + // of <vscale x 1 x eltty> yet, so return an invalid cost to avoid selecting + // it. This change will be removed when code-generation for these types is + // sufficiently reliable. + if (auto *VTy = dyn_cast<ScalableVectorType>(Ty)) + if (VTy->getElementCount() == ElementCount::getScalable(1)) + return InstructionCost::getInvalid(); + std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty); if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16()) @@ -3836,6 +3866,14 @@ InstructionCost AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, std::optional<FastMathFlags> FMF, TTI::TargetCostKind CostKind) { + // The code-generator is currently not able to handle scalable vectors + // of <vscale x 1 x eltty> yet, so return an invalid cost to avoid selecting + // it. This change will be removed when code-generation for these types is + // sufficiently reliable. + if (auto *VTy = dyn_cast<ScalableVectorType>(ValTy)) + if (VTy->getElementCount() == ElementCount::getScalable(1)) + return InstructionCost::getInvalid(); + if (TTI::requiresOrderedReduction(FMF)) { if (auto *FixedVTy = dyn_cast<FixedVectorType>(ValTy)) { InstructionCost BaseCost = diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index a9189fd53f40..4a6457d7a7db 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -381,7 +381,7 @@ public: return ST->isSVEorStreamingSVEAvailable(); } - bool enableScalableVectorization() const { return ST->isSVEAvailable(); } + bool enableScalableVectorization() const; bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const; diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 9b2cab2eb73a..32ecf350db59 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1581,7 +1581,18 @@ bool SIFoldOperands::tryFoldClamp(MachineInstr &MI) { // Clamp is applied after omod, so it is OK if omod is set. DefClamp->setImm(1); - MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg()); + + Register DefReg = Def->getOperand(0).getReg(); + Register MIDstReg = MI.getOperand(0).getReg(); + if (TRI->isSGPRReg(*MRI, DefReg)) { + // Pseudo scalar instructions have a SGPR for dst and clamp is a v_max* + // instruction with a VGPR dst. + BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY), + MIDstReg) + .addReg(DefReg); + } else { + MRI->replaceRegWith(MIDstReg, DefReg); + } MI.eraseFromParent(); // Use of output modifiers forces VOP3 encoding for a VOP2 mac/fmac diff --git a/llvm/lib/Target/ARM/ARMCallLowering.cpp b/llvm/lib/Target/ARM/ARMCallLowering.cpp index 9cc162d041f4..883808ae981f 100644 --- a/llvm/lib/Target/ARM/ARMCallLowering.cpp +++ b/llvm/lib/Target/ARM/ARMCallLowering.cpp @@ -50,6 +50,13 @@ using namespace llvm; +// Whether Big-endian GISel is enabled, defaults to off, can be enabled for +// testing. 
+static cl::opt<bool>
+    EnableGISelBigEndian("enable-arm-gisel-bigendian", cl::Hidden,
+                         cl::init(false),
+                         cl::desc("Enable Global-ISel Big Endian Lowering"));
+
 ARMCallLowering::ARMCallLowering(const ARMTargetLowering &TLI)
     : CallLowering(&TLI) {}
@@ -539,3 +546,5 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &
 
   return true;
 }
+
+bool ARMCallLowering::enableBigEndian() const { return EnableGISelBigEndian; }
\ No newline at end of file diff --git a/llvm/lib/Target/ARM/ARMCallLowering.h b/llvm/lib/Target/ARM/ARMCallLowering.h index 38095617fb4f..32c95a044d7b 100644 --- a/llvm/lib/Target/ARM/ARMCallLowering.h +++ b/llvm/lib/Target/ARM/ARMCallLowering.h @@ -42,6 +42,8 @@ public: bool lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info) const override; + bool enableBigEndian() const override; + private: bool lowerReturnVal(MachineIRBuilder &MIRBuilder, const Value *Val, ArrayRef<Register> VRegs, diff --git a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp index f0933765bbcb..86ce6b4e05ed 100644 --- a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp +++ b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp @@ -1223,6 +1223,10 @@ void HCE::recordExtender(MachineInstr &MI, unsigned OpNum) { if (ER.Kind == MachineOperand::MO_GlobalAddress) if (ER.V.GV->getName().empty()) return; + // Ignore block address that points to block in another function + if (ER.Kind == MachineOperand::MO_BlockAddress) + if (ER.V.BA->getFunction() != &(MI.getMF()->getFunction())) + return; Extenders.push_back(ED); } diff --git a/llvm/lib/Target/Mips/MipsFastISel.cpp b/llvm/lib/Target/Mips/MipsFastISel.cpp index bd8ef43da625..64a0e9321598 100644 --- a/llvm/lib/Target/Mips/MipsFastISel.cpp +++ b/llvm/lib/Target/Mips/MipsFastISel.cpp @@ -1608,8 +1608,8 @@ bool MipsFastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { } emitInst(Mips::SLL, TempReg[0]).addReg(SrcReg).addImm(8); emitInst(Mips::SRL, TempReg[1]).addReg(SrcReg).addImm(8); - emitInst(Mips::OR, TempReg[2]).addReg(TempReg[0]).addReg(TempReg[1]); - emitInst(Mips::ANDi, DestReg).addReg(TempReg[2]).addImm(0xFFFF); + emitInst(Mips::ANDi, TempReg[2]).addReg(TempReg[1]).addImm(0xFF); + emitInst(Mips::OR, DestReg).addReg(TempReg[0]).addReg(TempReg[2]); updateValueMap(II, DestReg); return true; } diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index 1963582ce686..a57ed33bda9c 100644 --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -1007,7 +1007,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, // R0 cannot be used as a base register, but it can be used as an // index in a store-indexed. int LastOffset = 0; - if (HasFP) { + if (HasFP) { // R0 += (FPOffset-LastOffset). // Need addic, since addi treats R0 as 0. BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) @@ -2025,8 +2025,18 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF, // code. Same goes for the base pointer and the PIC base register. if (needsFP(MF)) SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31); - if (RegInfo->hasBasePointer(MF)) + if (RegInfo->hasBasePointer(MF)) { SavedRegs.reset(RegInfo->getBaseRegister(MF)); + // On AIX, when BaseRegister(R30) is used, need to spill r31 too to match + // AIX trackback table requirement. + if (!needsFP(MF) && !SavedRegs.test(isPPC64 ? PPC::X31 : PPC::R31) && + Subtarget.isAIXABI()) { + assert( + (RegInfo->getBaseRegister(MF) == (isPPC64 ? PPC::X30 : PPC::R30)) && + "Invalid base register on AIX!"); + SavedRegs.set(isPPC64 ? 
PPC::X31 : PPC::R31); + } + } if (FI->usesPICBase()) SavedRegs.reset(PPC::R30); diff --git a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp index 0a66a38f6d5a..be2e880ecd3a 100644 --- a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp +++ b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp @@ -187,25 +187,10 @@ bool RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst &II) { auto *VTy = cast<VectorType>(II.getType()); IRBuilder<> Builder(&II); - - // Extend VL from i32 to XLen if needed. - if (ST->is64Bit()) - VL = Builder.CreateZExt(VL, Builder.getInt64Ty()); - Type *STy = VTy->getElementType(); Value *Val = Builder.CreateLoad(STy, BasePtr); - const auto &TLI = *ST->getTargetLowering(); - Value *Res; - - // TODO: Also support fixed/illegal vector types to splat with evl = vl. - if (isa<ScalableVectorType>(VTy) && TLI.isTypeLegal(EVT::getEVT(VTy))) { - unsigned VMVOp = STy->isFloatingPointTy() ? Intrinsic::riscv_vfmv_v_f - : Intrinsic::riscv_vmv_v_x; - Res = Builder.CreateIntrinsic(VMVOp, {VTy, VL->getType()}, - {PoisonValue::get(VTy), Val, VL}); - } else { - Res = Builder.CreateVectorSplat(VTy->getElementCount(), Val); - } + Value *Res = Builder.CreateIntrinsic(Intrinsic::experimental_vp_splat, {VTy}, + {Val, II.getOperand(2), VL}); II.replaceAllUsesWith(Res); II.eraseFromParent(); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 809be499ee0f..9d2990c98ce2 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -171,7 +171,7 @@ Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) { IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3); // If the memcpy has metadata describing the members, see if we can get the - // TBAA tag describing our copy. + // TBAA, scope and noalias tags describing our copy. AAMDNodes AACopyMD = MI->getAAMetadata().adjustForAccess(Size); Value *Src = MI->getArgOperand(1); diff --git a/llvm/utils/TableGen/ARMTargetDefEmitter.cpp b/llvm/utils/TableGen/ARMTargetDefEmitter.cpp index a4b25025b3c6..71ca331461c0 100644 --- a/llvm/utils/TableGen/ARMTargetDefEmitter.cpp +++ b/llvm/utils/TableGen/ARMTargetDefEmitter.cpp @@ -19,10 +19,38 @@ #include "llvm/TableGen/Record.h" #include "llvm/TableGen/TableGenBackend.h" #include <cstdint> +#include <set> #include <string> using namespace llvm; +/// Collect the full set of implied features for a SubtargetFeature. +static void CollectImpliedFeatures(std::set<Record *> &SeenFeats, Record *Rec) { + assert(Rec->isSubClassOf("SubtargetFeature") && + "Rec is not a SubtargetFeature"); + + SeenFeats.insert(Rec); + for (Record *Implied : Rec->getValueAsListOfDefs("Implies")) + CollectImpliedFeatures(SeenFeats, Implied); +} + +static void CheckFeatureTree(Record *Root) { + std::set<Record *> SeenFeats; + CollectImpliedFeatures(SeenFeats, Root); + + // Check that each of the mandatory (implied) features which is an + // ExtensionWithMArch is also enabled by default. 
+ auto DefaultExtsVec = Root->getValueAsListOfDefs("DefaultExts"); + std::set<Record *> DefaultExts{DefaultExtsVec.begin(), DefaultExtsVec.end()}; + for (auto *Feat : SeenFeats) { + if (Feat->isSubClassOf("ExtensionWithMArch") && !DefaultExts.count(Feat)) + PrintFatalError(Root->getLoc(), + "ExtensionWithMArch " + Feat->getName() + + " is implied (mandatory) as a SubtargetFeature, but " + "is not present in DefaultExts"); + } +} + static void EmitARMTargetDef(RecordKeeper &RK, raw_ostream &OS) { OS << "// Autogenerated by ARMTargetDefEmitter.cpp\n\n"; @@ -283,9 +311,7 @@ static void EmitARMTargetDef(RecordKeeper &RK, raw_ostream &OS) { auto Profile = Arch->getValueAsString("Profile"); auto ArchInfo = ArchInfoName(Major, Minor, Profile); - // The apple-latest alias is backend only, do not expose it to -mcpu. - if (Name == "apple-latest") - continue; + CheckFeatureTree(Arch); OS << " {\n" << " \"" << Name << "\",\n" diff --git a/openmp/runtime/src/z_Linux_asm.S b/openmp/runtime/src/z_Linux_asm.S index 5b614e26a833..223ad091030e 100644 --- a/openmp/runtime/src/z_Linux_asm.S +++ b/openmp/runtime/src/z_Linux_asm.S @@ -176,6 +176,53 @@ KMP_PREFIX_UNDERSCORE(\proc): .endm # endif // KMP_OS_DARWIN +# if KMP_OS_LINUX +// BTI and PAC gnu property note +# define NT_GNU_PROPERTY_TYPE_0 5 +# define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000 +# define GNU_PROPERTY_AARCH64_FEATURE_1_BTI 1 +# define GNU_PROPERTY_AARCH64_FEATURE_1_PAC 2 + +# define GNU_PROPERTY(type, value) \ + .pushsection .note.gnu.property, "a"; \ + .p2align 3; \ + .word 4; \ + .word 16; \ + .word NT_GNU_PROPERTY_TYPE_0; \ + .asciz "GNU"; \ + .word type; \ + .word 4; \ + .word value; \ + .word 0; \ + .popsection +# endif + +# if defined(__ARM_FEATURE_BTI_DEFAULT) +# define BTI_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_BTI +# else +# define BTI_FLAG 0 +# endif +# if __ARM_FEATURE_PAC_DEFAULT & 3 +# define PAC_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_PAC +# else +# define PAC_FLAG 0 +# endif + +# if (BTI_FLAG | PAC_FLAG) != 0 +# if PAC_FLAG != 0 +# define PACBTI_C hint #25 +# define PACBTI_RET hint #29 +# else +# define PACBTI_C hint #34 +# define PACBTI_RET +# endif +# define GNU_PROPERTY_BTI_PAC \ + GNU_PROPERTY(GNU_PROPERTY_AARCH64_FEATURE_1_AND, BTI_FLAG | PAC_FLAG) +# else +# define PACBTI_C +# define PACBTI_RET +# define GNU_PROPERTY_BTI_PAC +# endif #endif // (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32 || KMP_ARCH_ARM) .macro COMMON name, size, align_power @@ -1296,6 +1343,7 @@ __tid = 8 // mark_begin; .text PROC __kmp_invoke_microtask + PACBTI_C stp x29, x30, [sp, #-16]! # if OMPT_SUPPORT @@ -1359,6 +1407,7 @@ KMP_LABEL(kmp_1): ldp x19, x20, [sp], #16 # endif ldp x29, x30, [sp], #16 + PACBTI_RET ret DEBUG_INFO __kmp_invoke_microtask @@ -2472,3 +2521,7 @@ __kmp_unnamed_critical_addr: .4byte .gomp_critical_user_ .size __kmp_unnamed_critical_addr, 4 #endif + +#if KMP_OS_LINUX && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32) +GNU_PROPERTY_BTI_PAC +#endif |
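The AArch64TargetTransformInfo.cpp hunks above repeat one guard in four cost hooks: if the queried type is a scalable vector whose minimum element count is 1, the hook returns an invalid cost so the vectorizers never select <vscale x 1 x eltty>. A minimal sketch of that pattern, factored into a helper; isUnsupportedScalableVector and exampleCostHook are illustrative names, not part of the patch or of the LLVM API.

// Sketch only: mirrors the <vscale x 1 x eltty> guard added in the TTI hooks.
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/InstructionCost.h"

using namespace llvm;

// Codegen does not yet handle <vscale x 1 x eltty> reliably, so cost queries
// for that shape should report an invalid (effectively infinite) cost.
static bool isUnsupportedScalableVector(Type *Ty) {
  if (auto *VTy = dyn_cast<ScalableVectorType>(Ty))
    return VTy->getElementCount() == ElementCount::getScalable(1);
  return false;
}

static InstructionCost exampleCostHook(Type *Ty) {
  if (isUnsupportedScalableVector(Ty))
    return InstructionCost::getInvalid();
  return InstructionCost(1); // placeholder for the real cost computation
}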
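The emitRemarks body at the start of this section sorts stack objects by offset and then walks adjacent pairs, flagging a GPR-accessed object that lies within HazardSize bytes of an SVE/FP-accessed one. Below is a standalone, simplified sketch of that neighbour scan using only standard C++: it assumes the slots are already sorted by offset, collapses the pass's PPR/FPR categories into a single FPR bit, and the names StackSlot and hazardPairs are invented for illustration.

// Standalone sketch of the stack-hazard neighbour scan (simplified).
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

enum AccessType { GPR = 1, FPR = 2 };

struct StackSlot {
  int64_t Offset = 0;  // start offset from SP
  int64_t Size = 0;    // object size in bytes
  unsigned Access = 0; // bitmask of AccessType
  int64_t end() const { return Offset + Size; }
  bool isCPU() const { return Access & GPR; }
  bool isFP() const { return Access & FPR; }
};

// Report neighbouring pairs where a GPR-accessed slot sits within HazardSize
// bytes of an FP/SVE-accessed slot, in either order.
std::vector<std::pair<size_t, size_t>>
hazardPairs(const std::vector<StackSlot> &Sorted, uint64_t HazardSize) {
  std::vector<std::pair<size_t, size_t>> Pairs;
  for (size_t I = 0; I + 1 < Sorted.size(); ++I) {
    const StackSlot &First = Sorted[I], &Second = Sorted[I + 1];
    bool Mixed = (First.isFP() && Second.isCPU()) ||
                 (First.isCPU() && Second.isFP());
    uint64_t Distance = static_cast<uint64_t>(Second.Offset - First.end());
    if (Mixed && Distance < HazardSize)
      Pairs.emplace_back(I, I + 1);
  }
  return Pairs;
}

int main() {
  std::vector<StackSlot> Slots = {{-64, 16, FPR}, {-32, 8, GPR}};
  for (auto [A, B] : hazardPairs(Slots, 1024))
    std::cout << "slot " << A << " is too close to slot " << B << '\n';
}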
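The new CollectImpliedFeatures/CheckFeatureTree pair in ARMTargetDefEmitter.cpp walks a processor's Implies graph and fails the TableGen run if a mandatory ExtensionWithMArch is missing from DefaultExts. A simplified, TableGen-free sketch of the same consistency check follows; Feature, collectImplied, and checkFeatureTree are illustrative stand-ins for the Record-based code above.

// Simplified sketch of the implied-feature vs. DefaultExts consistency check.
#include <set>
#include <stdexcept>
#include <string>
#include <vector>

struct Feature {
  std::string Name;
  bool IsExtensionWithMArch = false;
  std::vector<const Feature *> Implies;
};

// Transitively collect a feature and everything it implies.
static void collectImplied(std::set<const Feature *> &Seen, const Feature *F) {
  if (!Seen.insert(F).second)
    return; // already visited; Implies graphs may share nodes
  for (const Feature *Implied : F->Implies)
    collectImplied(Seen, Implied);
}

// Every implied ExtensionWithMArch must also appear in DefaultExts, otherwise
// the default extension list would silently disagree with the mandatory set.
static void checkFeatureTree(const Feature &Root,
                             const std::set<const Feature *> &DefaultExts) {
  std::set<const Feature *> Seen;
  collectImplied(Seen, &Root);
  for (const Feature *F : Seen)
    if (F->IsExtensionWithMArch && !DefaultExts.count(F))
      throw std::runtime_error("ExtensionWithMArch " + F->Name +
                               " is implied but not in DefaultExts");
}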